1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.catalina.util;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.IOException;
21 import java.io.OutputStreamWriter;
22 import java.nio.charset.Charset;
23 import java.util.BitSet;
24
/**
 * Percent-encodes strings for use in URLs while leaving a configurable set of
 * "safe" characters untouched.
 * <p>
 * This class is very similar to the {@code java.net.URLEncoder} class.
 * Unfortunately, with {@code java.net.URLEncoder} there is no way to specify
 * which characters should NOT be encoded.
 * <p>
 * Instances created with the public constructor treat the ASCII letters and
 * digits as safe; further characters can be added or removed afterwards.
 * Instances are mutable and perform no synchronization, and that includes the
 * shared {@link #DEFAULT} and {@link #QUERY} instances.
 * <p>
 * This code was moved from DefaultServlet.java
 *
 * @author Craig R. McClanahan
 * @author Remy Maucherat
 */
public final class URLEncoder implements Cloneable {

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEXADECIMAL =
            {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    /**
     * Character encoded in place of a high surrogate that is not followed by
     * a low surrogate.
     */
    private static final char UNPAIRED_SURROGATE_REPLACEMENT = '?';

    /** Shared encoder configured for URI paths ('/' is left as-is). */
    public static final URLEncoder DEFAULT = new URLEncoder();

    /**
     * Shared encoder configured for query strings: space becomes '+' and
     * '=' and '&amp;' are left as-is.
     */
    public static final URLEncoder QUERY = new URLEncoder();

    static {
        /*
         * Encoder for URI paths, so from the spec:
         *
         * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
         *
         * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
         *
         * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
         *              / "*" / "+" / "," / ";" / "="
         */
        // ALPHA and DIGIT are always treated as safe characters
        // Add the remaining unreserved characters
        DEFAULT.addSafeCharacter('-');
        DEFAULT.addSafeCharacter('.');
        DEFAULT.addSafeCharacter('_');
        DEFAULT.addSafeCharacter('~');
        // Add the sub-delims
        DEFAULT.addSafeCharacter('!');
        DEFAULT.addSafeCharacter('$');
        DEFAULT.addSafeCharacter('&');
        DEFAULT.addSafeCharacter('\'');
        DEFAULT.addSafeCharacter('(');
        DEFAULT.addSafeCharacter(')');
        DEFAULT.addSafeCharacter('*');
        DEFAULT.addSafeCharacter('+');
        DEFAULT.addSafeCharacter(',');
        DEFAULT.addSafeCharacter(';');
        DEFAULT.addSafeCharacter('=');
        // Add the remaining literals
        DEFAULT.addSafeCharacter(':');
        DEFAULT.addSafeCharacter('@');
        // Add '/' so it isn't encoded when we encode a path
        DEFAULT.addSafeCharacter('/');

        /*
         * Encoder for query strings
         * https://www.w3.org/TR/html5/forms.html#application/x-www-form-urlencoded-encoding-algorithm
         * 0x20 ' ' -> '+'
         * 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F, 0x61 to 0x7A as-is
         * '*',  '-',  '.',  '0'  to '9',  'A'  to 'Z',  '_',  'a'  to 'z'
         * Also '=' and '&' are not encoded
         * Everything else %nn encoded
         */
        // Special encoding for space
        QUERY.setEncodeSpaceAsPlus(true);
        // Alpha and digit are safe by default
        // Add the other permitted characters
        QUERY.addSafeCharacter('*');
        QUERY.addSafeCharacter('-');
        QUERY.addSafeCharacter('.');
        QUERY.addSafeCharacter('_');
        QUERY.addSafeCharacter('=');
        QUERY.addSafeCharacter('&');
    }

    /** Set of characters that are copied to the output without encoding. */
    private final BitSet safeCharacters;

    /** When {@code true}, a non-safe space is written as '+' rather than percent-encoded. */
    private boolean encodeSpaceAsPlus = false;


    /**
     * Creates an encoder whose safe set contains exactly the characters
     * 'a'-'z', 'A'-'Z' and '0'-'9', and which does not encode space as '+'.
     */
    public URLEncoder() {
        this(new BitSet(256));

        for (char i = 'a'; i <= 'z'; i++) {
            addSafeCharacter(i);
        }
        for (char i = 'A'; i <= 'Z'; i++) {
            addSafeCharacter(i);
        }
        for (char i = '0'; i <= '9'; i++) {
            addSafeCharacter(i);
        }
    }


    /**
     * Creates an encoder backed directly by the given safe set (not copied).
     *
     * @param safeCharacters The set of characters to leave unencoded
     */
    private URLEncoder(BitSet safeCharacters) {
        this.safeCharacters = safeCharacters;
    }


    /**
     * Marks a character as safe so that it is copied to the output as-is.
     *
     * @param c The character to leave unencoded
     */
    public void addSafeCharacter(char c) {
        safeCharacters.set(c);
    }


    /**
     * Removes a character from the safe set so that it is encoded again.
     *
     * @param c The character that should no longer be left unencoded
     */
    public void removeSafeCharacter(char c) {
        safeCharacters.clear(c);
    }


    /**
     * Controls how a space that is not in the safe set is written.
     *
     * @param encodeSpaceAsPlus {@code true} to write such a space as '+',
     *                          {@code false} to percent-encode it like any
     *                          other non-safe character
     */
    public void setEncodeSpaceAsPlus(boolean encodeSpaceAsPlus) {
        this.encodeSpaceAsPlus = encodeSpaceAsPlus;
    }


    /**
     * URL encodes the provided path using the given character set.
     * <p>
     * Safe characters are copied unchanged. Every other character is
     * converted to bytes with {@code charset} and each byte is written as
     * {@code %XX} using upper-case hexadecimal digits. A high surrogate
     * immediately followed by a low surrogate is converted as a single
     * character. A high surrogate without a following low surrogate is
     * encoded as if it were {@code '?'}, so that it produces output at its
     * own position.
     *
     * @param path    The path to encode
     * @param charset The character set to use to convert the path to bytes
     *
     * @return The encoded path
     *
     * @throws NullPointerException if {@code path} is {@code null}
     */
    public String encode(String path, Charset charset) {

        int maxBytesPerChar = 10;
        int length = path.length();
        StringBuilder rewrittenPath = new StringBuilder(length);
        ByteArrayOutputStream buf = new ByteArrayOutputStream(maxBytesPerChar);
        OutputStreamWriter writer = new OutputStreamWriter(buf, charset);

        for (int i = 0; i < length; i++) {
            char c = path.charAt(i);
            if (safeCharacters.get(c)) {
                rewrittenPath.append(c);
                continue;
            }
            if (encodeSpaceAsPlus && c == ' ') {
                rewrittenPath.append('+');
                continue;
            }

            boolean highSurrogate = Character.isHighSurrogate(c);
            boolean surrogatePair = highSurrogate && i + 1 < length &&
                    Character.isLowSurrogate(path.charAt(i + 1));

            // convert to external encoding before hex conversion
            try {
                if (surrogatePair) {
                    // Hand both halves to the writer together so they are
                    // converted as one character.
                    writer.write(path, i, 2);
                } else if (highSurrogate) {
                    // Never pass an unpaired high surrogate to the writer: it
                    // would be held back waiting for a low surrogate, then
                    // either lost at the end of the input or merged into a
                    // later, unrelated character.
                    writer.write(UNPAIRED_SURROGATE_REPLACEMENT);
                } else {
                    writer.write(c);
                }
                writer.flush();
            } catch (IOException e) {
                // Discard any partial output and skip this character.
                buf.reset();
                if (surrogatePair) {
                    i++;
                }
                continue;
            }
            if (surrogatePair) {
                // The low surrogate has been consumed as part of the pair.
                i++;
            }

            appendPercentEncoded(rewrittenPath, buf.toByteArray());
            buf.reset();
        }
        return rewrittenPath.toString();
    }


    /**
     * Appends each byte as '%' followed by two upper-case hexadecimal digits.
     *
     * @param target The builder to append to
     * @param bytes  The bytes to write in percent-encoded form
     */
    private static void appendPercentEncoded(StringBuilder target, byte[] bytes) {
        for (byte toEncode : bytes) {
            target.append('%');
            target.append(HEXADECIMAL[(toEncode & 0xf0) >> 4]);
            target.append(HEXADECIMAL[toEncode & 0x0f]);
        }
    }


    /**
     * Creates an independent copy of this encoder: the safe character set is
     * cloned and the space handling setting is carried over, so later changes
     * to either instance do not affect the other.
     *
     * @return A new {@code URLEncoder} with the same configuration
     */
    @Override
    public Object clone() {
        URLEncoder result = new URLEncoder((BitSet) safeCharacters.clone());
        result.setEncodeSpaceAsPlus(encodeSpaceAsPlus);
        return result;
    }
}
196