1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.tomcat.util.buf;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.CharConversionException;
21 import java.io.IOException;
22 import java.io.OutputStreamWriter;
23 import java.nio.charset.Charset;
24 import java.nio.charset.StandardCharsets;
25
26 import org.apache.tomcat.util.res.StringManager;
27
28 /**
29 * All URL decoding happens here. This way we can reuse, review, optimize
30 * without adding complexity to the buffers.
31 *
32 * The conversion will modify the original buffer.
33 *
34 * @author Costin Manolache
35 */
36 public final class UDecoder {
37
38 private static final StringManager sm = StringManager.getManager(UDecoder.class);
39
40 public static final boolean ALLOW_ENCODED_SLASH =
41 Boolean.parseBoolean(System.getProperty("org.apache.tomcat.util.buf.UDecoder.ALLOW_ENCODED_SLASH", "false"));
42
43 private static class DecodeException extends CharConversionException {
44 private static final long serialVersionUID = 1L;
45 public DecodeException(String s) {
46 super(s);
47 }
48
49 @Override
50 public synchronized Throwable fillInStackTrace() {
51 // This class does not provide a stack trace
52 return this;
53 }
54 }
55
56 /** Unexpected end of data. */
57 private static final IOException EXCEPTION_EOF = new DecodeException(sm.getString("uDecoder.eof"));
58
59 /** %xx with not-hex digit */
60 private static final IOException EXCEPTION_NOT_HEX_DIGIT = new DecodeException(
61 "isHexDigit");
62
63 /** %-encoded slash is forbidden in resource path */
64 private static final IOException EXCEPTION_SLASH = new DecodeException(
65 "noSlash");
66
67 public UDecoder()
68 {
69 }
70
71 /**
72 * URLDecode, will modify the source.
73 * @param mb The URL encoded bytes
74 * @param query <code>true</code> if this is a query string
75 * @throws IOException Invalid %xx URL encoding
76 */
77 public void convert( ByteChunk mb, boolean query )
78 throws IOException
79 {
80 int start=mb.getOffset();
81 byte buff[]=mb.getBytes();
82 int end=mb.getEnd();
83
84 int idx= ByteChunk.findByte( buff, start, end, (byte) '%' );
85 int idx2=-1;
86 if( query ) {
87 idx2= ByteChunk.findByte( buff, start, (idx >= 0 ? idx : end), (byte) '+' );
88 }
89 if( idx<0 && idx2<0 ) {
90 return;
91 }
92
93 // idx will be the smallest positive index ( first % or + )
94 if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) {
95 idx=idx2;
96 }
97
98 final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
99
100 for( int j=idx; j<end; j++, idx++ ) {
101 if( buff[ j ] == '+' && query) {
102 buff[idx]= (byte)' ' ;
103 } else if( buff[ j ] != '%' ) {
104 buff[idx]= buff[j];
105 } else {
106 // read next 2 digits
107 if( j+2 >= end ) {
108 throw EXCEPTION_EOF;
109 }
110 byte b1= buff[j+1];
111 byte b2=buff[j+2];
112 if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) {
113 throw EXCEPTION_NOT_HEX_DIGIT;
114 }
115
116 j+=2;
117 int res=x2c( b1, b2 );
118 if (noSlash && (res == '/')) {
119 throw EXCEPTION_SLASH;
120 }
121 buff[idx]=(byte)res;
122 }
123 }
124
125 mb.setEnd( idx );
126 }
127
128 // -------------------- Additional methods --------------------
129 // XXX What do we do about charset ????
130
131 /**
132 * In-buffer processing - the buffer will be modified.
133 * @param mb The URL encoded chars
134 * @param query <code>true</code> if this is a query string
135 * @throws IOException Invalid %xx URL encoding
136 */
137 public void convert( CharChunk mb, boolean query )
138 throws IOException
139 {
140 // log( "Converting a char chunk ");
141 int start=mb.getOffset();
142 char buff[]=mb.getBuffer();
143 int cend=mb.getEnd();
144
145 int idx= CharChunk.indexOf( buff, start, cend, '%' );
146 int idx2=-1;
147 if( query ) {
148 idx2= CharChunk.indexOf( buff, start, (idx >= 0 ? idx : cend), '+' );
149 }
150 if( idx<0 && idx2<0 ) {
151 return;
152 }
153
154 // idx will be the smallest positive index ( first % or + )
155 if( (idx2 >= 0 && idx2 < idx) || idx < 0 ) {
156 idx=idx2;
157 }
158
159 final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
160
161 for( int j=idx; j<cend; j++, idx++ ) {
162 if( buff[ j ] == '+' && query ) {
163 buff[idx]=( ' ' );
164 } else if( buff[ j ] != '%' ) {
165 buff[idx]=buff[j];
166 } else {
167 // read next 2 digits
168 if( j+2 >= cend ) {
169 // invalid
170 throw EXCEPTION_EOF;
171 }
172 char b1= buff[j+1];
173 char b2=buff[j+2];
174 if( !isHexDigit( b1 ) || ! isHexDigit(b2 )) {
175 throw EXCEPTION_NOT_HEX_DIGIT;
176 }
177
178 j+=2;
179 int res=x2c( b1, b2 );
180 if (noSlash && (res == '/')) {
181 throw EXCEPTION_SLASH;
182 }
183 buff[idx]=(char)res;
184 }
185 }
186 mb.setEnd( idx );
187 }
188
189 /**
190 * URLDecode, will modify the source
191 * @param mb The URL encoded String, bytes or chars
192 * @param query <code>true</code> if this is a query string
193 * @throws IOException Invalid %xx URL encoding
194 */
195 public void convert(MessageBytes mb, boolean query)
196 throws IOException
197 {
198
199 switch (mb.getType()) {
200 case MessageBytes.T_STR:
201 String strValue=mb.toString();
202 if( strValue==null ) {
203 return;
204 }
205 try {
206 mb.setString( convert( strValue, query ));
207 } catch (RuntimeException ex) {
208 throw new DecodeException(ex.getMessage());
209 }
210 break;
211 case MessageBytes.T_CHARS:
212 CharChunk charC=mb.getCharChunk();
213 convert( charC, query );
214 break;
215 case MessageBytes.T_BYTES:
216 ByteChunk bytesC=mb.getByteChunk();
217 convert( bytesC, query );
218 break;
219 }
220 }
221
222 /**
223 * %xx decoding of a string. FIXME: this is inefficient.
224 * @param str The URL encoded string
225 * @param query <code>true</code> if this is a query string
226 * @return the decoded string
227 */
228 public final String convert(String str, boolean query)
229 {
230 if (str == null) {
231 return null;
232 }
233
234 if( (!query || str.indexOf( '+' ) < 0) && str.indexOf( '%' ) < 0 ) {
235 return str;
236 }
237
238 final boolean noSlash = !(ALLOW_ENCODED_SLASH || query);
239
240 StringBuilder dec = new StringBuilder(); // decoded string output
241 int strPos = 0;
242 int strLen = str.length();
243
244 dec.ensureCapacity(str.length());
245 while (strPos < strLen) {
246 int laPos; // lookahead position
247
248 // look ahead to next URLencoded metacharacter, if any
249 for (laPos = strPos; laPos < strLen; laPos++) {
250 char laChar = str.charAt(laPos);
251 if ((laChar == '+' && query) || (laChar == '%')) {
252 break;
253 }
254 }
255
256 // if there were non-metacharacters, copy them all as a block
257 if (laPos > strPos) {
258 dec.append(str.substring(strPos,laPos));
259 strPos = laPos;
260 }
261
262 // shortcut out of here if we're at the end of the string
263 if (strPos >= strLen) {
264 break;
265 }
266
267 // process next metacharacter
268 char metaChar = str.charAt(strPos);
269 if (metaChar == '+') {
270 dec.append(' ');
271 strPos++;
272 continue;
273 } else if (metaChar == '%') {
274 // We throw the original exception - the super will deal with
275 // it
276 // try {
277 char res = (char) Integer.parseInt(
278 str.substring(strPos + 1, strPos + 3), 16);
279 if (noSlash && (res == '/')) {
280 throw new IllegalArgumentException(sm.getString("uDecoder.noSlash"));
281 }
282 dec.append(res);
283 strPos += 3;
284 }
285 }
286
287 return dec.toString();
288 }
289
290
291 /**
292 * Decode and return the specified URL-encoded String.
293 * When the byte array is converted to a string, UTF-8 is used. This may
294 * be different than some other servers. It is assumed the string is not a
295 * query string.
296 *
297 * @param str The url-encoded string
298 * @return the decoded string
299 * @exception IllegalArgumentException if a '%' character is not followed
300 * by a valid 2-digit hexadecimal number
301 */
302 public static String URLDecode(String str) {
303 return URLDecode(str, StandardCharsets.UTF_8);
304 }
305
306
307 /**
308 * Decode and return the specified URL-encoded String. It is assumed the
309 * string is not a query string.
310 *
311 * @param str The url-encoded string
312 * @param charset The character encoding to use; if null, UTF-8 is used.
313 * @return the decoded string
314 * @exception IllegalArgumentException if a '%' character is not followed
315 * by a valid 2-digit hexadecimal number
316 */
317 public static String URLDecode(String str, Charset charset) {
318 if (str == null) {
319 return null;
320 }
321
322 if (str.indexOf('%') == -1) {
323 // No %nn sequences, so return string unchanged
324 return str;
325 }
326
327 if (charset == null) {
328 charset = StandardCharsets.UTF_8;
329 }
330
331 /*
332 * Decoding is required.
333 *
334 * Potential complications:
335 * - The source String may be partially decoded so it is not valid to
336 * assume that the source String is ASCII.
337 * - Have to process as characters since there is no guarantee that the
338 * byte sequence for '%' is going to be the same in all character
339 * sets.
340 * - We don't know how many '%nn' sequences are required for a single
341 * character. It varies between character sets and some use a variable
342 * length.
343 */
344
345 // This isn't perfect but it is a reasonable guess for the size of the
346 // array required
347 ByteArrayOutputStream baos = new ByteArrayOutputStream(str.length() * 2);
348
349 OutputStreamWriter osw = new OutputStreamWriter(baos, charset);
350
351 char[] sourceChars = str.toCharArray();
352 int len = sourceChars.length;
353 int ix = 0;
354
355 try {
356 while (ix < len) {
357 char c = sourceChars[ix++];
358 if (c == '%') {
359 osw.flush();
360 if (ix + 2 > len) {
361 throw new IllegalArgumentException(
362 sm.getString("uDecoder.urlDecode.missingDigit", str));
363 }
364 char c1 = sourceChars[ix++];
365 char c2 = sourceChars[ix++];
366 if (isHexDigit(c1) && isHexDigit(c2)) {
367 baos.write(x2c(c1, c2));
368 } else {
369 throw new IllegalArgumentException(
370 sm.getString("uDecoder.urlDecode.missingDigit", str));
371 }
372 } else {
373 osw.append(c);
374 }
375 }
376 osw.flush();
377
378 return baos.toString(charset.name());
379 } catch (IOException ioe) {
380 throw new IllegalArgumentException(
381 sm.getString("uDecoder.urlDecode.conversionError", str, charset.name()), ioe);
382 }
383 }
384
385
386 private static boolean isHexDigit( int c ) {
387 return ( ( c>='0' && c<='9' ) ||
388 ( c>='a' && c<='f' ) ||
389 ( c>='A' && c<='F' ));
390 }
391
392
393 private static int x2c( byte b1, byte b2 ) {
394 int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 :
395 (b1 -'0');
396 digit*=16;
397 digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 :
398 (b2 -'0');
399 return digit;
400 }
401
402
403 private static int x2c( char b1, char b2 ) {
404 int digit= (b1>='A') ? ( (b1 & 0xDF)-'A') + 10 :
405 (b1 -'0');
406 digit*=16;
407 digit +=(b2>='A') ? ( (b2 & 0xDF)-'A') + 10 :
408 (b2 -'0');
409 return digit;
410 }
411 }
412