1
17 package org.apache.tomcat.util.http.parser;
18
19 import java.io.IOException;
20 import java.io.Reader;
21
22 import org.apache.tomcat.util.res.StringManager;
23
24
37 public class HttpParser {
38
/** String manager used to build the messages of exceptions thrown by this class. */
private static final StringManager sm = StringManager.getManager(HttpParser.class);

/** Length of every lookup table below; valid indexes are 0 to 127. */
private static final int ARRAY_SIZE = 128;

// Shared character-class lookup tables, indexed by character code.
private static final boolean[] IS_CONTROL = new boolean[ARRAY_SIZE];
private static final boolean[] IS_SEPARATOR = new boolean[ARRAY_SIZE];
private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE];
private static final boolean[] IS_HEX = new boolean[ARRAY_SIZE];
private static final boolean[] IS_HTTP_PROTOCOL = new boolean[ARRAY_SIZE];
private static final boolean[] IS_ALPHA = new boolean[ARRAY_SIZE];
private static final boolean[] IS_NUMERIC = new boolean[ARRAY_SIZE];
private static final boolean[] IS_UNRESERVED = new boolean[ARRAY_SIZE];
private static final boolean[] IS_SUBDELIM = new boolean[ARRAY_SIZE];
private static final boolean[] IS_USERINFO = new boolean[ARRAY_SIZE];
private static final boolean[] IS_RELAXABLE = new boolean[ARRAY_SIZE];

/** Parser built with no relaxed characters; backs the static request-target checks. */
private static final HttpParser DEFAULT;


static {
    // Literal character sets; membership is tested with String.indexOf(int).
    final String separators = "()<>@,;:\\\"/[]?={} \t";
    final String protocolExtras = "HTP/.";
    final String unreservedExtras = "-._~";
    final String subDelims = "!$&'()*+,;=";
    final String relaxable = "\"<>[\\]^`{|}";

    for (int ch = 0; ch < ARRAY_SIZE; ch++) {
        final boolean digit = ch >= '0' && ch <= '9';
        final boolean lower = ch >= 'a' && ch <= 'z';
        final boolean upper = ch >= 'A' && ch <= 'Z';

        // Control characters: everything below space, plus DEL.
        IS_CONTROL[ch] = ch < 32 || ch == 127;

        IS_SEPARATOR[ch] = separators.indexOf(ch) >= 0;

        // A token character is anything in the table that is neither control nor separator.
        IS_TOKEN[ch] = !IS_CONTROL[ch] && !IS_SEPARATOR[ch];

        IS_HEX[ch] = digit || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');

        // Characters that can appear in a protocol string such as "HTTP/1.1".
        IS_HTTP_PROTOCOL[ch] = digit || protocolExtras.indexOf(ch) >= 0;

        IS_NUMERIC[ch] = digit;
        IS_ALPHA[ch] = lower || upper;

        IS_UNRESERVED[ch] = IS_ALPHA[ch] || IS_NUMERIC[ch] || unreservedExtras.indexOf(ch) >= 0;
        IS_SUBDELIM[ch] = subDelims.indexOf(ch) >= 0;

        // userinfo = unreserved / pct-encoded marker / sub-delims / ":"
        IS_USERINFO[ch] = IS_UNRESERVED[ch] || ch == '%' || IS_SUBDELIM[ch] || ch == ':';

        // Only these characters may be re-enabled through relax().
        IS_RELAXABLE[ch] = relaxable.indexOf(ch) >= 0;
    }

    // Must run after the loop: the constructor reads IS_CONTROL and IS_USERINFO.
    DEFAULT = new HttpParser(null, null);
}
122
123
// Per-instance tables; relax() may alter them according to the constructor arguments.
private final boolean[] IS_NOT_REQUEST_TARGET = new boolean[ARRAY_SIZE];
private final boolean[] IS_ABSOLUTEPATH_RELAXED = new boolean[ARRAY_SIZE];
private final boolean[] IS_QUERY_RELAXED = new boolean[ARRAY_SIZE];


/**
 * Builds a parser whose path and query tables may accept extra characters.
 *
 * @param relaxedPathChars  characters to additionally allow in the path;
 *                          may be {@code null} or empty
 * @param relaxedQueryChars characters to additionally allow in the query;
 *                          may be {@code null} or empty
 */
public HttpParser(String relaxedPathChars, String relaxedQueryChars) {
    // Printable characters that are rejected in a request target by default.
    final String neverInTarget = " \"#<>\\^`{|}";

    for (int ch = 0; ch < ARRAY_SIZE; ch++) {
        IS_NOT_REQUEST_TARGET[ch] = IS_CONTROL[ch] || neverInTarget.indexOf(ch) >= 0;

        // Path characters: the userinfo set plus '@' and '/'.
        IS_ABSOLUTEPATH_RELAXED[ch] = IS_USERINFO[ch] || ch == '@' || ch == '/';

        // Query characters: the path set plus '?'.
        IS_QUERY_RELAXED[ch] = IS_ABSOLUTEPATH_RELAXED[ch] || ch == '?';
    }

    relax(IS_ABSOLUTEPATH_RELAXED, relaxedPathChars);
    relax(IS_QUERY_RELAXED, relaxedQueryChars);
}
164
165
166 public boolean isNotRequestTargetRelaxed(int c) {
167
168
169 try {
170 return IS_NOT_REQUEST_TARGET[c];
171 } catch (ArrayIndexOutOfBoundsException ex) {
172 return true;
173 }
174 }
175
176
177 public boolean isAbsolutePathRelaxed(int c) {
178
179
180 try {
181 return IS_ABSOLUTEPATH_RELAXED[c];
182 } catch (ArrayIndexOutOfBoundsException ex) {
183 return false;
184 }
185 }
186
187
188 public boolean isQueryRelaxed(int c) {
189
190
191 try {
192 return IS_QUERY_RELAXED[c];
193 } catch (ArrayIndexOutOfBoundsException ex) {
194 return false;
195 }
196 }
197
198
199 public static String unquote(String input) {
200 if (input == null || input.length() < 2) {
201 return input;
202 }
203
204 int start;
205 int end;
206
207
208 if (input.charAt(0) == '"') {
209 start = 1;
210 end = input.length() - 1;
211 } else {
212 start = 0;
213 end = input.length();
214 }
215
216 StringBuilder result = new StringBuilder();
217 for (int i = start ; i < end; i++) {
218 char c = input.charAt(i);
219 if (input.charAt(i) == '\\') {
220 i++;
221 result.append(input.charAt(i));
222 } else {
223 result.append(c);
224 }
225 }
226 return result.toString();
227 }
228
229
230 public static boolean isToken(int c) {
231
232 try {
233 return IS_TOKEN[c];
234 } catch (ArrayIndexOutOfBoundsException ex) {
235 return false;
236 }
237 }
238
239
240 public static boolean isHex(int c) {
241
242 try {
243 return IS_HEX[c];
244 } catch (ArrayIndexOutOfBoundsException ex) {
245 return false;
246 }
247 }
248
249
250 public static boolean isNotRequestTarget(int c) {
251 return DEFAULT.isNotRequestTargetRelaxed(c);
252 }
253
254
255 public static boolean isHttpProtocol(int c) {
256
257
258 try {
259 return IS_HTTP_PROTOCOL[c];
260 } catch (ArrayIndexOutOfBoundsException ex) {
261 return false;
262 }
263 }
264
265
266 public static boolean isAlpha(int c) {
267
268
269 try {
270 return IS_ALPHA[c];
271 } catch (ArrayIndexOutOfBoundsException ex) {
272 return false;
273 }
274 }
275
276
277 public static boolean isNumeric(int c) {
278
279
280 try {
281 return IS_NUMERIC[c];
282 } catch (ArrayIndexOutOfBoundsException ex) {
283 return false;
284 }
285 }
286
287
288 public static boolean isUserInfo(int c) {
289
290
291 try {
292 return IS_USERINFO[c];
293 } catch (ArrayIndexOutOfBoundsException ex) {
294 return false;
295 }
296 }
297
298
299 private static boolean isRelaxable(int c) {
300
301
302 try {
303 return IS_RELAXABLE[c];
304 } catch (ArrayIndexOutOfBoundsException ex) {
305 return false;
306 }
307 }
308
309
310 public static boolean isAbsolutePath(int c) {
311 return DEFAULT.isAbsolutePathRelaxed(c);
312 }
313
314
315 public static boolean isQuery(int c) {
316 return DEFAULT.isQueryRelaxed(c);
317 }
318
319
320 public static boolean isControl(int c) {
321
322
323 try {
324 return IS_CONTROL[c];
325 } catch (ArrayIndexOutOfBoundsException ex) {
326 return false;
327 }
328 }
329
330
331
332
333
334 static int skipLws(Reader input) throws IOException {
335
336 input.mark(1);
337 int c = input.read();
338
339 while (c == 32 || c == 9 || c == 10 || c == 13) {
340 input.mark(1);
341 c = input.read();
342 }
343
344 input.reset();
345 return c;
346 }
347
348 static SkipResult skipConstant(Reader input, String constant) throws IOException {
349 int len = constant.length();
350
351 skipLws(input);
352 input.mark(len);
353 int c = input.read();
354
355 for (int i = 0; i < len; i++) {
356 if (i == 0 && c == -1) {
357 return SkipResult.EOF;
358 }
359 if (c != constant.charAt(i)) {
360 input.reset();
361 return SkipResult.NOT_FOUND;
362 }
363 if (i != (len - 1)) {
364 c = input.read();
365 }
366 }
367 return SkipResult.FOUND;
368 }
369
370
375 static String readToken(Reader input) throws IOException {
376 StringBuilder result = new StringBuilder();
377
378 skipLws(input);
379 input.mark(1);
380 int c = input.read();
381
382 while (c != -1 && isToken(c)) {
383 result.append((char) c);
384 input.mark(1);
385 c = input.read();
386 }
387
388
389 input.reset();
390
391 if (c != -1 && result.length() == 0) {
392 return null;
393 } else {
394 return result.toString();
395 }
396 }
397
398
402 static String readDigits(Reader input) throws IOException {
403 StringBuilder result = new StringBuilder();
404
405 skipLws(input);
406 input.mark(1);
407 int c = input.read();
408
409 while (c != -1 && isNumeric(c)) {
410 result.append((char) c);
411 input.mark(1);
412 c = input.read();
413 }
414
415
416 input.reset();
417
418 return result.toString();
419 }
420
421
425 static long readLong(Reader input) throws IOException {
426 String digits = readDigits(input);
427
428 if (digits.length() == 0) {
429 return -1;
430 }
431
432 return Long.parseLong(digits);
433 }
434
435
440 static String readQuotedString(Reader input, boolean returnQuoted) throws IOException {
441
442 skipLws(input);
443 int c = input.read();
444
445 if (c != '"') {
446 return null;
447 }
448
449 StringBuilder result = new StringBuilder();
450 if (returnQuoted) {
451 result.append('\"');
452 }
453 c = input.read();
454
455 while (c != '"') {
456 if (c == -1) {
457 return null;
458 } else if (c == '\\') {
459 c = input.read();
460 if (returnQuoted) {
461 result.append('\\');
462 }
463 result.append((char) c);
464 } else {
465 result.append((char) c);
466 }
467 c = input.read();
468 }
469 if (returnQuoted) {
470 result.append('\"');
471 }
472
473 return result.toString();
474 }
475
476 static String readTokenOrQuotedString(Reader input, boolean returnQuoted)
477 throws IOException {
478
479
480 int c = skipLws(input);
481
482 if (c == '"') {
483 return readQuotedString(input, returnQuoted);
484 } else {
485 return readToken(input);
486 }
487 }
488
489
501 static String readQuotedToken(Reader input) throws IOException {
502
503 StringBuilder result = new StringBuilder();
504 boolean quoted = false;
505
506 skipLws(input);
507 input.mark(1);
508 int c = input.read();
509
510 if (c == '"') {
511 quoted = true;
512 } else if (c == -1 || !isToken(c)) {
513 return null;
514 } else {
515 result.append((char) c);
516 }
517 input.mark(1);
518 c = input.read();
519
520 while (c != -1 && isToken(c)) {
521 result.append((char) c);
522 input.mark(1);
523 c = input.read();
524 }
525
526 if (quoted) {
527 if (c != '"') {
528 return null;
529 }
530 } else {
531
532
533 input.reset();
534 }
535
536 if (c != -1 && result.length() == 0) {
537 return null;
538 } else {
539 return result.toString();
540 }
541 }
542
543
557 static String readLhex(Reader input) throws IOException {
558
559 StringBuilder result = new StringBuilder();
560 boolean quoted = false;
561
562 skipLws(input);
563 input.mark(1);
564 int c = input.read();
565
566 if (c == '"') {
567 quoted = true;
568 } else if (c == -1 || !isHex(c)) {
569 return null;
570 } else {
571 if ('A' <= c && c <= 'F') {
572 c -= ('A' - 'a');
573 }
574 result.append((char) c);
575 }
576 input.mark(1);
577 c = input.read();
578
579 while (c != -1 && isHex(c)) {
580 if ('A' <= c && c <= 'F') {
581 c -= ('A' - 'a');
582 }
583 result.append((char) c);
584 input.mark(1);
585 c = input.read();
586 }
587
588 if (quoted) {
589 if (c != '"') {
590 return null;
591 }
592 } else {
593
594
595 input.reset();
596 }
597
598 if (c != -1 && result.length() == 0) {
599 return null;
600 } else {
601 return result.toString();
602 }
603 }
604
605 static double readWeight(Reader input, char delimiter) throws IOException {
606 skipLws(input);
607 int c = input.read();
608 if (c == -1 || c == delimiter) {
609
610 return 1;
611 } else if (c != 'q') {
612
613 skipUntil(input, c, delimiter);
614 return 0;
615 }
616
617 skipLws(input);
618 c = input.read();
619 if (c != '=') {
620
621 skipUntil(input, c, delimiter);
622 return 0;
623 }
624
625
626 skipLws(input);
627 c = input.read();
628
629
630 StringBuilder value = new StringBuilder(5);
631 int decimalPlacesRead = -1;
632
633 if (c == '0' || c == '1') {
634 value.append((char) c);
635 c = input.read();
636
637 while (true) {
638 if (decimalPlacesRead == -1 && c == '.') {
639 value.append('.');
640 decimalPlacesRead = 0;
641 } else if (decimalPlacesRead > -1 && c >= '0' && c <= '9') {
642 if (decimalPlacesRead < 3) {
643 value.append((char) c);
644 decimalPlacesRead++;
645 }
646 } else {
647 break;
648 }
649 c = input.read();
650 }
651 } else {
652
653
654 skipUntil(input, c, delimiter);
655 return 0;
656 }
657
658 if (c == 9 || c == 32) {
659 skipLws(input);
660 c = input.read();
661 }
662
663
664 if (c != delimiter && c != -1) {
665
666
667 skipUntil(input, c, delimiter);
668 return 0;
669 }
670
671 double result = Double.parseDouble(value.toString());
672 if (result > 1) {
673 return 0;
674 }
675 return result;
676 }
677
678
679
684 static int readHostIPv4(Reader reader, boolean inIPv6) throws IOException {
685 int octet = -1;
686 int octetCount = 1;
687 int c;
688 int pos = 0;
689
690
691
692 reader.mark(1);
693 do {
694 c = reader.read();
695 if (c == '.') {
696 if (octet > -1 && octet < 256) {
697
698 octetCount++;
699 octet = -1;
700 } else if (inIPv6 || octet == -1) {
701 throw new IllegalArgumentException(
702 sm.getString("http.invalidOctet", Integer.toString(octet)));
703 } else {
704
705
706 reader.reset();
707 return readHostDomainName(reader);
708 }
709 } else if (isNumeric(c)) {
710 if (octet == -1) {
711 octet = c - '0';
712 } else if (octet == 0) {
713
714 if (inIPv6) {
715 throw new IllegalArgumentException(sm.getString("http.invalidLeadingZero"));
716 } else {
717
718 reader.reset();
719 return readHostDomainName(reader);
720 }
721 } else {
722 octet = octet * 10 + c - '0';
723 }
724 } else if (c == ':') {
725 break;
726 } else if (c == -1) {
727 if (inIPv6) {
728 throw new IllegalArgumentException(sm.getString("http.noClosingBracket"));
729 } else {
730 pos = -1;
731 break;
732 }
733 } else if (c == ']') {
734 if (inIPv6) {
735 pos++;
736 break;
737 } else {
738 throw new IllegalArgumentException(sm.getString("http.closingBracket"));
739 }
740 } else if (!inIPv6 && (isAlpha(c) || c == '-')) {
741
742 reader.reset();
743 return readHostDomainName(reader);
744 } else {
745 throw new IllegalArgumentException(sm.getString(
746 "http.illegalCharacterIpv4", Character.toString((char) c)));
747 }
748 pos++;
749 } while (true);
750
751 if (octetCount != 4 || octet < 0 || octet > 255) {
752
753
754
755 reader.reset();
756 return readHostDomainName(reader);
757 }
758
759 return pos;
760 }
761
762
763
767 static int readHostIPv6(Reader reader) throws IOException {
768
769 int c = reader.read();
770 if (c != '[') {
771 throw new IllegalArgumentException(sm.getString("http.noOpeningBracket"));
772 }
773
774 int h16Count = 0;
775 int h16Size = 0;
776 int pos = 1;
777 boolean parsedDoubleColon = false;
778 int precedingColonsCount = 0;
779
780 do {
781 c = reader.read();
782 if (h16Count == 0 && precedingColonsCount == 1 && c != ':') {
783
784 throw new IllegalArgumentException(sm.getString("http.singleColonStart"));
785 }
786 if (HttpParser.isHex(c)) {
787 if (h16Size == 0) {
788
789 precedingColonsCount = 0;
790 h16Count++;
791 }
792 h16Size++;
793 if (h16Size > 4) {
794 throw new IllegalArgumentException(sm.getString("http.invalidHextet"));
795 }
796 } else if (c == ':') {
797 if (precedingColonsCount >=2 ) {
798
799 throw new IllegalArgumentException(sm.getString("http.tooManyColons"));
800 } else {
801 if(precedingColonsCount == 1) {
802
803 if (parsedDoubleColon ) {
804
805 throw new IllegalArgumentException(
806 sm.getString("http.tooManyDoubleColons"));
807 }
808 parsedDoubleColon = true;
809
810 h16Count++;
811 }
812 precedingColonsCount++;
813
814 reader.mark(4);
815 }
816 h16Size = 0;
817 } else if (c == ']') {
818 if (precedingColonsCount == 1) {
819
820 throw new IllegalArgumentException(sm.getString("http.singleColonEnd"));
821 }
822 pos++;
823 break;
824 } else if (c == '.') {
825 if (h16Count == 7 || h16Count < 7 && parsedDoubleColon) {
826 reader.reset();
827 pos -= h16Size;
828 pos += readHostIPv4(reader, true);
829 h16Count++;
830 break;
831 } else {
832 throw new IllegalArgumentException(sm.getString("http.invalidIpv4Location"));
833 }
834 } else {
835 throw new IllegalArgumentException(sm.getString(
836 "http.illegalCharacterIpv6", Character.toString((char) c)));
837 }
838 pos++;
839 } while (true);
840
841 if (h16Count > 8) {
842 throw new IllegalArgumentException(
843 sm.getString("http.tooManyHextets", Integer.toString(h16Count)));
844 } else if (h16Count != 8 && !parsedDoubleColon) {
845 throw new IllegalArgumentException(
846 sm.getString("http.tooFewHextets", Integer.toString(h16Count)));
847 }
848
849 c = reader.read();
850 if (c == ':') {
851 return pos;
852 } else {
853 if(c == -1) {
854 return -1;
855 }
856 throw new IllegalArgumentException(
857 sm.getString("http.illegalAfterIpv6", Character.toString((char) c)));
858 }
859 }
860
861
865 static int readHostDomainName(Reader reader) throws IOException {
866 DomainParseState state = DomainParseState.NEW;
867 int pos = 0;
868
869 while (state.mayContinue()) {
870 state = state.next(reader.read());
871 pos++;
872 }
873
874 if (DomainParseState.COLON == state) {
875
876 return pos - 1;
877 } else {
878 return -1;
879 }
880 }
881
882
883
887 static SkipResult skipUntil(Reader input, int c, char target) throws IOException {
888 while (c != -1 && c != target) {
889 c = input.read();
890 }
891 if (c == -1) {
892 return SkipResult.EOF;
893 } else {
894 return SkipResult.FOUND;
895 }
896 }
897
898
899 private void relax(boolean[] flags, String relaxedChars) {
900 if (relaxedChars != null && relaxedChars.length() > 0) {
901 char[] chars = relaxedChars.toCharArray();
902 for (char c : chars) {
903 if (isRelaxable(c)) {
904 flags[c] = true;
905 IS_NOT_REQUEST_TARGET[c] = false;
906 }
907 }
908 }
909 }
910
911
912 private enum DomainParseState {
913 NEW( true, false, false, false, "http.invalidCharacterDomain.atStart"),
914 ALPHA( true, true, true, true, "http.invalidCharacterDomain.afterLetter"),
915 NUMERIC( true, true, true, true, "http.invalidCharacterDomain.afterNumber"),
916 PERIOD( true, false, false, true, "http.invalidCharacterDomain.afterPeriod"),
917 HYPHEN( true, true, false, false, "http.invalidCharacterDomain.afterHyphen"),
918 COLON( false, false, false, false, "http.invalidCharacterDomain.afterColon"),
919 END( false, false, false, false, "http.invalidCharacterDomain.atEnd");
920
921 private final boolean mayContinue;
922 private final boolean allowsHyphen;
923 private final boolean allowsPeriod;
924 private final boolean allowsEnd;
925 private final String errorMsg;
926
927 private DomainParseState(boolean mayContinue, boolean allowsHyphen, boolean allowsPeriod,
928 boolean allowsEnd, String errorMsg) {
929 this.mayContinue = mayContinue;
930 this.allowsHyphen = allowsHyphen;
931 this.allowsPeriod = allowsPeriod;
932 this.allowsEnd = allowsEnd;
933 this.errorMsg = errorMsg;
934 }
935
936 public boolean mayContinue() {
937 return mayContinue;
938 }
939
940 public DomainParseState next(int c) {
941 if (c == -1) {
942 if (allowsEnd) {
943 return END;
944 } else {
945 throw new IllegalArgumentException(
946 sm.getString("http.invalidSegmentEndState", this.name()));
947 }
948 } else if (HttpParser.isAlpha(c)) {
949 return ALPHA;
950 } else if (HttpParser.isNumeric(c)) {
951 return NUMERIC;
952 } else if (c == '.') {
953 if (allowsPeriod) {
954 return PERIOD;
955 } else {
956 throw new IllegalArgumentException(sm.getString(errorMsg,
957 Character.toString((char) c)));
958 }
959 } else if (c == ':') {
960 if (allowsEnd) {
961 return COLON;
962 } else {
963 throw new IllegalArgumentException(sm.getString(errorMsg,
964 Character.toString((char) c)));
965 }
966 } else if (c == '-') {
967 if (allowsHyphen) {
968 return HYPHEN;
969 } else {
970 throw new IllegalArgumentException(sm.getString(errorMsg,
971 Character.toString((char) c)));
972 }
973 } else {
974 throw new IllegalArgumentException(sm.getString(
975 "http.illegalCharacterDomain", Character.toString((char) c)));
976 }
977 }
978 }
979 }
980