Monitoring JavaMelody sur /demo

1 /*


2  * Licensed to the Apache Software Foundation (ASF) under one or more


3  * contributor license agreements.  See the NOTICE file distributed with


4  * this work for additional information regarding copyright ownership.


5  * The ASF licenses this file to You under the Apache License, Version 2.0


6  * (the "License"); you may not use this file except in compliance with


7  * the License.  You may obtain a copy of the License at


8  *


9  *      http://www.apache.org/licenses/LICENSE-2.0


10  *


11  * Unless required by applicable law or agreed to in writing, software


12  * distributed under the License is distributed on an "AS IS" BASIS,


13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.


14  * See the License for the specific language governing permissions and


15  * limitations under the License.


16  */


17 package org.apache.tomcat.util.http.parser;


18 


19 import java.io.IOException;


20 import java.io.Reader;


21 


22 import org.apache.tomcat.util.res.StringManager;


23 


24 /**


25  * HTTP header value parser implementation. Parsing HTTP headers as per RFC2616


26  * is not always as simple as it first appears. For headers that only use tokens


27  * the simple approach will normally be sufficient. However, for the other


28  * headers, while simple code meets 99.9% of cases, there are often some edge


29  * cases that make things far more complicated.


30  *


31  * The purpose of this parser is to let the parser worry about the edge cases.


32  * It provides tolerant (where safe to do so) parsing of HTTP header values


33  * assuming that wrapped header lines have already been unwrapped. (The Tomcat


34  * header processing code does the unwrapping.)


35  *


36  */


37 public class HttpParser {


38 


39     private static final StringManager sm = StringManager.getManager(HttpParser.class);


40 


41     private static final int ARRAY_SIZE = 128;


42 


43     private static final boolean[] IS_CONTROL = new boolean[ARRAY_SIZE];


44     private static final boolean[] IS_SEPARATOR = new boolean[ARRAY_SIZE];


45     private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE];


46     private static final boolean[] IS_HEX = new boolean[ARRAY_SIZE];


47     private static final boolean[] IS_HTTP_PROTOCOL = new boolean[ARRAY_SIZE];


48     private static final boolean[] IS_ALPHA = new boolean[ARRAY_SIZE];


49     private static final boolean[] IS_NUMERIC = new boolean[ARRAY_SIZE];


50     private static final boolean[] IS_UNRESERVED = new boolean[ARRAY_SIZE];


51     private static final boolean[] IS_SUBDELIM = new boolean[ARRAY_SIZE];


52     private static final boolean[] IS_USERINFO = new boolean[ARRAY_SIZE];


53     private static final boolean[] IS_RELAXABLE = new boolean[ARRAY_SIZE];


54 


55     private static final HttpParser DEFAULT;


56 


57 


58     static {


59         for (int i = 0; i < ARRAY_SIZE; i++) {


60             // Control> 0-31, 127


61             if (i < 32 || i == 127) {


62                 IS_CONTROL[i] = true;


63             }


64 


65             // Separator


66             if (    i == '(' || i == ')' || i == '<' || i == '>'  || i == '@'  ||


67                     i == ',' || i == ';' || i == ':' || i == '\\' || i == '\"' ||


68                     i == '/' || i == '[' || i == ']' || i == '?'  || i == '='  ||


69                     i == '{' || i == '}' || i == ' ' || i == '\t') {


70                 IS_SEPARATOR[i] = true;


71             }


72 


73             // Token: Anything 0-127 that is not a control and not a separator


74             if (!IS_CONTROL[i] && !IS_SEPARATOR[i] && i < 128) {


75                 IS_TOKEN[i] = true;


76             }


77 


78             // Hex: 0-9, a-f, A-F


79             if ((i >= '0' && i <='9') || (i >= 'a' && i <= 'f') || (i >= 'A' && i <= 'F')) {


80                 IS_HEX[i] = true;


81             }


82 


83             // Not valid for HTTP protocol


84             // "HTTP/" DIGIT "." DIGIT


85             if (i == 'H' || i == 'T' || i == 'P' || i == '/' || i == '.' || (i >= '0' && i <= '9')) {


86                 IS_HTTP_PROTOCOL[i] = true;


87             }


88 


89             if (i >= '0' && i <= '9') {


90                 IS_NUMERIC[i] = true;


91             }


92 


93             if (i >= 'a' && i <= 'z' || i >= 'A' && i <= 'Z') {


94                 IS_ALPHA[i] = true;


95             }


96 


97             if (IS_ALPHA[i] || IS_NUMERIC[i] || i == '-' || i == '.' || i == '_' || i == '~') {


98                 IS_UNRESERVED[i] = true;


99             }


100 


101             if (i == '!' || i == '$' || i == '&' || i == '\'' || i == '(' || i == ')' || i == '*' ||


102                     i == '+' || i == ',' || i == ';' || i == '=') {


103                 IS_SUBDELIM[i] = true;


104             }


105 


106             // userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )


107             if (IS_UNRESERVED[i] || i == '%' || IS_SUBDELIM[i] || i == ':') {


108                 IS_USERINFO[i] = true;


109             }


110 


111             // The characters that are normally not permitted for which the


112             // restrictions may be relaxed when used in the path and/or query


113             // string


114             if (i == '\"' || i == '<' || i == '>' || i == '[' || i == '\\' || i == ']' ||


115                     i == '^' || i == '`'  || i == '{' || i == '|' || i == '}') {


116                 IS_RELAXABLE[i] = true;


117             }


118         }


119 


120         DEFAULT = new HttpParser(null, null);


121     }


122 


123 


124     private final boolean[] IS_NOT_REQUEST_TARGET = new boolean[ARRAY_SIZE];


125     private final boolean[] IS_ABSOLUTEPATH_RELAXED = new boolean[ARRAY_SIZE];


126     private final boolean[] IS_QUERY_RELAXED = new boolean[ARRAY_SIZE];


127 


128 


129     public HttpParser(String relaxedPathChars, String relaxedQueryChars) {


130         for (int i = 0; i < ARRAY_SIZE; i++) {


131             // Not valid for request target.


132             // Combination of multiple rules from RFC7230 and RFC 3986. Must be


133             // ASCII, no controls plus a few additional characters excluded


134             if (IS_CONTROL[i] ||


135                     i == ' ' || i == '\"' || i == '#' || i == '<' || i == '>' || i == '\\' ||


136                     i == '^' || i == '`'  || i == '{' || i == '|' || i == '}') {


137                 IS_NOT_REQUEST_TARGET[i] = true;


138             }


139 


140             /*


141              * absolute-path  = 1*( "/" segment )


142              * segment        = *pchar


143              * pchar          = unreserved / pct-encoded / sub-delims / ":" / "@"


144              *


145              * Note pchar allows everything userinfo allows plus "@"


146              */


147             if (IS_USERINFO[i] || i == '@' || i == '/') {


148                 IS_ABSOLUTEPATH_RELAXED[i] = true;


149             }


150 


151             /*


152              * query          = *( pchar / "/" / "?" )


153              *


154              * Note query allows everything absolute-path allows plus "?"


155              */


156             if (IS_ABSOLUTEPATH_RELAXED[i] || i == '?') {


157                 IS_QUERY_RELAXED[i] = true;


158             }


159         }


160 


161         relax(IS_ABSOLUTEPATH_RELAXED, relaxedPathChars);


162         relax(IS_QUERY_RELAXED, relaxedQueryChars);


163     }


164 


165 


166     public boolean isNotRequestTargetRelaxed(int c) {


167         // Fast for valid request target characters, slower for some incorrect


168         // ones


169         try {


170             return IS_NOT_REQUEST_TARGET[c];


171         } catch (ArrayIndexOutOfBoundsException ex) {


172             return true;


173         }


174     }


175 


176 


177     public boolean isAbsolutePathRelaxed(int c) {


178         // Fast for valid user info characters, slower for some incorrect


179         // ones


180         try {


181             return IS_ABSOLUTEPATH_RELAXED[c];


182         } catch (ArrayIndexOutOfBoundsException ex) {


183             return false;


184         }


185     }


186 


187 


188     public boolean isQueryRelaxed(int c) {


189         // Fast for valid user info characters, slower for some incorrect


190         // ones


191         try {


192             return IS_QUERY_RELAXED[c];


193         } catch (ArrayIndexOutOfBoundsException ex) {


194             return false;


195         }


196     }


197 


198 


199     public static String unquote(String input) {


200         if (input == null || input.length() < 2) {


201             return input;


202         }


203 


204         int start;


205         int end;


206 


207         // Skip surrounding quotes if there are any


208         if (input.charAt(0) == '"') {


209             start = 1;


210             end = input.length() - 1;


211         } else {


212             start = 0;


213             end = input.length();


214         }


215 


216         StringBuilder result = new StringBuilder();


217         for (int i = start ; i < end; i++) {


218             char c = input.charAt(i);


219             if (input.charAt(i) == '\\') {


220                 i++;


221                 result.append(input.charAt(i));


222             } else {


223                 result.append(c);


224             }


225         }


226         return result.toString();


227     }


228 


229 


230     public static boolean isToken(int c) {


231         // Fast for correct values, slower for incorrect ones


232         try {


233             return IS_TOKEN[c];


234         } catch (ArrayIndexOutOfBoundsException ex) {


235             return false;


236         }


237     }


238 


239 


240     public static boolean isHex(int c) {


241         // Fast for correct values, slower for some incorrect ones


242         try {


243             return IS_HEX[c];


244         } catch (ArrayIndexOutOfBoundsException ex) {


245             return false;


246         }


247     }


248 


249 


250     public static boolean isNotRequestTarget(int c) {


251         return DEFAULT.isNotRequestTargetRelaxed(c);


252     }


253 


254 


255     public static boolean isHttpProtocol(int c) {


256         // Fast for valid HTTP protocol characters, slower for some incorrect


257         // ones


258         try {


259             return IS_HTTP_PROTOCOL[c];


260         } catch (ArrayIndexOutOfBoundsException ex) {


261             return false;


262         }


263     }


264 


265 


266     public static boolean isAlpha(int c) {


267         // Fast for valid alpha characters, slower for some incorrect


268         // ones


269         try {


270             return IS_ALPHA[c];


271         } catch (ArrayIndexOutOfBoundsException ex) {


272             return false;


273         }


274     }


275 


276 


277     public static boolean isNumeric(int c) {


278         // Fast for valid numeric characters, slower for some incorrect


279         // ones


280         try {


281             return IS_NUMERIC[c];


282         } catch (ArrayIndexOutOfBoundsException ex) {


283             return false;


284         }


285     }


286 


287 


288     public static boolean isUserInfo(int c) {


289         // Fast for valid user info characters, slower for some incorrect


290         // ones


291         try {


292             return IS_USERINFO[c];


293         } catch (ArrayIndexOutOfBoundsException ex) {


294             return false;


295         }


296     }


297 


298 


299     private static boolean isRelaxable(int c) {


300         // Fast for valid user info characters, slower for some incorrect


301         // ones


302         try {


303             return IS_RELAXABLE[c];


304         } catch (ArrayIndexOutOfBoundsException ex) {


305             return false;


306         }


307     }


308 


309 


310     public static boolean isAbsolutePath(int c) {


311         return DEFAULT.isAbsolutePathRelaxed(c);


312     }


313 


314 


315     public static boolean isQuery(int c) {


316         return DEFAULT.isQueryRelaxed(c);


317     }


318 


319 


320     public static boolean isControl(int c) {


321         // Fast for valid control characters, slower for some incorrect


322         // ones


323         try {


324             return IS_CONTROL[c];


325         } catch (ArrayIndexOutOfBoundsException ex) {


326             return false;


327         }


328     }


329 


330 


331     // Skip any LWS and position to read the next character. The next character


332     // is returned as being able to 'peek()' it allows a small optimisation in


333     // some cases.


334     static int skipLws(Reader input) throws IOException {


335 


336         input.mark(1);


337         int c = input.read();


338 


339         while (c == 32 || c == 9 || c == 10 || c == 13) {


340             input.mark(1);


341             c = input.read();


342         }


343 


344         input.reset();


345         return c;


346     }


347 


348     static SkipResult skipConstant(Reader input, String constant) throws IOException {


349         int len = constant.length();


350 


351         skipLws(input);


352         input.mark(len);


353         int c = input.read();


354 


355         for (int i = 0; i < len; i++) {


356             if (i == 0 && c == -1) {


357                 return SkipResult.EOF;


358             }


359             if (c != constant.charAt(i)) {


360                 input.reset();


361                 return SkipResult.NOT_FOUND;


362             }


363             if (i != (len - 1)) {


364                 c = input.read();


365             }


366         }


367         return SkipResult.FOUND;


368     }


369 


370     /**


371      * @return  the token if one was found, the empty string if no data was


372      *          available to read or <code>null</code> if data other than a


373      *          token was found


374      */


375     static String readToken(Reader input) throws IOException {


376         StringBuilder result = new StringBuilder();


377 


378         skipLws(input);


379         input.mark(1);


380         int c = input.read();


381 


382         while (c != -1 && isToken(c)) {


383             result.append((char) c);


384             input.mark(1);


385             c = input.read();


386         }


387         // Use mark(1)/reset() rather than skip(-1) since skip() is a NOP


388         // once the end of the String has been reached.


389         input.reset();


390 


391         if (c != -1 && result.length() == 0) {


392             return null;


393         } else {


394             return result.toString();


395         }


396     }


397 


398     /**


399      * @return  the digits if any were found, the empty string if no data was


400      *          found or if data other than digits was found


401      */


402     static String readDigits(Reader input) throws IOException {


403         StringBuilder result = new StringBuilder();


404 


405         skipLws(input);


406         input.mark(1);


407         int c = input.read();


408 


409         while (c != -1 && isNumeric(c)) {


410             result.append((char) c);


411             input.mark(1);


412             c = input.read();


413         }


414         // Use mark(1)/reset() rather than skip(-1) since skip() is a NOP


415         // once the end of the String has been reached.


416         input.reset();


417 


418         return result.toString();


419     }


420 


421     /**


422      * @return  the number if digits were found, -1 if no data was found


423      *          or if data other than digits was found


424      */


425     static long readLong(Reader input) throws IOException {


426         String digits = readDigits(input);


427 


428         if (digits.length() == 0) {


429             return -1;


430         }


431 


432         return Long.parseLong(digits);


433     }


434 


435     /**


436      * @return the quoted string if one was found, null if data other than a


437      *         quoted string was found or null if the end of data was reached


438      *         before the quoted string was terminated


439      */


440     static String readQuotedString(Reader input, boolean returnQuoted) throws IOException {


441 


442         skipLws(input);


443         int c = input.read();


444 


445         if (c != '"') {


446             return null;


447         }


448 


449         StringBuilder result = new StringBuilder();


450         if (returnQuoted) {


451             result.append('\"');


452         }


453         c = input.read();


454 


455         while (c != '"') {


456             if (c == -1) {


457                 return null;


458             } else if (c == '\\') {


459                 c = input.read();


460                 if (returnQuoted) {


461                     result.append('\\');


462                 }


463                 result.append((char) c);


464             } else {


465                 result.append((char) c);


466             }


467             c = input.read();


468         }


469         if (returnQuoted) {


470             result.append('\"');


471         }


472 


473         return result.toString();


474     }


475 


476     static String readTokenOrQuotedString(Reader input, boolean returnQuoted)


477             throws IOException {


478 


479         // Peek at next character to enable correct method to be called


480         int c = skipLws(input);


481 


482         if (c == '"') {


483             return readQuotedString(input, returnQuoted);


484         } else {


485             return readToken(input);


486         }


487     }


488 


489     /**


490      * Token can be read unambiguously with or without surrounding quotes so


491      * this parsing method for token permits optional surrounding double quotes.


492      * This is not defined in any RFC. It is a special case to handle data from


493      * buggy clients (known buggy clients for DIGEST auth include Microsoft IE 8


494      * &amp; 9, Apple Safari for OSX and iOS) that add quotes to values that


495      * should be tokens.


496      *


497      * @return the token if one was found, null if data other than a token or


498      *         quoted token was found or null if the end of data was reached


499      *         before a quoted token was terminated


500      */


501     static String readQuotedToken(Reader input) throws IOException {


502 


503         StringBuilder result = new StringBuilder();


504         boolean quoted = false;


505 


506         skipLws(input);


507         input.mark(1);


508         int c = input.read();


509 


510         if (c == '"') {


511             quoted = true;


512         } else if (c == -1 || !isToken(c)) {


513             return null;


514         } else {


515             result.append((char) c);


516         }


517         input.mark(1);


518         c = input.read();


519 


520         while (c != -1 && isToken(c)) {


521             result.append((char) c);


522             input.mark(1);


523             c = input.read();


524         }


525 


526         if (quoted) {


527             if (c != '"') {


528                 return null;


529             }


530         } else {


531             // Use mark(1)/reset() rather than skip(-1) since skip() is a NOP


532             // once the end of the String has been reached.


533             input.reset();


534         }


535 


536         if (c != -1 && result.length() == 0) {


537             return null;


538         } else {


539             return result.toString();


540         }


541     }


542 


543     /**


544      * LHEX can be read unambiguously with or without surrounding quotes so this


545      * parsing method for LHEX permits optional surrounding double quotes. Some


546      * buggy clients (libwww-perl for DIGEST auth) are known to send quoted LHEX


547      * when the specification requires just LHEX.


548      *


549      * <p>


550      * LHEX are, literally, lower-case hexadecimal digits. This implementation


551      * allows for upper-case digits as well, converting the returned value to


552      * lower-case.


553      *


554      * @return  the sequence of LHEX (minus any surrounding quotes) if any was


555      *          found, or <code>null</code> if data other LHEX was found


556      */


557     static String readLhex(Reader input) throws IOException {


558 


559         StringBuilder result = new StringBuilder();


560         boolean quoted = false;


561 


562         skipLws(input);


563         input.mark(1);


564         int c = input.read();


565 


566         if (c == '"') {


567             quoted = true;


568         } else if (c == -1 || !isHex(c)) {


569             return null;


570         } else {


571             if ('A' <= c && c <= 'F') {


572                 c -= ('A' - 'a');


573             }


574             result.append((char) c);


575         }


576         input.mark(1);


577         c = input.read();


578 


579         while (c != -1 && isHex(c)) {


580             if ('A' <= c && c <= 'F') {


581                 c -= ('A' - 'a');


582             }


583             result.append((char) c);


584             input.mark(1);


585             c = input.read();


586         }


587 


588         if (quoted) {


589             if (c != '"') {


590                 return null;


591             }


592         } else {


593             // Use mark(1)/reset() rather than skip(-1) since skip() is a NOP


594             // once the end of the String has been reached.


595             input.reset();


596         }


597 


598         if (c != -1 && result.length() == 0) {


599             return null;


600         } else {


601             return result.toString();


602         }


603     }


604 


605     static double readWeight(Reader input, char delimiter) throws IOException {


606         skipLws(input);


607         int c = input.read();


608         if (c == -1 || c == delimiter) {


609             // No q value just whitespace


610             return 1;


611         } else if (c != 'q') {


612             // Malformed. Use quality of zero so it is dropped.


613             skipUntil(input, c, delimiter);


614             return 0;


615         }


616         // RFC 7231 does not allow whitespace here but be tolerant


617         skipLws(input);


618         c = input.read();


619         if (c != '=') {


620             // Malformed. Use quality of zero so it is dropped.


621             skipUntil(input, c, delimiter);


622             return 0;


623         }


624 


625         // RFC 7231 does not allow whitespace here but be tolerant


626         skipLws(input);


627         c = input.read();


628 


629         // Should be no more than 3 decimal places


630         StringBuilder value = new StringBuilder(5);


631         int decimalPlacesRead = -1;


632 


633         if (c == '0' || c == '1') {


634             value.append((char) c);


635             c = input.read();


636 


637             while (true) {


638                 if (decimalPlacesRead == -1 && c == '.') {


639                     value.append('.');


640                     decimalPlacesRead = 0;


641                 } else if (decimalPlacesRead > -1 && c >= '0' && c <= '9') {


642                     if (decimalPlacesRead < 3) {


643                         value.append((char) c);


644                         decimalPlacesRead++;


645                     }


646                 } else {


647                     break;


648                 }


649                 c = input.read();


650             }


651         } else {


652             // Malformed. Use quality of zero so it is dropped and skip until


653             // EOF or the next delimiter


654             skipUntil(input, c, delimiter);


655             return 0;


656         }


657 


658         if (c == 9 || c == 32) {


659             skipLws(input);


660             c = input.read();


661         }


662 


663         // Must be at delimiter or EOF


664         if (c != delimiter && c != -1) {


665             // Malformed. Use quality of zero so it is dropped and skip until


666             // EOF or the next delimiter


667             skipUntil(input, c, delimiter);


668             return 0;


669         }


670 


671         double result = Double.parseDouble(value.toString());


672         if (result > 1) {


673             return 0;


674         }


675         return result;


676     }


677 


678 


/**
 * Validates a dotted-decimal IPv4 address read from the reader. When the
 * input turns out not to be an IPv4 address and {@code inIPv6} is false,
 * the reader is reset to where this method started and parsing is handed
 * to {@code readHostDomainName}.
 *
 * @param reader reader supporting mark/reset, positioned at the first
 *               character of the address
 * @param inIPv6 {@code true} when the address is the trailing IPv4 part of
 *               a bracketed IPv6 literal, in which case a closing
 *               {@code ']'} is expected and consumed
 *
 * @return If inIPv6 is false, the position of ':' that separates the host
 *         from the port or -1 if it is not present. If inIPv6 is true, the
 *         number of characters read
 *
 * @throws IOException              if the reader fails
 * @throws IllegalArgumentException if an octet is empty, if the IPv6 form
 *                                  has an out-of-range octet, a leading
 *                                  zero or no closing bracket, if
 *                                  {@code ']'} appears outside IPv6, or if
 *                                  an illegal character is found
 */
static int readHostIPv4(Reader reader, boolean inIPv6) throws IOException {
    // Value of the octet being read; -1 means no digit seen yet.
    int octet = -1;
    int octetCount = 1;
    int c;
    int pos = 0;

    // readAheadLimit doesn't matter as all the readers passed to this
    // method buffer the entire content.
    reader.mark(1);
    do {
        c = reader.read();
        if (c == '.') {
            if (octet > -1 && octet < 256) {
                // Valid
                octetCount++;
                octet = -1;
            } else if (inIPv6 || octet == -1) {
                // Empty octet, or out-of-range octet inside an IPv6 literal
                throw new IllegalArgumentException(
                        sm.getString("http.invalidOctet", Integer.toString(octet)));
            } else {
                // Might not be an IPv4 address. Could be a host / FQDN with
                // a fully numeric component.
                reader.reset();
                return readHostDomainName(reader);
            }
        } else if (isNumeric(c)) {
            if (octet == -1) {
                octet = c - '0';
            } else if (octet == 0) {
                // Leading zero in non-zero octet. Not valid (ambiguous).
                if (inIPv6) {
                    throw new IllegalArgumentException(sm.getString("http.invalidLeadingZero"));
                } else {
                    // Could be a host/FQDN
                    reader.reset();
                    return readHostDomainName(reader);
                }
            } else {
                // NOTE(review): the digit count is not capped here, so a
                // very long digit run could overflow int - confirm whether
                // callers bound the input length.
                octet = octet * 10 + c - '0';
            }
        } else if (c == ':') {
            // pos is now the index of the ':'
            break;
        } else if (c == -1) {
            if (inIPv6) {
                throw new IllegalArgumentException(sm.getString("http.noClosingBracket"));
            } else {
                // End of input with no port separator
                pos = -1;
                break;
            }
        } else if (c == ']') {
            if (inIPv6) {
                // Count the closing bracket as a character read
                pos++;
                break;
            } else {
                throw new IllegalArgumentException(sm.getString("http.closingBracket"));
            }
        } else if (!inIPv6 && (isAlpha(c) || c == '-')) {
            // Go back to the start and parse as a host / FQDN
            reader.reset();
            return readHostDomainName(reader);
        } else {
            throw new IllegalArgumentException(sm.getString(
                    "http.illegalCharacterIpv4", Character.toString((char) c)));
        }
        pos++;
    } while (true);

    if (octetCount != 4 || octet < 0 || octet > 255) {
        // Might not be an IPv4 address. Could be a host name or a FQDN with
        // fully numeric components. Go back to the start and parse as a
        // host / FQDN.
        reader.reset();
        return readHostDomainName(reader);
    }

    return pos;
}


761 


762 


/**
 * Validates a bracketed IPv6 literal, optionally ending in an embedded
 * IPv4 address, and then checks the single character that follows the
 * closing bracket.
 *
 * @param reader reader supporting mark/reset, positioned at the opening
 *               {@code '['}
 *
 * @return The position of ':' that separates the host from the port or -1
 *         if it is not present
 *
 * @throws IOException              if the reader fails
 * @throws IllegalArgumentException if the literal does not start with
 *                                  {@code '['}, is malformed, or is followed
 *                                  by anything other than {@code ':'} or the
 *                                  end of input
 */
static int readHostIPv6(Reader reader) throws IOException {
    // Must start with '['
    int c = reader.read();
    if (c != '[') {
        throw new IllegalArgumentException(sm.getString("http.noOpeningBracket"));
    }

    // Number of 16-bit groups seen so far ("::" counts as one)
    int h16Count = 0;
    // Number of hex digits in the group currently being read
    int h16Size = 0;
    // Characters consumed so far; starts at 1 for the '['
    int pos = 1;
    boolean parsedDoubleColon = false;
    // Consecutive ':' characters immediately before the current one
    int precedingColonsCount = 0;

    do {
        c = reader.read();
        if (h16Count == 0 && precedingColonsCount == 1 && c != ':') {
            // Can't start with a single :
            throw new IllegalArgumentException(sm.getString("http.singleColonStart"));
        }
        if (HttpParser.isHex(c)) {
            if (h16Size == 0) {
                // Start of a new h16 block
                precedingColonsCount = 0;
                h16Count++;
            }
            h16Size++;
            if (h16Size > 4) {
                throw new IllegalArgumentException(sm.getString("http.invalidHextet"));
            }
        } else if (c == ':') {
            if (precedingColonsCount >=2 ) {
                // ::: is not allowed
                throw new IllegalArgumentException(sm.getString("http.tooManyColons"));
            } else {
                if(precedingColonsCount == 1) {
                    // End of ::
                    if (parsedDoubleColon ) {
                        // Only allowed one :: sequence
                        throw new IllegalArgumentException(
                                sm.getString("http.tooManyDoubleColons"));
                    }
                    parsedDoubleColon = true;
                    // :: represents at least one h16 block
                    h16Count++;
                }
                precedingColonsCount++;
                // mark if the next symbol is hex before the actual read
                // (lets the '.' branch below rewind to the start of the
                // group and re-read it as an IPv4 address)
                reader.mark(4);
            }
            h16Size = 0;
        } else if (c == ']') {
            if (precedingColonsCount == 1) {
                // Can't end on a single ':'
                throw new IllegalArgumentException(sm.getString("http.singleColonEnd"));
            }
            pos++;
            break;
        } else if (c == '.') {
            // The digits just read were the first octet of an embedded IPv4
            // address rather than an h16 group.
            if (h16Count == 7 || h16Count < 7 && parsedDoubleColon) {
                // Rewind to just after the last ':' and drop the digits
                // already counted in pos; readHostIPv4 re-reads them and
                // reports how many characters it consumed.
                reader.reset();
                pos -= h16Size;
                pos += readHostIPv4(reader, true);
                // Those digits were already counted as one group; the IPv4
                // address is counted as one more.
                h16Count++;
                break;
            } else {
                throw new IllegalArgumentException(sm.getString("http.invalidIpv4Location"));
            }
        } else {
            // Any other character lands here, including end of input (-1)
            // before the closing bracket.
            throw new IllegalArgumentException(sm.getString(
                    "http.illegalCharacterIpv6", Character.toString((char) c)));
        }
        pos++;
    } while (true);

    if (h16Count > 8) {
        throw new IllegalArgumentException(
                sm.getString("http.tooManyHextets", Integer.toString(h16Count)));
    } else if (h16Count != 8 && !parsedDoubleColon) {
        throw new IllegalArgumentException(
                sm.getString("http.tooFewHextets", Integer.toString(h16Count)));
    }

    // Only ':' (port separator) or end of input may follow the literal.
    c = reader.read();
    if (c == ':') {
        return pos;
    } else {
        if(c == -1) {
            return -1;
        }
        throw new IllegalArgumentException(
                sm.getString("http.illegalAfterIpv6", Character.toString((char) c)));
    }
}


860 


861     /**


862      * @return The position of ':' that separates the host from the port or -1


863      *         if it is not present


864      */


865     static int readHostDomainName(Reader reader) throws IOException {


866         DomainParseState state = DomainParseState.NEW;


867         int pos = 0;


868 


869         while (state.mayContinue()) {


870             state = state.next(reader.read());


871             pos++;


872         }


873 


874         if (DomainParseState.COLON == state) {


875             // State identifies the state of the previous character


876             return pos - 1;


877         } else {


878             return -1;


879         }


880     }


881 


882 


883     /**


884      * Skips all characters until EOF or the specified target is found. Normally


885      * used to skip invalid input until the next separator.


886      */


887     static SkipResult skipUntil(Reader input, int c, char target) throws IOException {


888         while (c != -1 && c != target) {


889             c = input.read();


890         }


891         if (c == -1) {


892             return SkipResult.EOF;


893         } else {


894             return SkipResult.FOUND;


895         }


896     }


897 


898 


899     private void relax(boolean[] flags, String relaxedChars) {


900         if (relaxedChars != null && relaxedChars.length() > 0) {


901             char[] chars = relaxedChars.toCharArray();


902             for (char c : chars) {


903                 if (isRelaxable(c)) {


904                     flags[c] = true;


905                     IS_NOT_REQUEST_TARGET[c] = false;


906                 }


907             }


908         }


909     }


910 


911 


912     private enum DomainParseState {


913         NEW(     true, false, false, false, "http.invalidCharacterDomain.atStart"),


914         ALPHA(   true,  true,  true,  true, "http.invalidCharacterDomain.afterLetter"),


915         NUMERIC( true,  true,  true,  true, "http.invalidCharacterDomain.afterNumber"),


916         PERIOD(  true, false, false,  true, "http.invalidCharacterDomain.afterPeriod"),


917         HYPHEN(  true,  true, false, false, "http.invalidCharacterDomain.afterHyphen"),


918         COLON(  false, false, false, false, "http.invalidCharacterDomain.afterColon"),


919         END(    false, false, false, false, "http.invalidCharacterDomain.atEnd");


920 


921         private final boolean mayContinue;


922         private final boolean allowsHyphen;


923         private final boolean allowsPeriod;


924         private final boolean allowsEnd;


925         private final String errorMsg;


926 


927         private DomainParseState(boolean mayContinue, boolean allowsHyphen, boolean allowsPeriod,


928                 boolean allowsEnd, String errorMsg) {


929             this.mayContinue = mayContinue;


930             this.allowsHyphen = allowsHyphen;


931             this.allowsPeriod = allowsPeriod;


932             this.allowsEnd = allowsEnd;


933             this.errorMsg = errorMsg;


934         }


935 


936         public boolean mayContinue() {


937             return mayContinue;


938         }


939 


940         public DomainParseState next(int c) {


941             if (c == -1) {


942                 if (allowsEnd) {


943                     return END;


944                 } else {


945                     throw new IllegalArgumentException(


946                             sm.getString("http.invalidSegmentEndState", this.name()));


947                 }


948             } else if (HttpParser.isAlpha(c)) {


949                 return ALPHA;


950             } else if (HttpParser.isNumeric(c)) {


951                 return NUMERIC;


952             } else if (c == '.') {


953                 if (allowsPeriod) {


954                     return PERIOD;


955                 } else {


956                     throw new IllegalArgumentException(sm.getString(errorMsg,


957                             Character.toString((char) c)));


958                 }


959             } else if (c == ':') {


960                 if (allowsEnd) {


961                     return COLON;


962                 } else {


963                     throw new IllegalArgumentException(sm.getString(errorMsg,


964                             Character.toString((char) c)));


965                 }


966             } else if (c == '-') {


967                 if (allowsHyphen) {


968                     return HYPHEN;


969                 } else {


970                     throw new IllegalArgumentException(sm.getString(errorMsg,


971                             Character.toString((char) c)));


972                 }


973             } else {


974                 throw new IllegalArgumentException(sm.getString(


975                         "http.illegalCharacterDomain", Character.toString((char) c)));


976             }


977         }


978     }


979 }


980