Monitoring JavaMelody sur /demo

1 /*


2  * Licensed to the Apache Software Foundation (ASF) under one or more


3  * contributor license agreements.  See the NOTICE file distributed with


4  * this work for additional information regarding copyright ownership.


5  * The ASF licenses this file to You under the Apache License, Version 2.0


6  * (the "License"); you may not use this file except in compliance with


7  * the License.  You may obtain a copy of the License at


8  *


9  *      http://www.apache.org/licenses/LICENSE-2.0


10  *


11  * Unless required by applicable law or agreed to in writing, software


12  * distributed under the License is distributed on an "AS IS" BASIS,


13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.


14  * See the License for the specific language governing permissions and


15  * limitations under the License.


16  */


17 package org.apache.tomcat.util.buf;


18 


19 import java.nio.ByteBuffer;


20 import java.nio.CharBuffer;


21 import java.nio.charset.CharsetDecoder;


22 import java.nio.charset.CoderResult;


23 import java.nio.charset.StandardCharsets;


24 


/**
 * Decodes UTF-8 encoded bytes to characters. Extracted from Apache Harmony and
 * modified to reject code points from U+D800 to U+DFFF as per RFC3629. The
 * standard Java decoder does not reject these. It has also been modified to
 * reject code points greater than U+10FFFF which the standard Java decoder
 * rejects but the harmony one does not.
 * <p>
 * Whenever a decode call stops early (overflow, malformed or unmappable input)
 * the input buffer position is left on the first byte of the sequence that
 * could not be processed, so that the reported error length and any later
 * resumption are relative to the start of that sequence.
 */
public class Utf8Decoder extends CharsetDecoder {

    // The next table contains information about UTF-8 charset and
    // correspondence of 1st byte to the length of sequence
    // For information please visit http://www.ietf.org/rfc/rfc3629.txt
    //
    // Please note, o means 0, actually.
    // -------------------------------------------------------------------
    // 0 1 2 3 Value
    // -------------------------------------------------------------------
    // oxxxxxxx                            00000000 00000000 0xxxxxxx
    // 11oyyyyy 1oxxxxxx                   00000000 00000yyy yyxxxxxx
    // 111ozzzz 1oyyyyyy 1oxxxxxx          00000000 zzzzyyyy yyxxxxxx
    // 1111ouuu 1ouuzzzz 1oyyyyyy 1oxxxxxx 000uuuuu zzzzyyyy yyxxxxxx
    //
    // Indexed by (leadByte & 0x7F) for lead bytes with the high bit set.
    // The value is the number of continuation bytes that must follow, or -1
    // if the byte can never start a sequence.
    private static final int[] REMAINING_BYTES = {
            // 1owwwwww
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
            // 11oyyyyy
            -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            // 111ozzzz
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            // 1111ouuu
            3, 3, 3, 3, 3, -1, -1, -1,
            // > 11110111
            -1, -1, -1, -1, -1, -1, -1, -1};

    // Indexed by the number of continuation bytes. The value is the sum of the
    // marker bits that were accumulated together with the payload bits and
    // must be subtracted to obtain the code point.
    private static final int[] REMAINING_NUMBERS = {0, // 0 1 2 3
            4224, // (01o00000b << 6)+(1o000000b)
            401536, // (011o0000b << 12)+(1o000000b << 6)+(1o000000b)
            29892736 // (0111o000b << 18)+(1o000000b << 12)+(1o000000b <<
                     // 6)+(1o000000b)
    };

    // Indexed by the number of continuation bytes. The value is the smallest
    // code point that may legally be encoded with that many continuation
    // bytes; anything smaller is an overlong encoding.
    private static final int[] LOWER_ENCODING_LIMIT = {-1, 0x80, 0x800, 0x10000};


    /**
     * Creates a decoder for the UTF-8 charset that reports an average and a
     * maximum of one char per input byte.
     */
    public Utf8Decoder() {
        super(StandardCharsets.UTF_8, 1.0f, 1.0f);
    }


    /**
     * Decodes as many bytes as possible from {@code in} into {@code out},
     * using direct array access when both buffers expose a backing array and
     * the relative get/put methods otherwise.
     *
     * @param in  the bytes to decode
     * @param out the buffer that receives the decoded characters
     *
     * @return the result of the decoding operation
     */
    @Override
    protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
        if (in.hasArray() && out.hasArray()) {
            return decodeHasArray(in, out);
        }
        return decodeNotHasArray(in, out);
    }


    /**
     * Decodes using only the relative get/put methods of the buffers.
     *
     * @param in  the bytes to decode
     * @param out the buffer that receives the decoded characters
     *
     * @return the result of the decoding operation
     */
    private CoderResult decodeNotHasArray(ByteBuffer in, CharBuffer out) {
        int outRemaining = out.remaining();
        // Always the index of the first byte of the sequence being decoded.
        // It is only advanced once the sequence has been fully written to the
        // output so every early return leaves the input on a sequence start.
        int pos = in.position();
        final int limit = in.limit();
        try {
            while (pos < limit) {
                if (outRemaining == 0) {
                    return CoderResult.OVERFLOW;
                }
                int jchar = in.get();
                int tail = 0;
                if (jchar < 0) {
                    jchar = jchar & 0x7F;
                    tail = REMAINING_BYTES[jchar];
                    if (tail == -1) {
                        return CoderResult.malformedForLength(1);
                    }
                    if (limit - pos < 1 + tail) {
                        // No early test for invalid sequences here as peeking
                        // at the next byte is harder
                        return CoderResult.UNDERFLOW;
                    }
                    for (int i = 0; i < tail; i++) {
                        int nextByte = in.get() & 0xFF;
                        if ((nextByte & 0xC0) != 0x80) {
                            return CoderResult.malformedForLength(1 + i);
                        }
                        jchar = (jchar << 6) + nextByte;
                    }
                    jchar -= REMAINING_NUMBERS[tail];
                    if (jchar < LOWER_ENCODING_LIMIT[tail]) {
                        // Should have been encoded in fewer octets
                        return CoderResult.malformedForLength(1);
                    }
                }
                // Apache Tomcat added test
                if (jchar >= 0xD800 && jchar <= 0xDFFF) {
                    return CoderResult.unmappableForLength(3);
                }
                // Apache Tomcat added test
                if (jchar > 0x10FFFF) {
                    return CoderResult.unmappableForLength(4);
                }
                if (jchar <= 0xffff) {
                    out.put((char) jchar);
                    outRemaining--;
                } else {
                    if (outRemaining < 2) {
                        // No room for the surrogate pair. pos still refers to
                        // the first byte of the 4 byte sequence so decoding
                        // can resume there once more output space exists.
                        return CoderResult.OVERFLOW;
                    }
                    // 0xD7C0 == 0xD800 - (0x10000 >> 10)
                    out.put((char) ((jchar >> 0xA) + 0xD7C0));
                    out.put((char) ((jchar & 0x3FF) + 0xDC00));
                    outRemaining -= 2;
                }
                pos += 1 + tail;
            }
            return CoderResult.UNDERFLOW;
        } finally {
            // The relative gets above moved the position past bytes that may
            // not have been consumed. Rewind to the last committed boundary.
            in.position(pos);
        }
    }


    /**
     * Decodes by reading and writing the backing arrays of the buffers
     * directly. Invalid sequences are detected as early as possible, even
     * when the sequence is not yet complete.
     *
     * @param in  the bytes to decode, must have an accessible backing array
     * @param out the buffer that receives the decoded characters, must have
     *            an accessible backing array
     *
     * @return the result of the decoding operation
     */
    private CoderResult decodeHasArray(ByteBuffer in, CharBuffer out) {
        int outRemaining = out.remaining();
        final byte[] bArr = in.array();
        final char[] cArr = out.array();
        // if someone would change the limit in process,
        // he would face consequences
        final int inIndexLimit = in.limit() + in.arrayOffset();
        // Always the index of the first byte of the sequence being decoded
        int inIndex = in.position() + in.arrayOffset();
        int outIndex = out.position() + out.arrayOffset();

        while (inIndex < inIndexLimit && outRemaining > 0) {
            int jchar = bArr[inIndex];
            int tail = 0;
            if (jchar < 0) {
                jchar = jchar & 0x7F;
                // If first byte is invalid, tail will be set to -1
                tail = REMAINING_BYTES[jchar];
                if (tail == -1) {
                    return stopAt(in, inIndex, out, outIndex,
                            CoderResult.malformedForLength(1));
                }
                // Additional checks to detect invalid sequences ASAP
                // Checks derived from Unicode 6.2, Chapter 3, Table 3-7
                int tailAvailable = inIndexLimit - inIndex - 1;
                // Check 2nd byte
                if (tailAvailable > 0 &&
                        !isValidSecondByte(jchar, bArr[inIndex + 1] & 0xFF)) {
                    return stopAt(in, inIndex, out, outIndex,
                            CoderResult.malformedForLength(1));
                }
                // Check third byte if present and expected
                if (tailAvailable > 1 && tail > 1 &&
                        (bArr[inIndex + 2] & 0xC0) != 0x80) {
                    return stopAt(in, inIndex, out, outIndex,
                            CoderResult.malformedForLength(2));
                }
                // Check fourth byte if present and expected
                if (tailAvailable > 2 && tail > 2 &&
                        (bArr[inIndex + 3] & 0xC0) != 0x80) {
                    return stopAt(in, inIndex, out, outIndex,
                            CoderResult.malformedForLength(3));
                }
                if (tailAvailable < tail) {
                    // Incomplete sequence. Wait for more input.
                    break;
                }
                for (int i = 0; i < tail; i++) {
                    int nextByte = bArr[inIndex + i + 1] & 0xFF;
                    if ((nextByte & 0xC0) != 0x80) {
                        return stopAt(in, inIndex, out, outIndex,
                                CoderResult.malformedForLength(1 + i));
                    }
                    jchar = (jchar << 6) + nextByte;
                }
                jchar -= REMAINING_NUMBERS[tail];
                if (jchar < LOWER_ENCODING_LIMIT[tail]) {
                    // Should have been encoded in fewer octets
                    return stopAt(in, inIndex, out, outIndex,
                            CoderResult.malformedForLength(1));
                }
            }
            // Apache Tomcat added test
            if (jchar >= 0xD800 && jchar <= 0xDFFF) {
                return stopAt(in, inIndex, out, outIndex,
                        CoderResult.unmappableForLength(3));
            }
            // Apache Tomcat added test
            if (jchar > 0x10FFFF) {
                return stopAt(in, inIndex, out, outIndex,
                        CoderResult.unmappableForLength(4));
            }
            if (jchar <= 0xffff) {
                cArr[outIndex++] = (char) jchar;
                outRemaining--;
            } else {
                if (outRemaining < 2) {
                    // No room for the surrogate pair. inIndex still refers to
                    // the first byte of the 4 byte sequence.
                    return stopAt(in, inIndex, out, outIndex,
                            CoderResult.OVERFLOW);
                }
                // 0xD7C0 == 0xD800 - (0x10000 >> 10)
                cArr[outIndex++] = (char) ((jchar >> 0xA) + 0xD7C0);
                cArr[outIndex++] = (char) ((jchar & 0x3FF) + 0xDC00);
                outRemaining -= 2;
            }
            inIndex += 1 + tail;
        }
        // Either the input is exhausted (possibly ending in an incomplete
        // sequence) or the output is full.
        CoderResult result = (outRemaining == 0 && inIndex < inIndexLimit) ?
                CoderResult.OVERFLOW :
                CoderResult.UNDERFLOW;
        return stopAt(in, inIndex, out, outIndex, result);
    }


    /**
     * Tests the second byte of a multi-byte sequence against the ranges
     * permitted for the given lead byte (Unicode 6.2, Chapter 3, Table 3-7).
     *
     * @param lead   the lead byte with the high bit cleared, i.e.
     *               {@code leadByte & 0x7F}; must be a value for which
     *               {@link #REMAINING_BYTES} is not -1
     * @param second the second byte as an unsigned value (0..255)
     *
     * @return {@code true} if the second byte is permitted after the lead byte
     */
    private static boolean isValidSecondByte(int lead, int second) {
        switch (lead) {
            case 0x60:
                // First byte E0, second byte A0..BF
                return (second & 0xE0) == 0xA0;
            case 0x6D:
                // First byte ED, second byte 80..9F
                return (second & 0xE0) == 0x80;
            case 0x70:
                // First byte F0, second byte 90..BF
                return second >= 0x90 && second <= 0xBF;
            case 0x74:
                // First byte F4, second byte 80..8F
                return (second & 0xF0) == 0x80;
            default:
                // First byte C2..DF, E1..EC, EE..EF, F1..F3,
                // second byte 80..BF
                return (second & 0xC0) == 0x80;
        }
    }


    /**
     * Stores the given array indexes back into the buffers as positions and
     * hands back the supplied result.
     *
     * @param in       the array backed input buffer
     * @param inIndex  index into the input backing array to resume from
     * @param out      the array backed output buffer
     * @param outIndex index into the output backing array to resume from
     * @param result   the result to return
     *
     * @return {@code result}
     */
    private static CoderResult stopAt(ByteBuffer in, int inIndex,
            CharBuffer out, int outIndex, CoderResult result) {
        in.position(inIndex - in.arrayOffset());
        out.position(outIndex - out.arrayOffset());
        return result;
    }
}


300