1
25
26 package java.lang;
27
28 import java.io.UnsupportedEncodingException;
29 import java.lang.ref.SoftReference;
30 import java.nio.ByteBuffer;
31 import java.nio.CharBuffer;
32 import java.nio.charset.Charset;
33 import java.nio.charset.CharsetDecoder;
34 import java.nio.charset.CharsetEncoder;
35 import java.nio.charset.CharacterCodingException;
36 import java.nio.charset.CoderResult;
37 import java.nio.charset.CodingErrorAction;
38 import java.nio.charset.IllegalCharsetNameException;
39 import java.nio.charset.MalformedInputException;
40 import java.nio.charset.UnmappableCharacterException;
41 import java.nio.charset.UnsupportedCharsetException;
42 import java.util.Arrays;
43 import jdk.internal.HotSpotIntrinsicCandidate;
44 import sun.nio.cs.HistoricallyNamedCharset;
45 import sun.nio.cs.ArrayDecoder;
46 import sun.nio.cs.ArrayEncoder;
47
48 import static java.lang.String.LATIN1;
49 import static java.lang.String.UTF16;
50 import static java.lang.String.COMPACT_STRINGS;
51 import static java.lang.Character.isSurrogate;
52 import static java.lang.Character.highSurrogate;
53 import static java.lang.Character.lowSurrogate;
54 import static java.lang.Character.isSupplementaryCodePoint;
55 import static java.lang.StringUTF16.putChar;
56
57
60
61 class StringCoding {
62
/** Private, empty constructor: no instances are created from outside this class. */
private StringCoding() {
}
64
65
66 private static final ThreadLocal<SoftReference<StringDecoder>> decoder =
67 new ThreadLocal<>();
68 private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
69 new ThreadLocal<>();
70
71 private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
72 private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
73 private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
74
75 private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
76 SoftReference<T> sr = tl.get();
77 if (sr == null)
78 return null;
79 return sr.get();
80 }
81
82 private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
83 tl.set(new SoftReference<>(ob));
84 }
85
86
87 private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
88 if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
89 return ba;
90 else
91 return Arrays.copyOf(ba, len);
92 }
93
94 private static int scale(int len, float expansionFactor) {
95
96
97 return (int)(len * (double)expansionFactor);
98 }
99
100 private static Charset lookupCharset(String csn) {
101 if (Charset.isSupported(csn)) {
102 try {
103 return Charset.forName(csn);
104 } catch (UnsupportedCharsetException x) {
105 throw new Error(x);
106 }
107 }
108 return null;
109 }
110
111 static class Result {
112 byte[] value;
113 byte coder;
114
115 Result with() {
116 coder = COMPACT_STRINGS ? LATIN1 : UTF16;
117 value = new byte[0];
118 return this;
119 }
120
121 Result with(char[] val, int off, int len) {
122 if (String.COMPACT_STRINGS) {
123 byte[] bs = StringUTF16.compress(val, off, len);
124 if (bs != null) {
125 value = bs;
126 coder = LATIN1;
127 return this;
128 }
129 }
130 coder = UTF16;
131 value = StringUTF16.toBytes(val, off, len);
132 return this;
133 }
134
135 Result with(byte[] val, byte coder) {
136 this.coder = coder;
137 value = val;
138 return this;
139 }
140 }
141
142 @HotSpotIntrinsicCandidate
143 public static boolean hasNegatives(byte[] ba, int off, int len) {
144 for (int i = off; i < off + len; i++) {
145 if (ba[i] < 0) {
146 return true;
147 }
148 }
149 return false;
150 }
151
152
153 static class StringDecoder {
154 private final String requestedCharsetName;
155 private final Charset cs;
156 private final boolean isASCIICompatible;
157 private final CharsetDecoder cd;
158 protected final Result result;
159
160 StringDecoder(Charset cs, String rcn) {
161 this.requestedCharsetName = rcn;
162 this.cs = cs;
163 this.cd = cs.newDecoder()
164 .onMalformedInput(CodingErrorAction.REPLACE)
165 .onUnmappableCharacter(CodingErrorAction.REPLACE);
166 this.result = new Result();
167 this.isASCIICompatible = (cd instanceof ArrayDecoder) &&
168 ((ArrayDecoder)cd).isASCIICompatible();
169 }
170
171 String charsetName() {
172 if (cs instanceof HistoricallyNamedCharset)
173 return ((HistoricallyNamedCharset)cs).historicalName();
174 return cs.name();
175 }
176
177 final String requestedCharsetName() {
178 return requestedCharsetName;
179 }
180
181 Result decode(byte[] ba, int off, int len) {
182 if (len == 0) {
183 return result.with();
184 }
185
186 if (isASCIICompatible && !hasNegatives(ba, off, len)) {
187 if (COMPACT_STRINGS) {
188 return result.with(Arrays.copyOfRange(ba, off, off + len),
189 LATIN1);
190 } else {
191 return result.with(StringLatin1.inflate(ba, off, len), UTF16);
192 }
193 }
194 int en = scale(len, cd.maxCharsPerByte());
195 char[] ca = new char[en];
196 if (cd instanceof ArrayDecoder) {
197 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
198 return result.with(ca, 0, clen);
199 }
200 cd.reset();
201 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
202 CharBuffer cb = CharBuffer.wrap(ca);
203 try {
204 CoderResult cr = cd.decode(bb, cb, true);
205 if (!cr.isUnderflow())
206 cr.throwException();
207 cr = cd.flush(cb);
208 if (!cr.isUnderflow())
209 cr.throwException();
210 } catch (CharacterCodingException x) {
211
212
213 throw new Error(x);
214 }
215 return result.with(ca, 0, cb.position());
216 }
217 }
218
219 static Result decode(String charsetName, byte[] ba, int off, int len)
220 throws UnsupportedEncodingException
221 {
222 StringDecoder sd = deref(decoder);
223 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
224 if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
225 || csn.equals(sd.charsetName()))) {
226 sd = null;
227 try {
228 Charset cs = lookupCharset(csn);
229 if (cs != null) {
230 if (cs == UTF_8) {
231 return decodeUTF8(ba, off, len, true);
232 }
233 if (cs == ISO_8859_1) {
234 return decodeLatin1(ba, off, len);
235 }
236 if (cs == US_ASCII) {
237 return decodeASCII(ba, off, len);
238 }
239 sd = new StringDecoder(cs, csn);
240 }
241 } catch (IllegalCharsetNameException x) {}
242 if (sd == null)
243 throw new UnsupportedEncodingException(csn);
244 set(decoder, sd);
245 }
246 return sd.decode(ba, off, len);
247 }
248
249 static Result decode(Charset cs, byte[] ba, int off, int len) {
250 if (cs == UTF_8) {
251 return decodeUTF8(ba, off, len, true);
252 }
253 if (cs == ISO_8859_1) {
254 return decodeLatin1(ba, off, len);
255 }
256 if (cs == US_ASCII) {
257 return decodeASCII(ba, off, len);
258 }
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275 CharsetDecoder cd = cs.newDecoder();
276
277 if ((cd instanceof ArrayDecoder) &&
278 ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
279 return decodeLatin1(ba, off, len);
280 }
281 int en = scale(len, cd.maxCharsPerByte());
282 if (len == 0) {
283 return new Result().with();
284 }
285 cd.onMalformedInput(CodingErrorAction.REPLACE)
286 .onUnmappableCharacter(CodingErrorAction.REPLACE)
287 .reset();
288 char[] ca = new char[en];
289 if (cd instanceof ArrayDecoder) {
290 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
291 return new Result().with(ca, 0, clen);
292 }
293 if (cs.getClass().getClassLoader0() != null &&
294 System.getSecurityManager() != null) {
295 ba = Arrays.copyOfRange(ba, off, off + len);
296 off = 0;
297 }
298 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
299 CharBuffer cb = CharBuffer.wrap(ca);
300 try {
301 CoderResult cr = cd.decode(bb, cb, true);
302 if (!cr.isUnderflow())
303 cr.throwException();
304 cr = cd.flush(cb);
305 if (!cr.isUnderflow())
306 cr.throwException();
307 } catch (CharacterCodingException x) {
308
309
310 throw new Error(x);
311 }
312 return new Result().with(ca, 0, cb.position());
313 }
314
315 static Result decode(byte[] ba, int off, int len) {
316 Charset cs = Charset.defaultCharset();
317 if (cs == UTF_8) {
318 return decodeUTF8(ba, off, len, true);
319 }
320 if (cs == ISO_8859_1) {
321 return decodeLatin1(ba, off, len);
322 }
323 if (cs == US_ASCII) {
324 return decodeASCII(ba, off, len);
325 }
326 StringDecoder sd = deref(decoder);
327 if (sd == null || !cs.name().equals(sd.cs.name())) {
328 sd = new StringDecoder(cs, cs.name());
329 set(decoder, sd);
330 }
331 return sd.decode(ba, off, len);
332 }
333
334
335 private static class StringEncoder {
336 private Charset cs;
337 private CharsetEncoder ce;
338 private final boolean isASCIICompatible;
339 private final String requestedCharsetName;
340 private final boolean isTrusted;
341
342 private StringEncoder(Charset cs, String rcn) {
343 this.requestedCharsetName = rcn;
344 this.cs = cs;
345 this.ce = cs.newEncoder()
346 .onMalformedInput(CodingErrorAction.REPLACE)
347 .onUnmappableCharacter(CodingErrorAction.REPLACE);
348 this.isTrusted = (cs.getClass().getClassLoader0() == null);
349 this.isASCIICompatible = (ce instanceof ArrayEncoder) &&
350 ((ArrayEncoder)ce).isASCIICompatible();
351 }
352
353 String charsetName() {
354 if (cs instanceof HistoricallyNamedCharset)
355 return ((HistoricallyNamedCharset)cs).historicalName();
356 return cs.name();
357 }
358
359 final String requestedCharsetName() {
360 return requestedCharsetName;
361 }
362
363 byte[] encode(byte coder, byte[] val) {
364
365 if (coder == LATIN1 && isASCIICompatible &&
366 !hasNegatives(val, 0, val.length)) {
367 return Arrays.copyOf(val, val.length);
368 }
369 int len = val.length >> coder;
370 int en = scale(len, ce.maxBytesPerChar());
371 byte[] ba = new byte[en];
372 if (len == 0) {
373 return ba;
374 }
375 if (ce instanceof ArrayEncoder) {
376 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
377 : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
378 if (blen != -1) {
379 return safeTrim(ba, blen, isTrusted);
380 }
381 }
382 char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
383 : StringUTF16.toChars(val);
384 ce.reset();
385 ByteBuffer bb = ByteBuffer.wrap(ba);
386 CharBuffer cb = CharBuffer.wrap(ca, 0, len);
387 try {
388 CoderResult cr = ce.encode(cb, bb, true);
389 if (!cr.isUnderflow())
390 cr.throwException();
391 cr = ce.flush(bb);
392 if (!cr.isUnderflow())
393 cr.throwException();
394 } catch (CharacterCodingException x) {
395
396
397 throw new Error(x);
398 }
399 return safeTrim(ba, bb.position(), isTrusted);
400 }
401 }
402
403 static byte[] encode(String charsetName, byte coder, byte[] val)
404 throws UnsupportedEncodingException
405 {
406 StringEncoder se = deref(encoder);
407 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
408 if ((se == null) || !(csn.equals(se.requestedCharsetName())
409 || csn.equals(se.charsetName()))) {
410 se = null;
411 try {
412 Charset cs = lookupCharset(csn);
413 if (cs != null) {
414 if (cs == UTF_8) {
415 return encodeUTF8(coder, val, true);
416 }
417 if (cs == ISO_8859_1) {
418 return encode8859_1(coder, val);
419 }
420 if (cs == US_ASCII) {
421 return encodeASCII(coder, val);
422 }
423 se = new StringEncoder(cs, csn);
424 }
425 } catch (IllegalCharsetNameException x) {}
426 if (se == null) {
427 throw new UnsupportedEncodingException (csn);
428 }
429 set(encoder, se);
430 }
431 return se.encode(coder, val);
432 }
433
434 static byte[] encode(Charset cs, byte coder, byte[] val) {
435 if (cs == UTF_8) {
436 return encodeUTF8(coder, val, true);
437 }
438 if (cs == ISO_8859_1) {
439 return encode8859_1(coder, val);
440 }
441 if (cs == US_ASCII) {
442 return encodeASCII(coder, val);
443 }
444 CharsetEncoder ce = cs.newEncoder();
445
446 if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
447 ((ArrayEncoder)ce).isASCIICompatible() &&
448 !hasNegatives(val, 0, val.length)))) {
449 return Arrays.copyOf(val, val.length);
450 }
451 int len = val.length >> coder;
452 int en = scale(len, ce.maxBytesPerChar());
453 byte[] ba = new byte[en];
454 if (len == 0) {
455 return ba;
456 }
457 ce.onMalformedInput(CodingErrorAction.REPLACE)
458 .onUnmappableCharacter(CodingErrorAction.REPLACE)
459 .reset();
460 if (ce instanceof ArrayEncoder) {
461 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
462 : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
463 if (blen != -1) {
464 return safeTrim(ba, blen, true);
465 }
466 }
467 boolean isTrusted = cs.getClass().getClassLoader0() == null ||
468 System.getSecurityManager() == null;
469 char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
470 : StringUTF16.toChars(val);
471 ByteBuffer bb = ByteBuffer.wrap(ba);
472 CharBuffer cb = CharBuffer.wrap(ca, 0, len);
473 try {
474 CoderResult cr = ce.encode(cb, bb, true);
475 if (!cr.isUnderflow())
476 cr.throwException();
477 cr = ce.flush(bb);
478 if (!cr.isUnderflow())
479 cr.throwException();
480 } catch (CharacterCodingException x) {
481 throw new Error(x);
482 }
483 return safeTrim(ba, bb.position(), isTrusted);
484 }
485
486 static byte[] encode(byte coder, byte[] val) {
487 Charset cs = Charset.defaultCharset();
488 if (cs == UTF_8) {
489 return encodeUTF8(coder, val, true);
490 }
491 if (cs == ISO_8859_1) {
492 return encode8859_1(coder, val);
493 }
494 if (cs == US_ASCII) {
495 return encodeASCII(coder, val);
496 }
497 StringEncoder se = deref(encoder);
498 if (se == null || !cs.name().equals(se.cs.name())) {
499 se = new StringEncoder(cs, cs.name());
500 set(encoder, se);
501 }
502 return se.encode(coder, val);
503 }
504
505
// Native method; its implementation is not part of this file.
// NOTE(review): the name and the String parameter suggest it reports a diagnostic
// message through native code -- confirm against the native source.
510 private static native void err(String msg);
511
512
513 private static final ThreadLocal<StringCoding.Result>
514 resultCached = new ThreadLocal<>() {
515 protected StringCoding.Result initialValue() {
516 return new StringCoding.Result();
517 }};
518
519
520
521 private static Result decodeASCII(byte[] ba, int off, int len) {
522 Result result = resultCached.get();
523 if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
524 return result.with(Arrays.copyOfRange(ba, off, off + len),
525 LATIN1);
526 }
527 byte[] dst = new byte[len<<1];
528 int dp = 0;
529 while (dp < len) {
530 int b = ba[off++];
531 putChar(dst, dp++, (b >= 0) ? (char)b : repl);
532 }
533 return result.with(dst, UTF16);
534 }
535
536 private static byte[] encodeASCII(byte coder, byte[] val) {
537 if (coder == LATIN1) {
538 byte[] dst = new byte[val.length];
539 for (int i = 0; i < val.length; i++) {
540 if (val[i] < 0) {
541 dst[i] = '?';
542 } else {
543 dst[i] = val[i];
544 }
545 }
546 return dst;
547 }
548 int len = val.length >> 1;
549 byte[] dst = new byte[len];
550 int dp = 0;
551 for (int i = 0; i < len; i++) {
552 char c = StringUTF16.getChar(val, i);
553 if (c < 0x80) {
554 dst[dp++] = (byte)c;
555 continue;
556 }
557 if (Character.isHighSurrogate(c) && i + 1 < len &&
558 Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
559 i++;
560 }
561 dst[dp++] = '?';
562 }
563 if (len == dp) {
564 return dst;
565 }
566 return Arrays.copyOf(dst, dp);
567 }
568
569
570
571 private static Result decodeLatin1(byte[] ba, int off, int len) {
572 Result result = resultCached.get();
573 if (COMPACT_STRINGS) {
574 return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
575 } else {
576 return result.with(StringLatin1.inflate(ba, off, len), UTF16);
577 }
578 }
579
580 @HotSpotIntrinsicCandidate
581 private static int implEncodeISOArray(byte[] sa, int sp,
582 byte[] da, int dp, int len) {
583 int i = 0;
584 for (; i < len; i++) {
585 char c = StringUTF16.getChar(sa, sp++);
586 if (c > '\u00FF')
587 break;
588 da[dp++] = (byte)c;
589 }
590 return i;
591 }
592
593 private static byte[] encode8859_1(byte coder, byte[] val) {
594 return encode8859_1(coder, val, true);
595 }
596
597 private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
598 if (coder == LATIN1) {
599 return Arrays.copyOf(val, val.length);
600 }
601 int len = val.length >> 1;
602 byte[] dst = new byte[len];
603 int dp = 0;
604 int sp = 0;
605 int sl = len;
606 while (sp < sl) {
607 int ret = implEncodeISOArray(val, sp, dst, dp, len);
608 sp = sp + ret;
609 dp = dp + ret;
610 if (ret != len) {
611 if (!doReplace) {
612 throwUnmappable(sp, 1);
613 }
614 char c = StringUTF16.getChar(val, sp++);
615 if (Character.isHighSurrogate(c) && sp < sl &&
616 Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
617 sp++;
618 }
619 dst[dp++] = '?';
620 len = sl - sp;
621 }
622 }
623 if (dp == dst.length) {
624 return dst;
625 }
626 return Arrays.copyOf(dst, dp);
627 }
628
629
630
631 private static boolean isNotContinuation(int b) {
632 return (b & 0xc0) != 0x80;
633 }
634
635 private static boolean isMalformed3(int b1, int b2, int b3) {
636 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
637 (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
638 }
639
640 private static boolean isMalformed3_2(int b1, int b2) {
641 return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
642 (b2 & 0xc0) != 0x80;
643 }
644
645 private static boolean isMalformed4(int b2, int b3, int b4) {
646 return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
647 (b4 & 0xc0) != 0x80;
648 }
649
650 private static boolean isMalformed4_2(int b1, int b2) {
651 return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
652 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
653 (b2 & 0xc0) != 0x80;
654 }
655
656 private static boolean isMalformed4_3(int b3) {
657 return (b3 & 0xc0) != 0x80;
658 }
659
660
661 private static int malformedN(byte[] src, int sp, int nb) {
662 if (nb == 3) {
663 int b1 = src[sp++];
664 int b2 = src[sp++];
665 return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
666 isNotContinuation(b2)) ? 1 : 2;
667 } else if (nb == 4) {
668 int b1 = src[sp++] & 0xff;
669 int b2 = src[sp++] & 0xff;
670 if (b1 > 0xf4 ||
671 (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
672 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
673 isNotContinuation(b2))
674 return 1;
675 if (isNotContinuation(src[sp++]))
676 return 2;
677 return 3;
678 }
679 assert false;
680 return -1;
681 }
682
683 private static void throwMalformed(int off, int nb) {
684 String msg = "malformed input off : " + off + ", length : " + nb;
685 throw new IllegalArgumentException(msg, new MalformedInputException(nb));
686 }
687
688 private static void throwMalformed(byte[] val) {
689 int dp = 0;
690 while (dp < val.length && val[dp] >=0) { dp++; }
691 throwMalformed(dp, 1);
692 }
693
694 private static void throwUnmappable(int off, int nb) {
695 String msg = "malformed input off : " + off + ", length : " + nb;
696 throw new IllegalArgumentException(msg, new UnmappableCharacterException(nb));
697 }
698
699 private static void throwUnmappable(byte[] val) {
700 int dp = 0;
701 while (dp < val.length && val[dp] >=0) { dp++; }
702 throwUnmappable(dp, 1);
703 }
704
705 private static char repl = '\ufffd';
706
707 private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
708
709 if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
710 return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
711 LATIN1);
712 return decodeUTF8_0(src, sp, len, doReplace);
713 }
714
/**
 * Decodes src[sp, sp + len) as UTF-8 into the calling thread's cached Result.
 *
 * With compact strings enabled, a first pass tries to produce one byte per char;
 * if it consumes the whole input the result is returned as LATIN1. Otherwise the
 * remainder is decoded into a two-bytes-per-char array and returned as UTF16.
 *
 * In the UTF16 loop every malformed sequence either calls throwMalformed (when
 * doReplace is false) or writes one repl char in its place.
 *
 * @param src       input bytes
 * @param sp        index of the first byte to decode
 * @param len       number of bytes to decode
 * @param doReplace whether malformed input is replaced instead of reported
 * @return the thread's cached Result, overwritten with the decoded content
 */
715 private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
716 Result ret = resultCached.get();
717
// sl is the exclusive end index of the input; dp counts chars written to dst.
718 int sl = sp + len;
719 int dp = 0;
720 byte[] dst = new byte[len];
721
// First pass: one output byte per char. Only non-negative bytes and two-byte
// sequences led by 0xC2 or 0xC3 with a valid continuation byte are accepted.
722 if (COMPACT_STRINGS) {
723 while (sp < sl) {
724 int b1 = src[sp];
725 if (b1 >= 0) {
726 dst[dp++] = (byte)b1;
727 sp++;
728 continue;
729 }
730 if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
731 sp + 1 < sl) {
732 int b2 = src[sp + 1];
733 if (!isNotContinuation(b2)) {
734 dst[dp++] = (byte)(((b1 << 6) ^ b2)^
735 (((byte) 0xC0 << 6) ^
736 ((byte) 0x80 << 0)));
737 sp += 2;
738 continue;
739 }
740 }
741
742
// Anything else is not handled by this pass: stop with sp still at the
// offending byte and continue in the two-bytes-per-char loop below.
743 break;
744 }
// The first pass consumed everything: trim dst to the chars written and return LATIN1.
745 if (sp == sl) {
746 if (dp != dst.length) {
747 dst = Arrays.copyOf(dst, dp);
748 }
749 return ret.with(dst, LATIN1);
750 }
751 }
// Switch to two bytes per char, carrying over the dp chars already produced.
752 if (dp == 0) {
753 dst = new byte[len << 1];
754 } else {
755 byte[] buf = new byte[len << 1];
756 StringLatin1.inflate(dst, 0, buf, 0, dp);
757 dst = buf;
758 }
759 while (sp < sl) {
760 int b1 = src[sp++];
761 if (b1 >= 0) {
762 putChar(dst, dp++, (char) b1);
// b1 (sign-extended) has bit pattern 110xxxxx; the 0x1e mask rejects leads 0xC0 and 0xC1.
763 } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
764 if (sp < sl) {
765 int b2 = src[sp++];
766 if (isNotContinuation(b2)) {
767 if (!doReplace) {
768 throwMalformed(sp - 1, 1);
769 }
770 putChar(dst, dp++, repl);
// Step back so the non-continuation byte is decoded again as the start of a new sequence.
771 sp--;
772 } else {
773 putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
774 (((byte) 0xC0 << 6) ^
775 ((byte) 0x80 << 0))));
776 }
777 continue;
778 }
// The lead byte was the last byte of the input.
779 if (!doReplace) {
780 throwMalformed(sp, 1);
781 }
782 putChar(dst, dp++, repl);
783 break;
// b1 has bit pattern 1110xxxx: lead of a three-byte sequence.
784 } else if ((b1 >> 4) == -2) {
785 if (sp + 1 < sl) {
786 int b2 = src[sp++];
787 int b3 = src[sp++];
788 if (isMalformed3(b1, b2, b3)) {
789 if (!doReplace) {
790 throwMalformed(sp - 3, 3);
791 }
792 putChar(dst, dp++, repl);
// Rewind to the lead byte, then skip only as many bytes as malformedN reports.
793 sp -= 3;
794 sp += malformedN(src, sp, 3);
795 } else {
796 char c = (char)((b1 << 12) ^
797 (b2 << 6) ^
798 (b3 ^
799 (((byte) 0xE0 << 12) ^
800 ((byte) 0x80 << 6) ^
801 ((byte) 0x80 << 0))));
// A three-byte sequence that decodes to a surrogate code unit is treated as malformed.
802 if (isSurrogate(c)) {
803 if (!doReplace) {
804 throwMalformed(sp - 3, 3);
805 }
806 putChar(dst, dp++, repl);
807 } else {
808 putChar(dst, dp++, c);
809 }
810 }
811 continue;
812 }
// Fewer than two bytes follow the lead. If one byte remains and it is already
// invalid after b1, emit a replacement and decode that byte again on its own.
813 if (sp < sl && isMalformed3_2(b1, src[sp])) {
814 if (!doReplace) {
815 throwMalformed(sp - 1, 2);
816 }
817 putChar(dst, dp++, repl);
818 continue;
819 }
820 if (!doReplace){
821 throwMalformed(sp, 1);
822 }
823 putChar(dst, dp++, repl);
824 break;
// b1 has bit pattern 11110xxx: lead of a four-byte sequence.
825 } else if ((b1 >> 3) == -2) {
826 if (sp + 2 < sl) {
827 int b2 = src[sp++];
828 int b3 = src[sp++];
829 int b4 = src[sp++];
830 int uc = ((b1 << 18) ^
831 (b2 << 12) ^
832 (b3 << 6) ^
833 (b4 ^
834 (((byte) 0xF0 << 18) ^
835 ((byte) 0x80 << 12) ^
836 ((byte) 0x80 << 6) ^
837 ((byte) 0x80 << 0))));
838 if (isMalformed4(b2, b3, b4) ||
839 !isSupplementaryCodePoint(uc)) {
840 if (!doReplace) {
841 throwMalformed(sp - 4, 4);
842 }
843 putChar(dst, dp++, repl);
// Rewind to the lead byte, then skip only as many bytes as malformedN reports.
844 sp -= 4;
845 sp += malformedN(src, sp, 4);
846 } else {
// A valid supplementary code point is stored as a surrogate pair (two chars).
847 putChar(dst, dp++, highSurrogate(uc));
848 putChar(dst, dp++, lowSurrogate(uc));
849 }
850 continue;
851 }
// Fewer than three bytes follow the lead. b1 is switched to its unsigned value
// because the checks below compare it against 0xf0/0xf4.
852 b1 &= 0xff;
853 if (b1 > 0xf4 ||
854 sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
855 if (!doReplace) {
856 throwMalformed(sp - 1, 1);
857 }
858 putChar(dst, dp++, repl);
859 continue;
860 }
861 if (!doReplace) {
862 throwMalformed(sp - 1, 1);
863 }
// Advance past one more byte and emit one replacement; decoding continues only if
// a further byte exists and is not a continuation byte.
864 sp++;
865 putChar(dst, dp++, repl);
866 if (sp < sl && isMalformed4_3(src[sp])) {
867 continue;
868 }
869 break;
// Remaining negative bytes: those matching none of the lead-byte tests above
// (this includes 0xC0 and 0xC1, which the two-byte test rejects).
870 } else {
871 if (!doReplace) {
872 throwMalformed(sp - 1, 1);
873 }
874 putChar(dst, dp++, repl);
875 }
876 }
// dst was sized for len chars; trim it when a different number was produced.
877 if (dp != len) {
878 dst = Arrays.copyOf(dst, dp << 1);
879 }
880 return ret.with(dst, UTF16);
881 }
882
883 private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
884 if (coder == UTF16)
885 return encodeUTF8_UTF16(val, doReplace);
886
887 if (!hasNegatives(val, 0, val.length))
888 return Arrays.copyOf(val, val.length);
889
890 int dp = 0;
891 byte[] dst = new byte[val.length << 1];
892 for (int sp = 0; sp < val.length; sp++) {
893 byte c = val[sp];
894 if (c < 0) {
895 dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
896 dst[dp++] = (byte)(0x80 | (c & 0x3f));
897 } else {
898 dst[dp++] = c;
899 }
900 }
901 if (dp == dst.length)
902 return dst;
903 return Arrays.copyOf(dst, dp);
904 }
905
906 private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
907 int dp = 0;
908 int sp = 0;
909 int sl = val.length >> 1;
910 byte[] dst = new byte[sl * 3];
911 char c;
912 while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
913
914 dst[dp++] = (byte)c;
915 sp++;
916 }
917 while (sp < sl) {
918 c = StringUTF16.getChar(val, sp++);
919 if (c < 0x80) {
920 dst[dp++] = (byte)c;
921 } else if (c < 0x800) {
922 dst[dp++] = (byte)(0xc0 | (c >> 6));
923 dst[dp++] = (byte)(0x80 | (c & 0x3f));
924 } else if (Character.isSurrogate(c)) {
925 int uc = -1;
926 char c2;
927 if (Character.isHighSurrogate(c) && sp < sl &&
928 Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
929 uc = Character.toCodePoint(c, c2);
930 }
931 if (uc < 0) {
932 if (doReplace) {
933 dst[dp++] = '?';
934 } else {
935 throwUnmappable(sp - 1, 1);
936 }
937 } else {
938 dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
939 dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
940 dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
941 dst[dp++] = (byte)(0x80 | (uc & 0x3f));
942 sp++;
943 }
944 } else {
945
946 dst[dp++] = (byte)(0xe0 | ((c >> 12)));
947 dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
948 dst[dp++] = (byte)(0x80 | (c & 0x3f));
949 }
950 }
951 if (dp == dst.length) {
952 return dst;
953 }
954 return Arrays.copyOf(dst, dp);
955 }
956
957
958
959
962 static String newStringUTF8NoRepl(byte[] src, int off, int len) {
963 if (COMPACT_STRINGS && !hasNegatives(src, off, len))
964 return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
965 Result ret = decodeUTF8_0(src, off, len, false);
966 return new String(ret.value, ret.coder);
967 }
968
969
972 static byte[] getBytesUTF8NoRepl(String s) {
973 return encodeUTF8(s.coder(), s.value(), false);
974 }
975
976
977
978 private static boolean isASCII(byte[] src) {
979 return !hasNegatives(src, 0, src.length);
980 }
981
982 private static String newStringLatin1(byte[] src) {
983 if (COMPACT_STRINGS)
984 return new String(src, LATIN1);
985 return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
986 }
987
988 static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
989 try {
990 return newStringNoRepl1(src, cs);
991 } catch (IllegalArgumentException e) {
992
993 Throwable cause = e.getCause();
994 if (cause instanceof MalformedInputException) {
995 throw (MalformedInputException)cause;
996 }
997 throw (CharacterCodingException)cause;
998 }
999 }
1000
1001 static String newStringNoRepl1(byte[] src, Charset cs) {
1002 if (cs == UTF_8) {
1003 if (COMPACT_STRINGS && isASCII(src))
1004 return new String(src, LATIN1);
1005 Result ret = decodeUTF8_0(src, 0, src.length, false);
1006 return new String(ret.value, ret.coder);
1007 }
1008 if (cs == ISO_8859_1) {
1009 return newStringLatin1(src);
1010 }
1011 if (cs == US_ASCII) {
1012 if (isASCII(src)) {
1013 return newStringLatin1(src);
1014 } else {
1015 throwMalformed(src);
1016 }
1017 }
1018
1019 CharsetDecoder cd = cs.newDecoder();
1020
1021 if ((cd instanceof ArrayDecoder) &&
1022 ((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) {
1023 return newStringLatin1(src);
1024 }
1025 int len = src.length;
1026 if (len == 0) {
1027 return "";
1028 }
1029 int en = scale(len, cd.maxCharsPerByte());
1030 char[] ca = new char[en];
1031 if (cs.getClass().getClassLoader0() != null &&
1032 System.getSecurityManager() != null) {
1033 src = Arrays.copyOf(src, len);
1034 }
1035 ByteBuffer bb = ByteBuffer.wrap(src);
1036 CharBuffer cb = CharBuffer.wrap(ca);
1037 try {
1038 CoderResult cr = cd.decode(bb, cb, true);
1039 if (!cr.isUnderflow())
1040 cr.throwException();
1041 cr = cd.flush(cb);
1042 if (!cr.isUnderflow())
1043 cr.throwException();
1044 } catch (CharacterCodingException x) {
1045 throw new IllegalArgumentException(x);
1046 }
1047 Result ret = resultCached.get().with(ca, 0, cb.position());
1048 return new String(ret.value, ret.coder);
1049 }
1050
1051
1054 static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
1055 try {
1056 return getBytesNoRepl1(s, cs);
1057 } catch (IllegalArgumentException e) {
1058
1059 Throwable cause = e.getCause();
1060 if (cause instanceof UnmappableCharacterException) {
1061 throw (UnmappableCharacterException)cause;
1062 }
1063 throw (CharacterCodingException)cause;
1064 }
1065 }
1066
1067 static byte[] getBytesNoRepl1(String s, Charset cs) {
1068 byte[] val = s.value();
1069 byte coder = s.coder();
1070 if (cs == UTF_8) {
1071 if (coder == LATIN1 && isASCII(val)) {
1072 return val;
1073 }
1074 return encodeUTF8(coder, val, false);
1075 }
1076 if (cs == ISO_8859_1) {
1077 if (coder == LATIN1) {
1078 return val;
1079 }
1080 return encode8859_1(coder, val, false);
1081 }
1082 if (cs == US_ASCII) {
1083 if (coder == LATIN1) {
1084 if (isASCII(val)) {
1085 return val;
1086 } else {
1087 throwUnmappable(val);
1088 }
1089 }
1090 }
1091 CharsetEncoder ce = cs.newEncoder();
1092
1093 if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
1094 ((ArrayEncoder)ce).isASCIICompatible() &&
1095 isASCII(val)))) {
1096 return val;
1097 }
1098 int len = val.length >> coder;
1099 int en = scale(len, ce.maxBytesPerChar());
1100 byte[] ba = new byte[en];
1101 if (len == 0) {
1102 return ba;
1103 }
1104 if (ce instanceof ArrayEncoder) {
1105 int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
1106 : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
1107 if (blen != -1) {
1108 return safeTrim(ba, blen, true);
1109 }
1110 }
1111 boolean isTrusted = cs.getClass().getClassLoader0() == null ||
1112 System.getSecurityManager() == null;
1113 char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
1114 : StringUTF16.toChars(val);
1115 ByteBuffer bb = ByteBuffer.wrap(ba);
1116 CharBuffer cb = CharBuffer.wrap(ca, 0, len);
1117 try {
1118 CoderResult cr = ce.encode(cb, bb, true);
1119 if (!cr.isUnderflow())
1120 cr.throwException();
1121 cr = ce.flush(bb);
1122 if (!cr.isUnderflow())
1123 cr.throwException();
1124 } catch (CharacterCodingException x) {
1125 throw new IllegalArgumentException(x);
1126 }
1127 return safeTrim(ba, bb.position(), isTrusted);
1128 }
1129 }
1130