1 /*
2 * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.lang;
27
28 import java.util.Arrays;
29 import java.util.Map;
30 import java.util.HashMap;
31 import java.util.Locale;
32
33 import jdk.internal.HotSpotIntrinsicCandidate;
34
35 /**
36 * The {@code Character} class wraps a value of the primitive
37 * type {@code char} in an object. An object of class
38 * {@code Character} contains a single field whose type is
39 * {@code char}.
40 * <p>
41 * In addition, this class provides a large number of static methods for
42 * determining a character's category (lowercase letter, digit, etc.)
43 * and for converting characters from uppercase to lowercase and vice
44 * versa.
45 *
46 * <h3><a id="conformance">Unicode Conformance</a></h3>
47 * <p>
48 * The fields and methods of class {@code Character} are defined in terms
49 * of character information from the Unicode Standard, specifically the
50 * <i>UnicodeData</i> file that is part of the Unicode Character Database.
51 * This file specifies properties including name and category for every
52 * assigned Unicode code point or character range. The file is available
53 * from the Unicode Consortium at
54 * <a href="http://www.unicode.org">http://www.unicode.org</a>.
55 * <p>
56 * The Java SE 11 Platform uses character information from version 10.0
57 * of the Unicode Standard, with an extension. The Java SE 11 Platform allows
58 * an implementation of class {@code Character} to use the Japanese Era
59 * code point, {@code U+32FF}, from the first version of the Unicode Standard
60 * after 10.0 that assigns the code point. Consequently, the behavior of
61 * fields and methods of class {@code Character} may vary across
62 * implementations of the Java SE 11 Platform when processing the
 * aforementioned code point (outside of version 10.0), except for
64 * the following methods that define Java identifiers:
65 * {@link #isJavaIdentifierStart(int)}, {@link #isJavaIdentifierStart(char)},
66 * {@link #isJavaIdentifierPart(int)}, and {@link #isJavaIdentifierPart(char)}.
67 * Code points in Java identifiers must be drawn from version 10.0 of
68 * the Unicode Standard.
69 *
70 * <h3><a id="unicode">Unicode Character Representations</a></h3>
71 *
72 * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
74 * original Unicode specification, which defined characters as
75 * fixed-width 16-bit entities. The Unicode Standard has since been
76 * changed to allow for characters whose representation requires more
77 * than 16 bits. The range of legal <em>code point</em>s is now
78 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
79 * (Refer to the <a
80 * href="http://www.unicode.org/reports/tr27/#notation"><i>
81 * definition</i></a> of the U+<i>n</i> notation in the Unicode
82 * Standard.)
83 *
84 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
85 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
86 * <a id="supplementary">Characters</a> whose code points are greater
87 * than U+FFFF are called <em>supplementary character</em>s. The Java
88 * platform uses the UTF-16 representation in {@code char} arrays and
89 * in the {@code String} and {@code StringBuffer} classes. In
90 * this representation, supplementary characters are represented as a pair
91 * of {@code char} values, the first from the <em>high-surrogates</em>
92 * range, (\uD800-\uDBFF), the second from the
93 * <em>low-surrogates</em> range (\uDC00-\uDFFF).
94 *
95 * <p>A {@code char} value, therefore, represents Basic
96 * Multilingual Plane (BMP) code points, including the surrogate
97 * code points, or code units of the UTF-16 encoding. An
98 * {@code int} value represents all Unicode code points,
99 * including supplementary code points. The lower (least significant)
100 * 21 bits of {@code int} are used to represent Unicode code
101 * points and the upper (most significant) 11 bits must be zero.
102 * Unless otherwise specified, the behavior with respect to
103 * supplementary characters and surrogate {@code char} values is
104 * as follows:
105 *
106 * <ul>
107 * <li>The methods that only accept a {@code char} value cannot support
108 * supplementary characters. They treat {@code char} values from the
109 * surrogate ranges as undefined characters. For example,
110 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
111 * this specific value if followed by any low-surrogate value in a string
112 * would represent a letter.
113 *
114 * <li>The methods that accept an {@code int} value support all
115 * Unicode characters, including supplementary characters. For
116 * example, {@code Character.isLetter(0x2F81A)} returns
117 * {@code true} because the code point value represents a letter
118 * (a CJK ideograph).
119 * </ul>
120 *
121 * <p>In the Java SE API documentation, <em>Unicode code point</em> is
122 * used for character values in the range between U+0000 and U+10FFFF,
123 * and <em>Unicode code unit</em> is used for 16-bit
124 * {@code char} values that are code units of the <em>UTF-16</em>
125 * encoding. For more information on Unicode terminology, refer to the
126 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
127 *
128 * @author Lee Boynton
129 * @author Guy Steele
130 * @author Akira Tanaka
131 * @author Martin Buchholz
132 * @author Ulf Zibis
133 * @since 1.0
134 */
135 public final
136 class Character implements java.io.Serializable, Comparable<Character> {
137 /**
138 * The minimum radix available for conversion to and from strings.
139 * The constant value of this field is the smallest value permitted
140 * for the radix argument in radix-conversion methods such as the
141 * {@code digit} method, the {@code forDigit} method, and the
142 * {@code toString} method of class {@code Integer}.
143 *
144 * @see Character#digit(char, int)
145 * @see Character#forDigit(int, int)
146 * @see Integer#toString(int, int)
147 * @see Integer#valueOf(String)
148 */
149 public static final int MIN_RADIX = 2;
150
151 /**
152 * The maximum radix available for conversion to and from strings.
153 * The constant value of this field is the largest value permitted
154 * for the radix argument in radix-conversion methods such as the
155 * {@code digit} method, the {@code forDigit} method, and the
156 * {@code toString} method of class {@code Integer}.
157 *
158 * @see Character#digit(char, int)
159 * @see Character#forDigit(int, int)
160 * @see Integer#toString(int, int)
161 * @see Integer#valueOf(String)
162 */
163 public static final int MAX_RADIX = 36;
164
165 /**
166 * The constant value of this field is the smallest value of type
167 * {@code char}, {@code '\u005Cu0000'}.
168 *
169 * @since 1.0.2
170 */
171 public static final char MIN_VALUE = '\u0000';
172
173 /**
174 * The constant value of this field is the largest value of type
175 * {@code char}, {@code '\u005CuFFFF'}.
176 *
177 * @since 1.0.2
178 */
179 public static final char MAX_VALUE = '\uFFFF';
180
181 /**
182 * The {@code Class} instance representing the primitive type
183 * {@code char}.
184 *
185 * @since 1.1
186 */
187 @SuppressWarnings("unchecked")
188 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
189
190 /*
191 * Normative general types
192 */
193
194 /*
195 * General character types
196 */
197
198 /**
199 * General category "Cn" in the Unicode specification.
200 * @since 1.1
201 */
202 public static final byte UNASSIGNED = 0;
203
204 /**
205 * General category "Lu" in the Unicode specification.
206 * @since 1.1
207 */
208 public static final byte UPPERCASE_LETTER = 1;
209
210 /**
211 * General category "Ll" in the Unicode specification.
212 * @since 1.1
213 */
214 public static final byte LOWERCASE_LETTER = 2;
215
216 /**
217 * General category "Lt" in the Unicode specification.
218 * @since 1.1
219 */
220 public static final byte TITLECASE_LETTER = 3;
221
222 /**
223 * General category "Lm" in the Unicode specification.
224 * @since 1.1
225 */
226 public static final byte MODIFIER_LETTER = 4;
227
228 /**
229 * General category "Lo" in the Unicode specification.
230 * @since 1.1
231 */
232 public static final byte OTHER_LETTER = 5;
233
234 /**
235 * General category "Mn" in the Unicode specification.
236 * @since 1.1
237 */
238 public static final byte NON_SPACING_MARK = 6;
239
240 /**
241 * General category "Me" in the Unicode specification.
242 * @since 1.1
243 */
244 public static final byte ENCLOSING_MARK = 7;
245
246 /**
247 * General category "Mc" in the Unicode specification.
248 * @since 1.1
249 */
250 public static final byte COMBINING_SPACING_MARK = 8;
251
252 /**
253 * General category "Nd" in the Unicode specification.
254 * @since 1.1
255 */
256 public static final byte DECIMAL_DIGIT_NUMBER = 9;
257
258 /**
259 * General category "Nl" in the Unicode specification.
260 * @since 1.1
261 */
262 public static final byte LETTER_NUMBER = 10;
263
264 /**
265 * General category "No" in the Unicode specification.
266 * @since 1.1
267 */
268 public static final byte OTHER_NUMBER = 11;
269
270 /**
271 * General category "Zs" in the Unicode specification.
272 * @since 1.1
273 */
274 public static final byte SPACE_SEPARATOR = 12;
275
276 /**
277 * General category "Zl" in the Unicode specification.
278 * @since 1.1
279 */
280 public static final byte LINE_SEPARATOR = 13;
281
282 /**
283 * General category "Zp" in the Unicode specification.
284 * @since 1.1
285 */
286 public static final byte PARAGRAPH_SEPARATOR = 14;
287
288 /**
289 * General category "Cc" in the Unicode specification.
290 * @since 1.1
291 */
292 public static final byte CONTROL = 15;
293
294 /**
295 * General category "Cf" in the Unicode specification.
296 * @since 1.1
297 */
298 public static final byte FORMAT = 16;
299
300 /**
301 * General category "Co" in the Unicode specification.
302 * @since 1.1
303 */
304 public static final byte PRIVATE_USE = 18;
305
306 /**
307 * General category "Cs" in the Unicode specification.
308 * @since 1.1
309 */
310 public static final byte SURROGATE = 19;
311
312 /**
313 * General category "Pd" in the Unicode specification.
314 * @since 1.1
315 */
316 public static final byte DASH_PUNCTUATION = 20;
317
318 /**
319 * General category "Ps" in the Unicode specification.
320 * @since 1.1
321 */
322 public static final byte START_PUNCTUATION = 21;
323
324 /**
325 * General category "Pe" in the Unicode specification.
326 * @since 1.1
327 */
328 public static final byte END_PUNCTUATION = 22;
329
330 /**
331 * General category "Pc" in the Unicode specification.
332 * @since 1.1
333 */
334 public static final byte CONNECTOR_PUNCTUATION = 23;
335
336 /**
337 * General category "Po" in the Unicode specification.
338 * @since 1.1
339 */
340 public static final byte OTHER_PUNCTUATION = 24;
341
342 /**
343 * General category "Sm" in the Unicode specification.
344 * @since 1.1
345 */
346 public static final byte MATH_SYMBOL = 25;
347
348 /**
349 * General category "Sc" in the Unicode specification.
350 * @since 1.1
351 */
352 public static final byte CURRENCY_SYMBOL = 26;
353
354 /**
355 * General category "Sk" in the Unicode specification.
356 * @since 1.1
357 */
358 public static final byte MODIFIER_SYMBOL = 27;
359
360 /**
361 * General category "So" in the Unicode specification.
362 * @since 1.1
363 */
364 public static final byte OTHER_SYMBOL = 28;
365
366 /**
367 * General category "Pi" in the Unicode specification.
368 * @since 1.4
369 */
370 public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
371
372 /**
373 * General category "Pf" in the Unicode specification.
374 * @since 1.4
375 */
376 public static final byte FINAL_QUOTE_PUNCTUATION = 30;
377
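/**
 * Error flag. Declared as an {@code int} (code point) rather than a
 * {@code char} so that it cannot be confused with U+FFFF: the value
 * {@code 0xFFFFFFFF} is -1, which lies outside the code point range.
 */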
381 static final int ERROR = 0xFFFFFFFF;
382
383
384 /**
385 * Undefined bidirectional character type. Undefined {@code char}
386 * values have undefined directionality in the Unicode specification.
387 * @since 1.4
388 */
389 public static final byte DIRECTIONALITY_UNDEFINED = -1;
390
391 /**
392 * Strong bidirectional character type "L" in the Unicode specification.
393 * @since 1.4
394 */
395 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
396
397 /**
398 * Strong bidirectional character type "R" in the Unicode specification.
399 * @since 1.4
400 */
401 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
402
403 /**
404 * Strong bidirectional character type "AL" in the Unicode specification.
405 * @since 1.4
406 */
407 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
408
409 /**
410 * Weak bidirectional character type "EN" in the Unicode specification.
411 * @since 1.4
412 */
413 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
414
415 /**
416 * Weak bidirectional character type "ES" in the Unicode specification.
417 * @since 1.4
418 */
419 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
420
421 /**
422 * Weak bidirectional character type "ET" in the Unicode specification.
423 * @since 1.4
424 */
425 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
426
427 /**
428 * Weak bidirectional character type "AN" in the Unicode specification.
429 * @since 1.4
430 */
431 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
432
433 /**
434 * Weak bidirectional character type "CS" in the Unicode specification.
435 * @since 1.4
436 */
437 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
438
439 /**
440 * Weak bidirectional character type "NSM" in the Unicode specification.
441 * @since 1.4
442 */
443 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
444
445 /**
446 * Weak bidirectional character type "BN" in the Unicode specification.
447 * @since 1.4
448 */
449 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
450
451 /**
452 * Neutral bidirectional character type "B" in the Unicode specification.
453 * @since 1.4
454 */
455 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
456
457 /**
458 * Neutral bidirectional character type "S" in the Unicode specification.
459 * @since 1.4
460 */
461 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
462
463 /**
464 * Neutral bidirectional character type "WS" in the Unicode specification.
465 * @since 1.4
466 */
467 public static final byte DIRECTIONALITY_WHITESPACE = 12;
468
469 /**
470 * Neutral bidirectional character type "ON" in the Unicode specification.
471 * @since 1.4
472 */
473 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
474
475 /**
476 * Strong bidirectional character type "LRE" in the Unicode specification.
477 * @since 1.4
478 */
479 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
480
481 /**
482 * Strong bidirectional character type "LRO" in the Unicode specification.
483 * @since 1.4
484 */
485 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
486
487 /**
488 * Strong bidirectional character type "RLE" in the Unicode specification.
489 * @since 1.4
490 */
491 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
492
493 /**
494 * Strong bidirectional character type "RLO" in the Unicode specification.
495 * @since 1.4
496 */
497 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
498
499 /**
500 * Weak bidirectional character type "PDF" in the Unicode specification.
501 * @since 1.4
502 */
503 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
504
505 /**
506 * Weak bidirectional character type "LRI" in the Unicode specification.
507 * @since 9
508 */
509 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
510
511 /**
512 * Weak bidirectional character type "RLI" in the Unicode specification.
513 * @since 9
514 */
515 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
516
517 /**
518 * Weak bidirectional character type "FSI" in the Unicode specification.
519 * @since 9
520 */
521 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
522
523 /**
524 * Weak bidirectional character type "PDI" in the Unicode specification.
525 * @since 9
526 */
527 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
528
529 /**
530 * The minimum value of a
531 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
532 * Unicode high-surrogate code unit</a>
533 * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
534 * A high-surrogate is also known as a <i>leading-surrogate</i>.
535 *
536 * @since 1.5
537 */
538 public static final char MIN_HIGH_SURROGATE = '\uD800';
539
540 /**
541 * The maximum value of a
542 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
543 * Unicode high-surrogate code unit</a>
544 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
545 * A high-surrogate is also known as a <i>leading-surrogate</i>.
546 *
547 * @since 1.5
548 */
549 public static final char MAX_HIGH_SURROGATE = '\uDBFF';
550
551 /**
552 * The minimum value of a
553 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
554 * Unicode low-surrogate code unit</a>
555 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
556 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
557 *
558 * @since 1.5
559 */
560 public static final char MIN_LOW_SURROGATE = '\uDC00';
561
562 /**
563 * The maximum value of a
564 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
565 * Unicode low-surrogate code unit</a>
566 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
567 * A low-surrogate is also known as a <i>trailing-surrogate</i>.
568 *
569 * @since 1.5
570 */
571 public static final char MAX_LOW_SURROGATE = '\uDFFF';
572
573 /**
574 * The minimum value of a Unicode surrogate code unit in the
575 * UTF-16 encoding, constant {@code '\u005CuD800'}.
576 *
577 * @since 1.5
578 */
579 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
580
581 /**
582 * The maximum value of a Unicode surrogate code unit in the
583 * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
584 *
585 * @since 1.5
586 */
587 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
588
589 /**
590 * The minimum value of a
591 * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
592 * Unicode supplementary code point</a>, constant {@code U+10000}.
593 *
594 * @since 1.5
595 */
596 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
597
598 /**
599 * The minimum value of a
600 * <a href="http://www.unicode.org/glossary/#code_point">
601 * Unicode code point</a>, constant {@code U+0000}.
602 *
603 * @since 1.5
604 */
605 public static final int MIN_CODE_POINT = 0x000000;
606
607 /**
608 * The maximum value of a
609 * <a href="http://www.unicode.org/glossary/#code_point">
610 * Unicode code point</a>, constant {@code U+10FFFF}.
611 *
612 * @since 1.5
613 */
614 public static final int MAX_CODE_POINT = 0X10FFFF;
615
616
617 /**
618 * Instances of this class represent particular subsets of the Unicode
619 * character set. The only family of subsets defined in the
620 * {@code Character} class is {@link Character.UnicodeBlock}.
621 * Other portions of the Java API may define other subsets for their
622 * own purposes.
623 *
624 * @since 1.2
625 */
626 public static class Subset {
627
628 private String name;
629
630 /**
631 * Constructs a new {@code Subset} instance.
632 *
633 * @param name The name of this subset
634 * @throws NullPointerException if name is {@code null}
635 */
636 protected Subset(String name) {
637 if (name == null) {
638 throw new NullPointerException("name");
639 }
640 this.name = name;
641 }
642
643 /**
644 * Compares two {@code Subset} objects for equality.
645 * This method returns {@code true} if and only if
646 * {@code this} and the argument refer to the same
647 * object; since this method is {@code final}, this
648 * guarantee holds for all subclasses.
649 */
650 public final boolean equals(Object obj) {
651 return (this == obj);
652 }
653
654 /**
655 * Returns the standard hash code as defined by the
656 * {@link Object#hashCode} method. This method
657 * is {@code final} in order to ensure that the
658 * {@code equals} and {@code hashCode} methods will
659 * be consistent in all subclasses.
660 */
661 public final int hashCode() {
662 return super.hashCode();
663 }
664
665 /**
666 * Returns the name of this subset.
667 */
668 public final String toString() {
669 return name;
670 }
671 }
672
673 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
674 // for the latest specification of Unicode Blocks.
675
676 /**
677 * A family of character subsets representing the character blocks in the
678 * Unicode specification. Character blocks generally define characters
679 * used for a specific script or purpose. A character is contained by
680 * at most one Unicode block.
681 *
682 * @since 1.2
683 */
684 public static final class UnicodeBlock extends Subset {
/**
 * Lookup table from names to blocks. Each constructor of this class
 * stores the new block here under its identifier name and under every
 * alias it is given.
 * <p>
 * The initial capacity is computed from:
 * 638 - the expected number of entities
 * 0.75 - the default load factor of HashMap
 * i.e. expected entries divided by the load factor, plus one, so the
 * expected number of entries stays below the resize threshold.
 */
private static Map<String, UnicodeBlock> map =
        new HashMap<>((int)(638 / 0.75f + 1.0f));
691
/**
 * Creates a UnicodeBlock with the given identifier name.
 * This name must be the same as the block identifier.
 * <p>
 * The name is handed to the {@code Subset} constructor, which rejects
 * {@code null}, and the new block is then stored in {@code map} under
 * that same name.
 *
 * @param idName the identifier name of the block; also used as its
 *               key in {@code map}
 * @throws NullPointerException if {@code idName} is {@code null}
 */
private UnicodeBlock(String idName) {
    super(idName);
    // Register the block under its identifier name.
    map.put(idName, this);
}
700
/**
 * Creates a UnicodeBlock with the given identifier name and
 * alias name.
 * <p>
 * Delegates to the single-name constructor, which stores the block
 * under {@code idName}, and then stores the same block under
 * {@code alias} as a second key in {@code map}.
 *
 * @param idName the identifier name of the block
 * @param alias  an additional name that maps to this block
 */
private UnicodeBlock(String idName, String alias) {
    this(idName);
    // Second key for the same block instance.
    map.put(alias, this);
}
709
710 /**
711 * Creates a UnicodeBlock with the given identifier name and
712 * alias names.
713 */
714 private UnicodeBlock(String idName, String... aliases) {
715 this(idName);
716 for (String alias : aliases)
717 map.put(alias, this);
718 }
719
720 /**
721 * Constant for the "Basic Latin" Unicode character block.
722 * @since 1.2
723 */
724 public static final UnicodeBlock BASIC_LATIN =
725 new UnicodeBlock("BASIC_LATIN",
726 "BASIC LATIN",
727 "BASICLATIN");
728
729 /**
730 * Constant for the "Latin-1 Supplement" Unicode character block.
731 * @since 1.2
732 */
733 public static final UnicodeBlock LATIN_1_SUPPLEMENT =
734 new UnicodeBlock("LATIN_1_SUPPLEMENT",
735 "LATIN-1 SUPPLEMENT",
736 "LATIN-1SUPPLEMENT");
737
738 /**
739 * Constant for the "Latin Extended-A" Unicode character block.
740 * @since 1.2
741 */
742 public static final UnicodeBlock LATIN_EXTENDED_A =
743 new UnicodeBlock("LATIN_EXTENDED_A",
744 "LATIN EXTENDED-A",
745 "LATINEXTENDED-A");
746
747 /**
748 * Constant for the "Latin Extended-B" Unicode character block.
749 * @since 1.2
750 */
751 public static final UnicodeBlock LATIN_EXTENDED_B =
752 new UnicodeBlock("LATIN_EXTENDED_B",
753 "LATIN EXTENDED-B",
754 "LATINEXTENDED-B");
755
756 /**
757 * Constant for the "IPA Extensions" Unicode character block.
758 * @since 1.2
759 */
760 public static final UnicodeBlock IPA_EXTENSIONS =
761 new UnicodeBlock("IPA_EXTENSIONS",
762 "IPA EXTENSIONS",
763 "IPAEXTENSIONS");
764
765 /**
766 * Constant for the "Spacing Modifier Letters" Unicode character block.
767 * @since 1.2
768 */
769 public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
770 new UnicodeBlock("SPACING_MODIFIER_LETTERS",
771 "SPACING MODIFIER LETTERS",
772 "SPACINGMODIFIERLETTERS");
773
774 /**
775 * Constant for the "Combining Diacritical Marks" Unicode character block.
776 * @since 1.2
777 */
778 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
779 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
780 "COMBINING DIACRITICAL MARKS",
781 "COMBININGDIACRITICALMARKS");
782
783 /**
784 * Constant for the "Greek and Coptic" Unicode character block.
785 * <p>
786 * This block was previously known as the "Greek" block.
787 *
788 * @since 1.2
789 */
790 public static final UnicodeBlock GREEK =
791 new UnicodeBlock("GREEK",
792 "GREEK AND COPTIC",
793 "GREEKANDCOPTIC");
794
795 /**
796 * Constant for the "Cyrillic" Unicode character block.
797 * @since 1.2
798 */
799 public static final UnicodeBlock CYRILLIC =
800 new UnicodeBlock("CYRILLIC");
801
802 /**
803 * Constant for the "Armenian" Unicode character block.
804 * @since 1.2
805 */
806 public static final UnicodeBlock ARMENIAN =
807 new UnicodeBlock("ARMENIAN");
808
809 /**
810 * Constant for the "Hebrew" Unicode character block.
811 * @since 1.2
812 */
813 public static final UnicodeBlock HEBREW =
814 new UnicodeBlock("HEBREW");
815
816 /**
817 * Constant for the "Arabic" Unicode character block.
818 * @since 1.2
819 */
820 public static final UnicodeBlock ARABIC =
821 new UnicodeBlock("ARABIC");
822
823 /**
824 * Constant for the "Devanagari" Unicode character block.
825 * @since 1.2
826 */
827 public static final UnicodeBlock DEVANAGARI =
828 new UnicodeBlock("DEVANAGARI");
829
830 /**
831 * Constant for the "Bengali" Unicode character block.
832 * @since 1.2
833 */
834 public static final UnicodeBlock BENGALI =
835 new UnicodeBlock("BENGALI");
836
837 /**
838 * Constant for the "Gurmukhi" Unicode character block.
839 * @since 1.2
840 */
841 public static final UnicodeBlock GURMUKHI =
842 new UnicodeBlock("GURMUKHI");
843
844 /**
845 * Constant for the "Gujarati" Unicode character block.
846 * @since 1.2
847 */
848 public static final UnicodeBlock GUJARATI =
849 new UnicodeBlock("GUJARATI");
850
851 /**
852 * Constant for the "Oriya" Unicode character block.
853 * @since 1.2
854 */
855 public static final UnicodeBlock ORIYA =
856 new UnicodeBlock("ORIYA");
857
858 /**
859 * Constant for the "Tamil" Unicode character block.
860 * @since 1.2
861 */
862 public static final UnicodeBlock TAMIL =
863 new UnicodeBlock("TAMIL");
864
865 /**
866 * Constant for the "Telugu" Unicode character block.
867 * @since 1.2
868 */
869 public static final UnicodeBlock TELUGU =
870 new UnicodeBlock("TELUGU");
871
872 /**
873 * Constant for the "Kannada" Unicode character block.
874 * @since 1.2
875 */
876 public static final UnicodeBlock KANNADA =
877 new UnicodeBlock("KANNADA");
878
879 /**
880 * Constant for the "Malayalam" Unicode character block.
881 * @since 1.2
882 */
883 public static final UnicodeBlock MALAYALAM =
884 new UnicodeBlock("MALAYALAM");
885
886 /**
887 * Constant for the "Thai" Unicode character block.
888 * @since 1.2
889 */
890 public static final UnicodeBlock THAI =
891 new UnicodeBlock("THAI");
892
893 /**
894 * Constant for the "Lao" Unicode character block.
895 * @since 1.2
896 */
897 public static final UnicodeBlock LAO =
898 new UnicodeBlock("LAO");
899
900 /**
901 * Constant for the "Tibetan" Unicode character block.
902 * @since 1.2
903 */
904 public static final UnicodeBlock TIBETAN =
905 new UnicodeBlock("TIBETAN");
906
907 /**
908 * Constant for the "Georgian" Unicode character block.
909 * @since 1.2
910 */
911 public static final UnicodeBlock GEORGIAN =
912 new UnicodeBlock("GEORGIAN");
913
914 /**
915 * Constant for the "Hangul Jamo" Unicode character block.
916 * @since 1.2
917 */
918 public static final UnicodeBlock HANGUL_JAMO =
919 new UnicodeBlock("HANGUL_JAMO",
920 "HANGUL JAMO",
921 "HANGULJAMO");
922
923 /**
924 * Constant for the "Latin Extended Additional" Unicode character block.
925 * @since 1.2
926 */
927 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
928 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
929 "LATIN EXTENDED ADDITIONAL",
930 "LATINEXTENDEDADDITIONAL");
931
932 /**
933 * Constant for the "Greek Extended" Unicode character block.
934 * @since 1.2
935 */
936 public static final UnicodeBlock GREEK_EXTENDED =
937 new UnicodeBlock("GREEK_EXTENDED",
938 "GREEK EXTENDED",
939 "GREEKEXTENDED");
940
941 /**
942 * Constant for the "General Punctuation" Unicode character block.
943 * @since 1.2
944 */
945 public static final UnicodeBlock GENERAL_PUNCTUATION =
946 new UnicodeBlock("GENERAL_PUNCTUATION",
947 "GENERAL PUNCTUATION",
948 "GENERALPUNCTUATION");
949
950 /**
951 * Constant for the "Superscripts and Subscripts" Unicode character
952 * block.
953 * @since 1.2
954 */
955 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
956 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
957 "SUPERSCRIPTS AND SUBSCRIPTS",
958 "SUPERSCRIPTSANDSUBSCRIPTS");
959
960 /**
961 * Constant for the "Currency Symbols" Unicode character block.
962 * @since 1.2
963 */
964 public static final UnicodeBlock CURRENCY_SYMBOLS =
965 new UnicodeBlock("CURRENCY_SYMBOLS",
966 "CURRENCY SYMBOLS",
967 "CURRENCYSYMBOLS");
968
969 /**
970 * Constant for the "Combining Diacritical Marks for Symbols" Unicode
971 * character block.
972 * <p>
973 * This block was previously known as "Combining Marks for Symbols".
974 * @since 1.2
975 */
976 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
977 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
978 "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
979 "COMBININGDIACRITICALMARKSFORSYMBOLS",
980 "COMBINING MARKS FOR SYMBOLS",
981 "COMBININGMARKSFORSYMBOLS");
982
983 /**
984 * Constant for the "Letterlike Symbols" Unicode character block.
985 * @since 1.2
986 */
987 public static final UnicodeBlock LETTERLIKE_SYMBOLS =
988 new UnicodeBlock("LETTERLIKE_SYMBOLS",
989 "LETTERLIKE SYMBOLS",
990 "LETTERLIKESYMBOLS");
991
992 /**
993 * Constant for the "Number Forms" Unicode character block.
994 * @since 1.2
995 */
996 public static final UnicodeBlock NUMBER_FORMS =
997 new UnicodeBlock("NUMBER_FORMS",
998 "NUMBER FORMS",
999 "NUMBERFORMS");
1000
1001 /**
1002 * Constant for the "Arrows" Unicode character block.
1003 * @since 1.2
1004 */
1005 public static final UnicodeBlock ARROWS =
1006 new UnicodeBlock("ARROWS");
1007
1008 /**
1009 * Constant for the "Mathematical Operators" Unicode character block.
1010 * @since 1.2
1011 */
1012 public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1013 new UnicodeBlock("MATHEMATICAL_OPERATORS",
1014 "MATHEMATICAL OPERATORS",
1015 "MATHEMATICALOPERATORS");
1016
1017 /**
1018 * Constant for the "Miscellaneous Technical" Unicode character block.
1019 * @since 1.2
1020 */
1021 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1022 new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1023 "MISCELLANEOUS TECHNICAL",
1024 "MISCELLANEOUSTECHNICAL");
1025
1026 /**
1027 * Constant for the "Control Pictures" Unicode character block.
1028 * @since 1.2
1029 */
1030 public static final UnicodeBlock CONTROL_PICTURES =
1031 new UnicodeBlock("CONTROL_PICTURES",
1032 "CONTROL PICTURES",
1033 "CONTROLPICTURES");
1034
1035 /**
1036 * Constant for the "Optical Character Recognition" Unicode character block.
1037 * @since 1.2
1038 */
1039 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1040 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1041 "OPTICAL CHARACTER RECOGNITION",
1042 "OPTICALCHARACTERRECOGNITION");
1043
1044 /**
1045 * Constant for the "Enclosed Alphanumerics" Unicode character block.
1046 * @since 1.2
1047 */
1048 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1049 new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1050 "ENCLOSED ALPHANUMERICS",
1051 "ENCLOSEDALPHANUMERICS");
1052
1053 /**
1054 * Constant for the "Box Drawing" Unicode character block.
1055 * @since 1.2
1056 */
1057 public static final UnicodeBlock BOX_DRAWING =
1058 new UnicodeBlock("BOX_DRAWING",
1059 "BOX DRAWING",
1060 "BOXDRAWING");
1061
1062 /**
1063 * Constant for the "Block Elements" Unicode character block.
1064 * @since 1.2
1065 */
1066 public static final UnicodeBlock BLOCK_ELEMENTS =
1067 new UnicodeBlock("BLOCK_ELEMENTS",
1068 "BLOCK ELEMENTS",
1069 "BLOCKELEMENTS");
1070
1071 /**
1072 * Constant for the "Geometric Shapes" Unicode character block.
1073 * @since 1.2
1074 */
1075 public static final UnicodeBlock GEOMETRIC_SHAPES =
1076 new UnicodeBlock("GEOMETRIC_SHAPES",
1077 "GEOMETRIC SHAPES",
1078 "GEOMETRICSHAPES");
1079
1080 /**
1081 * Constant for the "Miscellaneous Symbols" Unicode character block.
1082 * @since 1.2
1083 */
1084 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1085 new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1086 "MISCELLANEOUS SYMBOLS",
1087 "MISCELLANEOUSSYMBOLS");
1088
1089 /**
1090 * Constant for the "Dingbats" Unicode character block.
1091 * @since 1.2
1092 */
1093 public static final UnicodeBlock DINGBATS =
1094 new UnicodeBlock("DINGBATS");
1095
1096 /**
1097 * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1098 * @since 1.2
1099 */
1100 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1101 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1102 "CJK SYMBOLS AND PUNCTUATION",
1103 "CJKSYMBOLSANDPUNCTUATION");
1104
1105 /**
1106 * Constant for the "Hiragana" Unicode character block.
1107 * @since 1.2
1108 */
1109 public static final UnicodeBlock HIRAGANA =
1110 new UnicodeBlock("HIRAGANA");
1111
1112 /**
1113 * Constant for the "Katakana" Unicode character block.
1114 * @since 1.2
1115 */
1116 public static final UnicodeBlock KATAKANA =
1117 new UnicodeBlock("KATAKANA");
1118
1119 /**
1120 * Constant for the "Bopomofo" Unicode character block.
1121 * @since 1.2
1122 */
1123 public static final UnicodeBlock BOPOMOFO =
1124 new UnicodeBlock("BOPOMOFO");
1125
1126 /**
1127 * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1128 * @since 1.2
1129 */
1130 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1131 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1132 "HANGUL COMPATIBILITY JAMO",
1133 "HANGULCOMPATIBILITYJAMO");
1134
1135 /**
1136 * Constant for the "Kanbun" Unicode character block.
1137 * @since 1.2
1138 */
1139 public static final UnicodeBlock KANBUN =
1140 new UnicodeBlock("KANBUN");
1141
1142 /**
1143 * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1144 * @since 1.2
1145 */
1146 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1147 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1148 "ENCLOSED CJK LETTERS AND MONTHS",
1149 "ENCLOSEDCJKLETTERSANDMONTHS");
1150
1151 /**
1152 * Constant for the "CJK Compatibility" Unicode character block.
1153 * @since 1.2
1154 */
1155 public static final UnicodeBlock CJK_COMPATIBILITY =
1156 new UnicodeBlock("CJK_COMPATIBILITY",
1157 "CJK COMPATIBILITY",
1158 "CJKCOMPATIBILITY");
1159
1160 /**
1161 * Constant for the "CJK Unified Ideographs" Unicode character block.
1162 * @since 1.2
1163 */
1164 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1165 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1166 "CJK UNIFIED IDEOGRAPHS",
1167 "CJKUNIFIEDIDEOGRAPHS");
1168
1169 /**
1170 * Constant for the "Hangul Syllables" Unicode character block.
1171 * @since 1.2
1172 */
1173 public static final UnicodeBlock HANGUL_SYLLABLES =
1174 new UnicodeBlock("HANGUL_SYLLABLES",
1175 "HANGUL SYLLABLES",
1176 "HANGULSYLLABLES");
1177
1178 /**
1179 * Constant for the "Private Use Area" Unicode character block.
1180 * @since 1.2
1181 */
1182 public static final UnicodeBlock PRIVATE_USE_AREA =
1183 new UnicodeBlock("PRIVATE_USE_AREA",
1184 "PRIVATE USE AREA",
1185 "PRIVATEUSEAREA");
1186
1187 /**
1188 * Constant for the "CJK Compatibility Ideographs" Unicode character
1189 * block.
1190 * @since 1.2
1191 */
1192 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1193 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1194 "CJK COMPATIBILITY IDEOGRAPHS",
1195 "CJKCOMPATIBILITYIDEOGRAPHS");
1196
1197 /**
1198 * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1199 * @since 1.2
1200 */
1201 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1202 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1203 "ALPHABETIC PRESENTATION FORMS",
1204 "ALPHABETICPRESENTATIONFORMS");
1205
1206 /**
1207 * Constant for the "Arabic Presentation Forms-A" Unicode character
1208 * block.
1209 * @since 1.2
1210 */
1211 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1212 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1213 "ARABIC PRESENTATION FORMS-A",
1214 "ARABICPRESENTATIONFORMS-A");
1215
1216 /**
1217 * Constant for the "Combining Half Marks" Unicode character block.
1218 * @since 1.2
1219 */
1220 public static final UnicodeBlock COMBINING_HALF_MARKS =
1221 new UnicodeBlock("COMBINING_HALF_MARKS",
1222 "COMBINING HALF MARKS",
1223 "COMBININGHALFMARKS");
1224
1225 /**
1226 * Constant for the "CJK Compatibility Forms" Unicode character block.
1227 * @since 1.2
1228 */
1229 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1230 new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1231 "CJK COMPATIBILITY FORMS",
1232 "CJKCOMPATIBILITYFORMS");
1233
1234 /**
1235 * Constant for the "Small Form Variants" Unicode character block.
1236 * @since 1.2
1237 */
1238 public static final UnicodeBlock SMALL_FORM_VARIANTS =
1239 new UnicodeBlock("SMALL_FORM_VARIANTS",
1240 "SMALL FORM VARIANTS",
1241 "SMALLFORMVARIANTS");
1242
1243 /**
1244 * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1245 * @since 1.2
1246 */
1247 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1248 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1249 "ARABIC PRESENTATION FORMS-B",
1250 "ARABICPRESENTATIONFORMS-B");
1251
1252 /**
1253 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1254 * block.
1255 * @since 1.2
1256 */
1257 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1258 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1259 "HALFWIDTH AND FULLWIDTH FORMS",
1260 "HALFWIDTHANDFULLWIDTHFORMS");
1261
1262 /**
1263 * Constant for the "Specials" Unicode character block.
1264 * @since 1.2
1265 */
1266 public static final UnicodeBlock SPECIALS =
1267 new UnicodeBlock("SPECIALS");
1268
1269 /**
1270 * @deprecated
1271 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES},
1272 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1273 * These constants match the block definitions of the Unicode Standard.
1274 * The {@link #of(char)} and {@link #of(int)} methods return the
1275 * standard constants.
1276 */
1277 @Deprecated(since="1.5")
1278 public static final UnicodeBlock SURROGATES_AREA =
1279 new UnicodeBlock("SURROGATES_AREA");
1280
1281 /**
1282 * Constant for the "Syriac" Unicode character block.
1283 * @since 1.4
1284 */
1285 public static final UnicodeBlock SYRIAC =
1286 new UnicodeBlock("SYRIAC");
1287
1288 /**
1289 * Constant for the "Thaana" Unicode character block.
1290 * @since 1.4
1291 */
1292 public static final UnicodeBlock THAANA =
1293 new UnicodeBlock("THAANA");
1294
1295 /**
1296 * Constant for the "Sinhala" Unicode character block.
1297 * @since 1.4
1298 */
1299 public static final UnicodeBlock SINHALA =
1300 new UnicodeBlock("SINHALA");
1301
1302 /**
1303 * Constant for the "Myanmar" Unicode character block.
1304 * @since 1.4
1305 */
1306 public static final UnicodeBlock MYANMAR =
1307 new UnicodeBlock("MYANMAR");
1308
1309 /**
1310 * Constant for the "Ethiopic" Unicode character block.
1311 * @since 1.4
1312 */
1313 public static final UnicodeBlock ETHIOPIC =
1314 new UnicodeBlock("ETHIOPIC");
1315
1316 /**
1317 * Constant for the "Cherokee" Unicode character block.
1318 * @since 1.4
1319 */
1320 public static final UnicodeBlock CHEROKEE =
1321 new UnicodeBlock("CHEROKEE");
1322
1323 /**
1324 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1325 * @since 1.4
1326 */
1327 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1328 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1329 "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1330 "UNIFIEDCANADIANABORIGINALSYLLABICS");
1331
1332 /**
1333 * Constant for the "Ogham" Unicode character block.
1334 * @since 1.4
1335 */
1336 public static final UnicodeBlock OGHAM =
1337 new UnicodeBlock("OGHAM");
1338
1339 /**
1340 * Constant for the "Runic" Unicode character block.
1341 * @since 1.4
1342 */
1343 public static final UnicodeBlock RUNIC =
1344 new UnicodeBlock("RUNIC");
1345
1346 /**
1347 * Constant for the "Khmer" Unicode character block.
1348 * @since 1.4
1349 */
1350 public static final UnicodeBlock KHMER =
1351 new UnicodeBlock("KHMER");
1352
1353 /**
1354 * Constant for the "Mongolian" Unicode character block.
1355 * @since 1.4
1356 */
1357 public static final UnicodeBlock MONGOLIAN =
1358 new UnicodeBlock("MONGOLIAN");
1359
1360 /**
1361 * Constant for the "Braille Patterns" Unicode character block.
1362 * @since 1.4
1363 */
1364 public static final UnicodeBlock BRAILLE_PATTERNS =
1365 new UnicodeBlock("BRAILLE_PATTERNS",
1366 "BRAILLE PATTERNS",
1367 "BRAILLEPATTERNS");
1368
1369 /**
1370 * Constant for the "CJK Radicals Supplement" Unicode character block.
1371 * @since 1.4
1372 */
1373 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1374 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1375 "CJK RADICALS SUPPLEMENT",
1376 "CJKRADICALSSUPPLEMENT");
1377
1378 /**
1379 * Constant for the "Kangxi Radicals" Unicode character block.
1380 * @since 1.4
1381 */
1382 public static final UnicodeBlock KANGXI_RADICALS =
1383 new UnicodeBlock("KANGXI_RADICALS",
1384 "KANGXI RADICALS",
1385 "KANGXIRADICALS");
1386
1387 /**
1388 * Constant for the "Ideographic Description Characters" Unicode character block.
1389 * @since 1.4
1390 */
1391 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1392 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1393 "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1394 "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1395
1396 /**
1397 * Constant for the "Bopomofo Extended" Unicode character block.
1398 * @since 1.4
1399 */
1400 public static final UnicodeBlock BOPOMOFO_EXTENDED =
1401 new UnicodeBlock("BOPOMOFO_EXTENDED",
1402 "BOPOMOFO EXTENDED",
1403 "BOPOMOFOEXTENDED");
1404
1405 /**
1406 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1407 * @since 1.4
1408 */
1409 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1410 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1411 "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1412 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1413
1414 /**
1415 * Constant for the "Yi Syllables" Unicode character block.
1416 * @since 1.4
1417 */
1418 public static final UnicodeBlock YI_SYLLABLES =
1419 new UnicodeBlock("YI_SYLLABLES",
1420 "YI SYLLABLES",
1421 "YISYLLABLES");
1422
1423 /**
1424 * Constant for the "Yi Radicals" Unicode character block.
1425 * @since 1.4
1426 */
1427 public static final UnicodeBlock YI_RADICALS =
1428 new UnicodeBlock("YI_RADICALS",
1429 "YI RADICALS",
1430 "YIRADICALS");
1431
1432 /**
1433 * Constant for the "Cyrillic Supplement" Unicode character block.
1434 * This block was previously known as the "Cyrillic Supplementary" block.
1435 * @since 1.5
1436 */
1437 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1438 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1439 "CYRILLIC SUPPLEMENTARY",
1440 "CYRILLICSUPPLEMENTARY",
1441 "CYRILLIC SUPPLEMENT",
1442 "CYRILLICSUPPLEMENT");
1443
1444 /**
1445 * Constant for the "Tagalog" Unicode character block.
1446 * @since 1.5
1447 */
1448 public static final UnicodeBlock TAGALOG =
1449 new UnicodeBlock("TAGALOG");
1450
1451 /**
1452 * Constant for the "Hanunoo" Unicode character block.
1453 * @since 1.5
1454 */
1455 public static final UnicodeBlock HANUNOO =
1456 new UnicodeBlock("HANUNOO");
1457
1458 /**
1459 * Constant for the "Buhid" Unicode character block.
1460 * @since 1.5
1461 */
1462 public static final UnicodeBlock BUHID =
1463 new UnicodeBlock("BUHID");
1464
1465 /**
1466 * Constant for the "Tagbanwa" Unicode character block.
1467 * @since 1.5
1468 */
1469 public static final UnicodeBlock TAGBANWA =
1470 new UnicodeBlock("TAGBANWA");
1471
1472 /**
1473 * Constant for the "Limbu" Unicode character block.
1474 * @since 1.5
1475 */
1476 public static final UnicodeBlock LIMBU =
1477 new UnicodeBlock("LIMBU");
1478
1479 /**
1480 * Constant for the "Tai Le" Unicode character block.
1481 * @since 1.5
1482 */
1483 public static final UnicodeBlock TAI_LE =
1484 new UnicodeBlock("TAI_LE",
1485 "TAI LE",
1486 "TAILE");
1487
1488 /**
1489 * Constant for the "Khmer Symbols" Unicode character block.
1490 * @since 1.5
1491 */
1492 public static final UnicodeBlock KHMER_SYMBOLS =
1493 new UnicodeBlock("KHMER_SYMBOLS",
1494 "KHMER SYMBOLS",
1495 "KHMERSYMBOLS");
1496
1497 /**
1498 * Constant for the "Phonetic Extensions" Unicode character block.
1499 * @since 1.5
1500 */
1501 public static final UnicodeBlock PHONETIC_EXTENSIONS =
1502 new UnicodeBlock("PHONETIC_EXTENSIONS",
1503 "PHONETIC EXTENSIONS",
1504 "PHONETICEXTENSIONS");
1505
1506 /**
1507 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1508 * @since 1.5
1509 */
1510 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1511 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1512 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1513 "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1514
1515 /**
1516 * Constant for the "Supplemental Arrows-A" Unicode character block.
1517 * @since 1.5
1518 */
1519 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1520 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1521 "SUPPLEMENTAL ARROWS-A",
1522 "SUPPLEMENTALARROWS-A");
1523
1524 /**
1525 * Constant for the "Supplemental Arrows-B" Unicode character block.
1526 * @since 1.5
1527 */
1528 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1529 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1530 "SUPPLEMENTAL ARROWS-B",
1531 "SUPPLEMENTALARROWS-B");
1532
1533 /**
1534 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1535 * character block.
1536 * @since 1.5
1537 */
1538 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1539 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1540 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1541 "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1542
1543 /**
1544 * Constant for the "Supplemental Mathematical Operators" Unicode
1545 * character block.
1546 * @since 1.5
1547 */
1548 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1549 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1550 "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1551 "SUPPLEMENTALMATHEMATICALOPERATORS");
1552
1553 /**
1554 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1555 * block.
1556 * @since 1.5
1557 */
1558 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1559 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1560 "MISCELLANEOUS SYMBOLS AND ARROWS",
1561 "MISCELLANEOUSSYMBOLSANDARROWS");
1562
1563 /**
1564 * Constant for the "Katakana Phonetic Extensions" Unicode character
1565 * block.
1566 * @since 1.5
1567 */
1568 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1569 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1570 "KATAKANA PHONETIC EXTENSIONS",
1571 "KATAKANAPHONETICEXTENSIONS");
1572
1573 /**
1574 * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1575 * @since 1.5
1576 */
1577 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1578 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1579 "YIJING HEXAGRAM SYMBOLS",
1580 "YIJINGHEXAGRAMSYMBOLS");
1581
1582 /**
1583 * Constant for the "Variation Selectors" Unicode character block.
1584 * @since 1.5
1585 */
1586 public static final UnicodeBlock VARIATION_SELECTORS =
1587 new UnicodeBlock("VARIATION_SELECTORS",
1588 "VARIATION SELECTORS",
1589 "VARIATIONSELECTORS");
1590
1591 /**
1592 * Constant for the "Linear B Syllabary" Unicode character block.
1593 * @since 1.5
1594 */
1595 public static final UnicodeBlock LINEAR_B_SYLLABARY =
1596 new UnicodeBlock("LINEAR_B_SYLLABARY",
1597 "LINEAR B SYLLABARY",
1598 "LINEARBSYLLABARY");
1599
1600 /**
1601 * Constant for the "Linear B Ideograms" Unicode character block.
1602 * @since 1.5
1603 */
1604 public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1605 new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1606 "LINEAR B IDEOGRAMS",
1607 "LINEARBIDEOGRAMS");
1608
1609 /**
1610 * Constant for the "Aegean Numbers" Unicode character block.
1611 * @since 1.5
1612 */
1613 public static final UnicodeBlock AEGEAN_NUMBERS =
1614 new UnicodeBlock("AEGEAN_NUMBERS",
1615 "AEGEAN NUMBERS",
1616 "AEGEANNUMBERS");
1617
1618 /**
1619 * Constant for the "Old Italic" Unicode character block.
1620 * @since 1.5
1621 */
1622 public static final UnicodeBlock OLD_ITALIC =
1623 new UnicodeBlock("OLD_ITALIC",
1624 "OLD ITALIC",
1625 "OLDITALIC");
1626
1627 /**
1628 * Constant for the "Gothic" Unicode character block.
1629 * @since 1.5
1630 */
1631 public static final UnicodeBlock GOTHIC =
1632 new UnicodeBlock("GOTHIC");
1633
1634 /**
1635 * Constant for the "Ugaritic" Unicode character block.
1636 * @since 1.5
1637 */
1638 public static final UnicodeBlock UGARITIC =
1639 new UnicodeBlock("UGARITIC");
1640
1641 /**
1642 * Constant for the "Deseret" Unicode character block.
1643 * @since 1.5
1644 */
1645 public static final UnicodeBlock DESERET =
1646 new UnicodeBlock("DESERET");
1647
1648 /**
1649 * Constant for the "Shavian" Unicode character block.
1650 * @since 1.5
1651 */
1652 public static final UnicodeBlock SHAVIAN =
1653 new UnicodeBlock("SHAVIAN");
1654
1655 /**
1656 * Constant for the "Osmanya" Unicode character block.
1657 * @since 1.5
1658 */
1659 public static final UnicodeBlock OSMANYA =
1660 new UnicodeBlock("OSMANYA");
1661
1662 /**
1663 * Constant for the "Cypriot Syllabary" Unicode character block.
1664 * @since 1.5
1665 */
1666 public static final UnicodeBlock CYPRIOT_SYLLABARY =
1667 new UnicodeBlock("CYPRIOT_SYLLABARY",
1668 "CYPRIOT SYLLABARY",
1669 "CYPRIOTSYLLABARY");
1670
1671 /**
1672 * Constant for the "Byzantine Musical Symbols" Unicode character block.
1673 * @since 1.5
1674 */
1675 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1676 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1677 "BYZANTINE MUSICAL SYMBOLS",
1678 "BYZANTINEMUSICALSYMBOLS");
1679
1680 /**
1681 * Constant for the "Musical Symbols" Unicode character block.
1682 * @since 1.5
1683 */
1684 public static final UnicodeBlock MUSICAL_SYMBOLS =
1685 new UnicodeBlock("MUSICAL_SYMBOLS",
1686 "MUSICAL SYMBOLS",
1687 "MUSICALSYMBOLS");
1688
1689 /**
1690 * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1691 * @since 1.5
1692 */
1693 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1694 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1695 "TAI XUAN JING SYMBOLS",
1696 "TAIXUANJINGSYMBOLS");
1697
1698 /**
1699 * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1700 * character block.
1701 * @since 1.5
1702 */
1703 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1704 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1705 "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1706 "MATHEMATICALALPHANUMERICSYMBOLS");
1707
1708 /**
1709 * Constant for the "CJK Unified Ideographs Extension B" Unicode
1710 * character block.
1711 * @since 1.5
1712 */
1713 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1714 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1715 "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1716 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1717
1718 /**
1719 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1720 * @since 1.5
1721 */
1722 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1723 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1724 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1725 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1726
1727 /**
1728 * Constant for the "Tags" Unicode character block.
1729 * @since 1.5
1730 */
1731 public static final UnicodeBlock TAGS =
1732 new UnicodeBlock("TAGS");
1733
1734 /**
1735 * Constant for the "Variation Selectors Supplement" Unicode character
1736 * block.
1737 * @since 1.5
1738 */
1739 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1740 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1741 "VARIATION SELECTORS SUPPLEMENT",
1742 "VARIATIONSELECTORSSUPPLEMENT");
1743
1744 /**
1745 * Constant for the "Supplementary Private Use Area-A" Unicode character
1746 * block.
1747 * @since 1.5
1748 */
1749 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1750 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1751 "SUPPLEMENTARY PRIVATE USE AREA-A",
1752 "SUPPLEMENTARYPRIVATEUSEAREA-A");
1753
1754 /**
1755 * Constant for the "Supplementary Private Use Area-B" Unicode character
1756 * block.
1757 * @since 1.5
1758 */
1759 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1760 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1761 "SUPPLEMENTARY PRIVATE USE AREA-B",
1762 "SUPPLEMENTARYPRIVATEUSEAREA-B");
1763
1764 /**
1765 * Constant for the "High Surrogates" Unicode character block.
1766 * This block represents codepoint values in the high surrogate
1767 * range: U+D800 through U+DB7F
1768 *
1769 * @since 1.5
1770 */
1771 public static final UnicodeBlock HIGH_SURROGATES =
1772 new UnicodeBlock("HIGH_SURROGATES",
1773 "HIGH SURROGATES",
1774 "HIGHSURROGATES");
1775
1776 /**
1777 * Constant for the "High Private Use Surrogates" Unicode character
1778 * block.
1779 * This block represents codepoint values in the private use high
1780 * surrogate range: U+DB80 through U+DBFF
1781 *
1782 * @since 1.5
1783 */
1784 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1785 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1786 "HIGH PRIVATE USE SURROGATES",
1787 "HIGHPRIVATEUSESURROGATES");
1788
1789 /**
1790 * Constant for the "Low Surrogates" Unicode character block.
1791 * This block represents codepoint values in the low surrogate
1792 * range: U+DC00 through U+DFFF
1793 *
1794 * @since 1.5
1795 */
1796 public static final UnicodeBlock LOW_SURROGATES =
1797 new UnicodeBlock("LOW_SURROGATES",
1798 "LOW SURROGATES",
1799 "LOWSURROGATES");
1800
1801 /**
1802 * Constant for the "Arabic Supplement" Unicode character block.
1803 * @since 1.7
1804 */
1805 public static final UnicodeBlock ARABIC_SUPPLEMENT =
1806 new UnicodeBlock("ARABIC_SUPPLEMENT",
1807 "ARABIC SUPPLEMENT",
1808 "ARABICSUPPLEMENT");
1809
1810 /**
1811 * Constant for the "NKo" Unicode character block.
1812 * @since 1.7
1813 */
1814 public static final UnicodeBlock NKO =
1815 new UnicodeBlock("NKO");
1816
1817 /**
1818 * Constant for the "Samaritan" Unicode character block.
1819 * @since 1.7
1820 */
1821 public static final UnicodeBlock SAMARITAN =
1822 new UnicodeBlock("SAMARITAN");
1823
1824 /**
1825 * Constant for the "Mandaic" Unicode character block.
1826 * @since 1.7
1827 */
1828 public static final UnicodeBlock MANDAIC =
1829 new UnicodeBlock("MANDAIC");
1830
1831 /**
1832 * Constant for the "Ethiopic Supplement" Unicode character block.
1833 * @since 1.7
1834 */
1835 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1836 new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1837 "ETHIOPIC SUPPLEMENT",
1838 "ETHIOPICSUPPLEMENT");
1839
1840 /**
1841 * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1842 * Unicode character block.
1843 * @since 1.7
1844 */
1845 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1846 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1847 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1848 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1849
1850 /**
1851 * Constant for the "New Tai Lue" Unicode character block.
1852 * @since 1.7
1853 */
1854 public static final UnicodeBlock NEW_TAI_LUE =
1855 new UnicodeBlock("NEW_TAI_LUE",
1856 "NEW TAI LUE",
1857 "NEWTAILUE");
1858
1859 /**
1860 * Constant for the "Buginese" Unicode character block.
1861 * @since 1.7
1862 */
1863 public static final UnicodeBlock BUGINESE =
1864 new UnicodeBlock("BUGINESE");
1865
1866 /**
1867 * Constant for the "Tai Tham" Unicode character block.
1868 * @since 1.7
1869 */
1870 public static final UnicodeBlock TAI_THAM =
1871 new UnicodeBlock("TAI_THAM",
1872 "TAI THAM",
1873 "TAITHAM");
1874
1875 /**
1876 * Constant for the "Balinese" Unicode character block.
1877 * @since 1.7
1878 */
1879 public static final UnicodeBlock BALINESE =
1880 new UnicodeBlock("BALINESE");
1881
1882 /**
1883 * Constant for the "Sundanese" Unicode character block.
1884 * @since 1.7
1885 */
1886 public static final UnicodeBlock SUNDANESE =
1887 new UnicodeBlock("SUNDANESE");
1888
1889 /**
1890 * Constant for the "Batak" Unicode character block.
1891 * @since 1.7
1892 */
1893 public static final UnicodeBlock BATAK =
1894 new UnicodeBlock("BATAK");
1895
1896 /**
1897 * Constant for the "Lepcha" Unicode character block.
1898 * @since 1.7
1899 */
1900 public static final UnicodeBlock LEPCHA =
1901 new UnicodeBlock("LEPCHA");
1902
1903 /**
1904 * Constant for the "Ol Chiki" Unicode character block.
1905 * @since 1.7
1906 */
1907 public static final UnicodeBlock OL_CHIKI =
1908 new UnicodeBlock("OL_CHIKI",
1909 "OL CHIKI",
1910 "OLCHIKI");
1911
1912 /**
1913 * Constant for the "Vedic Extensions" Unicode character block.
1914 * @since 1.7
1915 */
1916 public static final UnicodeBlock VEDIC_EXTENSIONS =
1917 new UnicodeBlock("VEDIC_EXTENSIONS",
1918 "VEDIC EXTENSIONS",
1919 "VEDICEXTENSIONS");
1920
1921 /**
1922 * Constant for the "Phonetic Extensions Supplement" Unicode character
1923 * block.
1924 * @since 1.7
1925 */
1926 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1927 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1928 "PHONETIC EXTENSIONS SUPPLEMENT",
1929 "PHONETICEXTENSIONSSUPPLEMENT");
1930
1931 /**
1932 * Constant for the "Combining Diacritical Marks Supplement" Unicode
1933 * character block.
1934 * @since 1.7
1935 */
1936 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1937 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1938 "COMBINING DIACRITICAL MARKS SUPPLEMENT",
1939 "COMBININGDIACRITICALMARKSSUPPLEMENT");
1940
1941 /**
1942 * Constant for the "Glagolitic" Unicode character block.
1943 * @since 1.7
1944 */
1945 public static final UnicodeBlock GLAGOLITIC =
1946 new UnicodeBlock("GLAGOLITIC");
1947
1948 /**
1949 * Constant for the "Latin Extended-C" Unicode character block.
1950 * @since 1.7
1951 */
1952 public static final UnicodeBlock LATIN_EXTENDED_C =
1953 new UnicodeBlock("LATIN_EXTENDED_C",
1954 "LATIN EXTENDED-C",
1955 "LATINEXTENDED-C");
1956
1957 /**
1958 * Constant for the "Coptic" Unicode character block.
1959 * @since 1.7
1960 */
1961 public static final UnicodeBlock COPTIC =
1962 new UnicodeBlock("COPTIC");
1963
1964 /**
1965 * Constant for the "Georgian Supplement" Unicode character block.
1966 * @since 1.7
1967 */
1968 public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1969 new UnicodeBlock("GEORGIAN_SUPPLEMENT",
1970 "GEORGIAN SUPPLEMENT",
1971 "GEORGIANSUPPLEMENT");
1972
1973 /**
1974 * Constant for the "Tifinagh" Unicode character block.
1975 * @since 1.7
1976 */
1977 public static final UnicodeBlock TIFINAGH =
1978 new UnicodeBlock("TIFINAGH");
1979
1980 /**
1981 * Constant for the "Ethiopic Extended" Unicode character block.
1982 * @since 1.7
1983 */
1984 public static final UnicodeBlock ETHIOPIC_EXTENDED =
1985 new UnicodeBlock("ETHIOPIC_EXTENDED",
1986 "ETHIOPIC EXTENDED",
1987 "ETHIOPICEXTENDED");
1988
1989 /**
1990 * Constant for the "Cyrillic Extended-A" Unicode character block.
1991 * @since 1.7
1992 */
1993 public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1994 new UnicodeBlock("CYRILLIC_EXTENDED_A",
1995 "CYRILLIC EXTENDED-A",
1996 "CYRILLICEXTENDED-A");
1997
1998 /**
1999 * Constant for the "Supplemental Punctuation" Unicode character block.
2000 * @since 1.7
2001 */
2002 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2003 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
2004 "SUPPLEMENTAL PUNCTUATION",
2005 "SUPPLEMENTALPUNCTUATION");
2006
2007 /**
2008 * Constant for the "CJK Strokes" Unicode character block.
2009 * @since 1.7
2010 */
2011 public static final UnicodeBlock CJK_STROKES =
2012 new UnicodeBlock("CJK_STROKES",
2013 "CJK STROKES",
2014 "CJKSTROKES");
2015
2016 /**
2017 * Constant for the "Lisu" Unicode character block.
2018 * @since 1.7
2019 */
2020 public static final UnicodeBlock LISU =
2021 new UnicodeBlock("LISU");
2022
2023 /**
2024 * Constant for the "Vai" Unicode character block.
2025 * @since 1.7
2026 */
2027 public static final UnicodeBlock VAI =
2028 new UnicodeBlock("VAI");
2029
2030 /**
2031 * Constant for the "Cyrillic Extended-B" Unicode character block.
2032 * @since 1.7
2033 */
2034 public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2035 new UnicodeBlock("CYRILLIC_EXTENDED_B",
2036 "CYRILLIC EXTENDED-B",
2037 "CYRILLICEXTENDED-B");
2038
2039 /**
2040 * Constant for the "Bamum" Unicode character block.
2041 * @since 1.7
2042 */
2043 public static final UnicodeBlock BAMUM =
2044 new UnicodeBlock("BAMUM");
2045
2046 /**
2047 * Constant for the "Modifier Tone Letters" Unicode character block.
2048 * @since 1.7
2049 */
2050 public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2051 new UnicodeBlock("MODIFIER_TONE_LETTERS",
2052 "MODIFIER TONE LETTERS",
2053 "MODIFIERTONELETTERS");
2054
2055 /**
2056 * Constant for the "Latin Extended-D" Unicode character block.
2057 * @since 1.7
2058 */
2059 public static final UnicodeBlock LATIN_EXTENDED_D =
2060 new UnicodeBlock("LATIN_EXTENDED_D",
2061 "LATIN EXTENDED-D",
2062 "LATINEXTENDED-D");
2063
2064 /**
2065 * Constant for the "Syloti Nagri" Unicode character block.
2066 * @since 1.7
2067 */
2068 public static final UnicodeBlock SYLOTI_NAGRI =
2069 new UnicodeBlock("SYLOTI_NAGRI",
2070 "SYLOTI NAGRI",
2071 "SYLOTINAGRI");
2072
2073 /**
2074 * Constant for the "Common Indic Number Forms" Unicode character block.
2075 * @since 1.7
2076 */
2077 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2078 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2079 "COMMON INDIC NUMBER FORMS",
2080 "COMMONINDICNUMBERFORMS");
2081
2082 /**
2083 * Constant for the "Phags-pa" Unicode character block.
2084 * @since 1.7
2085 */
2086 public static final UnicodeBlock PHAGS_PA =
2087 new UnicodeBlock("PHAGS_PA",
2088 "PHAGS-PA");
2089
2090 /**
2091 * Constant for the "Saurashtra" Unicode character block.
2092 * @since 1.7
2093 */
2094 public static final UnicodeBlock SAURASHTRA =
2095 new UnicodeBlock("SAURASHTRA");
2096
2097 /**
2098 * Constant for the "Devanagari Extended" Unicode character block.
2099 * @since 1.7
2100 */
2101 public static final UnicodeBlock DEVANAGARI_EXTENDED =
2102 new UnicodeBlock("DEVANAGARI_EXTENDED",
2103 "DEVANAGARI EXTENDED",
2104 "DEVANAGARIEXTENDED");
2105
2106 /**
2107 * Constant for the "Kayah Li" Unicode character block.
2108 * @since 1.7
2109 */
2110 public static final UnicodeBlock KAYAH_LI =
2111 new UnicodeBlock("KAYAH_LI",
2112 "KAYAH LI",
2113 "KAYAHLI");
2114
2115 /**
2116 * Constant for the "Rejang" Unicode character block.
2117 * @since 1.7
2118 */
2119 public static final UnicodeBlock REJANG =
2120 new UnicodeBlock("REJANG");
2121
2122 /**
2123 * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2124 * @since 1.7
2125 */
2126 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2127 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2128 "HANGUL JAMO EXTENDED-A",
2129 "HANGULJAMOEXTENDED-A");
2130
2131 /**
2132 * Constant for the "Javanese" Unicode character block.
2133 * @since 1.7
2134 */
2135 public static final UnicodeBlock JAVANESE =
2136 new UnicodeBlock("JAVANESE");
2137
2138 /**
2139 * Constant for the "Cham" Unicode character block.
2140 * @since 1.7
2141 */
2142 public static final UnicodeBlock CHAM =
2143 new UnicodeBlock("CHAM");
2144
2145 /**
2146 * Constant for the "Myanmar Extended-A" Unicode character block.
2147 * @since 1.7
2148 */
2149 public static final UnicodeBlock MYANMAR_EXTENDED_A =
2150 new UnicodeBlock("MYANMAR_EXTENDED_A",
2151 "MYANMAR EXTENDED-A",
2152 "MYANMAREXTENDED-A");
2153
2154 /**
2155 * Constant for the "Tai Viet" Unicode character block.
2156 * @since 1.7
2157 */
2158 public static final UnicodeBlock TAI_VIET =
2159 new UnicodeBlock("TAI_VIET",
2160 "TAI VIET",
2161 "TAIVIET");
2162
2163 /**
2164 * Constant for the "Ethiopic Extended-A" Unicode character block.
2165 * @since 1.7
2166 */
2167 public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2168 new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2169 "ETHIOPIC EXTENDED-A",
2170 "ETHIOPICEXTENDED-A");
2171
2172 /**
2173 * Constant for the "Meetei Mayek" Unicode character block.
2174 * @since 1.7
2175 */
2176 public static final UnicodeBlock MEETEI_MAYEK =
2177 new UnicodeBlock("MEETEI_MAYEK",
2178 "MEETEI MAYEK",
2179 "MEETEIMAYEK");
2180
2181 /**
2182 * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2183 * @since 1.7
2184 */
2185 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2186 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2187 "HANGUL JAMO EXTENDED-B",
2188 "HANGULJAMOEXTENDED-B");
2189
2190 /**
2191 * Constant for the "Vertical Forms" Unicode character block.
2192 * @since 1.7
2193 */
2194 public static final UnicodeBlock VERTICAL_FORMS =
2195 new UnicodeBlock("VERTICAL_FORMS",
2196 "VERTICAL FORMS",
2197 "VERTICALFORMS");
2198
2199 /**
2200 * Constant for the "Ancient Greek Numbers" Unicode character block.
2201 * @since 1.7
2202 */
2203 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2204 new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2205 "ANCIENT GREEK NUMBERS",
2206 "ANCIENTGREEKNUMBERS");
2207
2208 /**
2209 * Constant for the "Ancient Symbols" Unicode character block.
2210 * @since 1.7
2211 */
2212 public static final UnicodeBlock ANCIENT_SYMBOLS =
2213 new UnicodeBlock("ANCIENT_SYMBOLS",
2214 "ANCIENT SYMBOLS",
2215 "ANCIENTSYMBOLS");
2216
2217 /**
2218 * Constant for the "Phaistos Disc" Unicode character block.
2219 * @since 1.7
2220 */
2221 public static final UnicodeBlock PHAISTOS_DISC =
2222 new UnicodeBlock("PHAISTOS_DISC",
2223 "PHAISTOS DISC",
2224 "PHAISTOSDISC");
2225
2226 /**
2227 * Constant for the "Lycian" Unicode character block.
2228 * @since 1.7
2229 */
2230 public static final UnicodeBlock LYCIAN =
2231 new UnicodeBlock("LYCIAN");
2232
2233 /**
2234 * Constant for the "Carian" Unicode character block.
2235 * @since 1.7
2236 */
2237 public static final UnicodeBlock CARIAN =
2238 new UnicodeBlock("CARIAN");
2239
2240 /**
2241 * Constant for the "Old Persian" Unicode character block.
2242 * @since 1.7
2243 */
2244 public static final UnicodeBlock OLD_PERSIAN =
2245 new UnicodeBlock("OLD_PERSIAN",
2246 "OLD PERSIAN",
2247 "OLDPERSIAN");
2248
2249 /**
2250 * Constant for the "Imperial Aramaic" Unicode character block.
2251 * @since 1.7
2252 */
2253 public static final UnicodeBlock IMPERIAL_ARAMAIC =
2254 new UnicodeBlock("IMPERIAL_ARAMAIC",
2255 "IMPERIAL ARAMAIC",
2256 "IMPERIALARAMAIC");
2257
2258 /**
2259 * Constant for the "Phoenician" Unicode character block.
2260 * @since 1.7
2261 */
2262 public static final UnicodeBlock PHOENICIAN =
2263 new UnicodeBlock("PHOENICIAN");
2264
2265 /**
2266 * Constant for the "Lydian" Unicode character block.
2267 * @since 1.7
2268 */
2269 public static final UnicodeBlock LYDIAN =
2270 new UnicodeBlock("LYDIAN");
2271
2272 /**
2273 * Constant for the "Kharoshthi" Unicode character block.
2274 * @since 1.7
2275 */
2276 public static final UnicodeBlock KHAROSHTHI =
2277 new UnicodeBlock("KHAROSHTHI");
2278
2279 /**
2280 * Constant for the "Old South Arabian" Unicode character block.
2281 * @since 1.7
2282 */
2283 public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2284 new UnicodeBlock("OLD_SOUTH_ARABIAN",
2285 "OLD SOUTH ARABIAN",
2286 "OLDSOUTHARABIAN");
2287
2288 /**
2289 * Constant for the "Avestan" Unicode character block.
2290 * @since 1.7
2291 */
2292 public static final UnicodeBlock AVESTAN =
2293 new UnicodeBlock("AVESTAN");
2294
2295 /**
2296 * Constant for the "Inscriptional Parthian" Unicode character block.
2297 * @since 1.7
2298 */
2299 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2300 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2301 "INSCRIPTIONAL PARTHIAN",
2302 "INSCRIPTIONALPARTHIAN");
2303
2304 /**
2305 * Constant for the "Inscriptional Pahlavi" Unicode character block.
2306 * @since 1.7
2307 */
2308 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2309 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2310 "INSCRIPTIONAL PAHLAVI",
2311 "INSCRIPTIONALPAHLAVI");
2312
2313 /**
2314 * Constant for the "Old Turkic" Unicode character block.
2315 * @since 1.7
2316 */
2317 public static final UnicodeBlock OLD_TURKIC =
2318 new UnicodeBlock("OLD_TURKIC",
2319 "OLD TURKIC",
2320 "OLDTURKIC");
2321
2322 /**
2323 * Constant for the "Rumi Numeral Symbols" Unicode character block.
2324 * @since 1.7
2325 */
2326 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2327 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2328 "RUMI NUMERAL SYMBOLS",
2329 "RUMINUMERALSYMBOLS");
2330
2331 /**
2332 * Constant for the "Brahmi" Unicode character block.
2333 * @since 1.7
2334 */
2335 public static final UnicodeBlock BRAHMI =
2336 new UnicodeBlock("BRAHMI");
2337
2338 /**
2339 * Constant for the "Kaithi" Unicode character block.
2340 * @since 1.7
2341 */
2342 public static final UnicodeBlock KAITHI =
2343 new UnicodeBlock("KAITHI");
2344
2345 /**
2346 * Constant for the "Cuneiform" Unicode character block.
2347 * @since 1.7
2348 */
2349 public static final UnicodeBlock CUNEIFORM =
2350 new UnicodeBlock("CUNEIFORM");
2351
2352 /**
2353 * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2354 * character block.
2355 * @since 1.7
2356 */
2357 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2358 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2359 "CUNEIFORM NUMBERS AND PUNCTUATION",
2360 "CUNEIFORMNUMBERSANDPUNCTUATION");
2361
2362 /**
2363 * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2364 * @since 1.7
2365 */
2366 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2367 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2368 "EGYPTIAN HIEROGLYPHS",
2369 "EGYPTIANHIEROGLYPHS");
2370
2371 /**
2372 * Constant for the "Bamum Supplement" Unicode character block.
2373 * @since 1.7
2374 */
2375 public static final UnicodeBlock BAMUM_SUPPLEMENT =
2376 new UnicodeBlock("BAMUM_SUPPLEMENT",
2377 "BAMUM SUPPLEMENT",
2378 "BAMUMSUPPLEMENT");
2379
2380 /**
2381 * Constant for the "Kana Supplement" Unicode character block.
2382 * @since 1.7
2383 */
2384 public static final UnicodeBlock KANA_SUPPLEMENT =
2385 new UnicodeBlock("KANA_SUPPLEMENT",
2386 "KANA SUPPLEMENT",
2387 "KANASUPPLEMENT");
2388
2389 /**
2390 * Constant for the "Ancient Greek Musical Notation" Unicode character
2391 * block.
2392 * @since 1.7
2393 */
2394 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2395 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2396 "ANCIENT GREEK MUSICAL NOTATION",
2397 "ANCIENTGREEKMUSICALNOTATION");
2398
2399 /**
2400 * Constant for the "Counting Rod Numerals" Unicode character block.
2401 * @since 1.7
2402 */
2403 public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2404 new UnicodeBlock("COUNTING_ROD_NUMERALS",
2405 "COUNTING ROD NUMERALS",
2406 "COUNTINGRODNUMERALS");
2407
2408 /**
2409 * Constant for the "Mahjong Tiles" Unicode character block.
2410 * @since 1.7
2411 */
2412 public static final UnicodeBlock MAHJONG_TILES =
2413 new UnicodeBlock("MAHJONG_TILES",
2414 "MAHJONG TILES",
2415 "MAHJONGTILES");
2416
2417 /**
2418 * Constant for the "Domino Tiles" Unicode character block.
2419 * @since 1.7
2420 */
2421 public static final UnicodeBlock DOMINO_TILES =
2422 new UnicodeBlock("DOMINO_TILES",
2423 "DOMINO TILES",
2424 "DOMINOTILES");
2425
2426 /**
2427 * Constant for the "Playing Cards" Unicode character block.
2428 * @since 1.7
2429 */
2430 public static final UnicodeBlock PLAYING_CARDS =
2431 new UnicodeBlock("PLAYING_CARDS",
2432 "PLAYING CARDS",
2433 "PLAYINGCARDS");
2434
2435 /**
2436 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2437 * block.
2438 * @since 1.7
2439 */
2440 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2441 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2442 "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2443 "ENCLOSEDALPHANUMERICSUPPLEMENT");
2444
2445 /**
2446 * Constant for the "Enclosed Ideographic Supplement" Unicode character
2447 * block.
2448 * @since 1.7
2449 */
2450 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2451 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2452 "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2453 "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2454
2455 /**
2456 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2457 * character block.
2458 * @since 1.7
2459 */
2460 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2461 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2462 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2463 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2464
2465 /**
2466 * Constant for the "Emoticons" Unicode character block.
2467 * @since 1.7
2468 */
2469 public static final UnicodeBlock EMOTICONS =
2470 new UnicodeBlock("EMOTICONS");
2471
2472 /**
2473 * Constant for the "Transport And Map Symbols" Unicode character block.
2474 * @since 1.7
2475 */
2476 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2477 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2478 "TRANSPORT AND MAP SYMBOLS",
2479 "TRANSPORTANDMAPSYMBOLS");
2480
2481 /**
2482 * Constant for the "Alchemical Symbols" Unicode character block.
2483 * @since 1.7
2484 */
2485 public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2486 new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2487 "ALCHEMICAL SYMBOLS",
2488 "ALCHEMICALSYMBOLS");
2489
2490 /**
2491 * Constant for the "CJK Unified Ideographs Extension C" Unicode
2492 * character block.
2493 * @since 1.7
2494 */
2495 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2496 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2497 "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2498 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2499
2500 /**
2501 * Constant for the "CJK Unified Ideographs Extension D" Unicode
2502 * character block.
2503 * @since 1.7
2504 */
2505 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2506 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2507 "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2508 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2509
2510 /**
2511 * Constant for the "Arabic Extended-A" Unicode character block.
2512 * @since 1.8
2513 */
2514 public static final UnicodeBlock ARABIC_EXTENDED_A =
2515 new UnicodeBlock("ARABIC_EXTENDED_A",
2516 "ARABIC EXTENDED-A",
2517 "ARABICEXTENDED-A");
2518
2519 /**
2520 * Constant for the "Sundanese Supplement" Unicode character block.
2521 * @since 1.8
2522 */
2523 public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2524 new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2525 "SUNDANESE SUPPLEMENT",
2526 "SUNDANESESUPPLEMENT");
2527
2528 /**
2529 * Constant for the "Meetei Mayek Extensions" Unicode character block.
2530 * @since 1.8
2531 */
2532 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2533 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2534 "MEETEI MAYEK EXTENSIONS",
2535 "MEETEIMAYEKEXTENSIONS");
2536
2537 /**
2538 * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2539 * @since 1.8
2540 */
2541 public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2542 new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2543 "MEROITIC HIEROGLYPHS",
2544 "MEROITICHIEROGLYPHS");
2545
2546 /**
2547 * Constant for the "Meroitic Cursive" Unicode character block.
2548 * @since 1.8
2549 */
2550 public static final UnicodeBlock MEROITIC_CURSIVE =
2551 new UnicodeBlock("MEROITIC_CURSIVE",
2552 "MEROITIC CURSIVE",
2553 "MEROITICCURSIVE");
2554
2555 /**
2556 * Constant for the "Sora Sompeng" Unicode character block.
2557 * @since 1.8
2558 */
2559 public static final UnicodeBlock SORA_SOMPENG =
2560 new UnicodeBlock("SORA_SOMPENG",
2561 "SORA SOMPENG",
2562 "SORASOMPENG");
2563
2564 /**
2565 * Constant for the "Chakma" Unicode character block.
2566 * @since 1.8
2567 */
2568 public static final UnicodeBlock CHAKMA =
2569 new UnicodeBlock("CHAKMA");
2570
2571 /**
2572 * Constant for the "Sharada" Unicode character block.
2573 * @since 1.8
2574 */
2575 public static final UnicodeBlock SHARADA =
2576 new UnicodeBlock("SHARADA");
2577
2578 /**
2579 * Constant for the "Takri" Unicode character block.
2580 * @since 1.8
2581 */
2582 public static final UnicodeBlock TAKRI =
2583 new UnicodeBlock("TAKRI");
2584
2585 /**
2586 * Constant for the "Miao" Unicode character block.
2587 * @since 1.8
2588 */
2589 public static final UnicodeBlock MIAO =
2590 new UnicodeBlock("MIAO");
2591
2592 /**
2593 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2594 * character block.
2595 * @since 1.8
2596 */
2597 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2598 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2599 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2600 "ARABICMATHEMATICALALPHABETICSYMBOLS");
2601
2602 /**
2603 * Constant for the "Combining Diacritical Marks Extended" Unicode
2604 * character block.
2605 * @since 9
2606 */
2607 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2608 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2609 "COMBINING DIACRITICAL MARKS EXTENDED",
2610 "COMBININGDIACRITICALMARKSEXTENDED");
2611
2612 /**
2613 * Constant for the "Myanmar Extended-B" Unicode character block.
2614 * @since 9
2615 */
2616 public static final UnicodeBlock MYANMAR_EXTENDED_B =
2617 new UnicodeBlock("MYANMAR_EXTENDED_B",
2618 "MYANMAR EXTENDED-B",
2619 "MYANMAREXTENDED-B");
2620
2621 /**
2622 * Constant for the "Latin Extended-E" Unicode character block.
2623 * @since 9
2624 */
2625 public static final UnicodeBlock LATIN_EXTENDED_E =
2626 new UnicodeBlock("LATIN_EXTENDED_E",
2627 "LATIN EXTENDED-E",
2628 "LATINEXTENDED-E");
2629
2630 /**
2631 * Constant for the "Coptic Epact Numbers" Unicode character block.
2632 * @since 9
2633 */
2634 public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2635 new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2636 "COPTIC EPACT NUMBERS",
2637 "COPTICEPACTNUMBERS");
2638
2639 /**
2640 * Constant for the "Old Permic" Unicode character block.
2641 * @since 9
2642 */
2643 public static final UnicodeBlock OLD_PERMIC =
2644 new UnicodeBlock("OLD_PERMIC",
2645 "OLD PERMIC",
2646 "OLDPERMIC");
2647
2648 /**
2649 * Constant for the "Elbasan" Unicode character block.
2650 * @since 9
2651 */
2652 public static final UnicodeBlock ELBASAN =
2653 new UnicodeBlock("ELBASAN");
2654
2655 /**
2656 * Constant for the "Caucasian Albanian" Unicode character block.
2657 * @since 9
2658 */
2659 public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2660 new UnicodeBlock("CAUCASIAN_ALBANIAN",
2661 "CAUCASIAN ALBANIAN",
2662 "CAUCASIANALBANIAN");
2663
2664 /**
2665 * Constant for the "Linear A" Unicode character block.
2666 * @since 9
2667 */
2668 public static final UnicodeBlock LINEAR_A =
2669 new UnicodeBlock("LINEAR_A",
2670 "LINEAR A",
2671 "LINEARA");
2672
2673 /**
2674 * Constant for the "Palmyrene" Unicode character block.
2675 * @since 9
2676 */
2677 public static final UnicodeBlock PALMYRENE =
2678 new UnicodeBlock("PALMYRENE");
2679
2680 /**
2681 * Constant for the "Nabataean" Unicode character block.
2682 * @since 9
2683 */
2684 public static final UnicodeBlock NABATAEAN =
2685 new UnicodeBlock("NABATAEAN");
2686
2687 /**
2688 * Constant for the "Old North Arabian" Unicode character block.
2689 * @since 9
2690 */
2691 public static final UnicodeBlock OLD_NORTH_ARABIAN =
2692 new UnicodeBlock("OLD_NORTH_ARABIAN",
2693 "OLD NORTH ARABIAN",
2694 "OLDNORTHARABIAN");
2695
2696 /**
2697 * Constant for the "Manichaean" Unicode character block.
2698 * @since 9
2699 */
2700 public static final UnicodeBlock MANICHAEAN =
2701 new UnicodeBlock("MANICHAEAN");
2702
2703 /**
2704 * Constant for the "Psalter Pahlavi" Unicode character block.
2705 * @since 9
2706 */
2707 public static final UnicodeBlock PSALTER_PAHLAVI =
2708 new UnicodeBlock("PSALTER_PAHLAVI",
2709 "PSALTER PAHLAVI",
2710 "PSALTERPAHLAVI");
2711
2712 /**
2713 * Constant for the "Mahajani" Unicode character block.
2714 * @since 9
2715 */
2716 public static final UnicodeBlock MAHAJANI =
2717 new UnicodeBlock("MAHAJANI");
2718
2719 /**
2720 * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2721 * @since 9
2722 */
2723 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2724 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2725 "SINHALA ARCHAIC NUMBERS",
2726 "SINHALAARCHAICNUMBERS");
2727
2728 /**
2729 * Constant for the "Khojki" Unicode character block.
2730 * @since 9
2731 */
2732 public static final UnicodeBlock KHOJKI =
2733 new UnicodeBlock("KHOJKI");
2734
2735 /**
2736 * Constant for the "Khudawadi" Unicode character block.
2737 * @since 9
2738 */
2739 public static final UnicodeBlock KHUDAWADI =
2740 new UnicodeBlock("KHUDAWADI");
2741
2742 /**
2743 * Constant for the "Grantha" Unicode character block.
2744 * @since 9
2745 */
2746 public static final UnicodeBlock GRANTHA =
2747 new UnicodeBlock("GRANTHA");
2748
2749 /**
2750 * Constant for the "Tirhuta" Unicode character block.
2751 * @since 9
2752 */
2753 public static final UnicodeBlock TIRHUTA =
2754 new UnicodeBlock("TIRHUTA");
2755
2756 /**
2757 * Constant for the "Siddham" Unicode character block.
2758 * @since 9
2759 */
2760 public static final UnicodeBlock SIDDHAM =
2761 new UnicodeBlock("SIDDHAM");
2762
2763 /**
2764 * Constant for the "Modi" Unicode character block.
2765 * @since 9
2766 */
2767 public static final UnicodeBlock MODI =
2768 new UnicodeBlock("MODI");
2769
2770 /**
2771 * Constant for the "Warang Citi" Unicode character block.
2772 * @since 9
2773 */
2774 public static final UnicodeBlock WARANG_CITI =
2775 new UnicodeBlock("WARANG_CITI",
2776 "WARANG CITI",
2777 "WARANGCITI");
2778
2779 /**
2780 * Constant for the "Pau Cin Hau" Unicode character block.
2781 * @since 9
2782 */
2783 public static final UnicodeBlock PAU_CIN_HAU =
2784 new UnicodeBlock("PAU_CIN_HAU",
2785 "PAU CIN HAU",
2786 "PAUCINHAU");
2787
2788 /**
2789 * Constant for the "Mro" Unicode character block.
2790 * @since 9
2791 */
2792 public static final UnicodeBlock MRO =
2793 new UnicodeBlock("MRO");
2794
2795 /**
2796 * Constant for the "Bassa Vah" Unicode character block.
2797 * @since 9
2798 */
2799 public static final UnicodeBlock BASSA_VAH =
2800 new UnicodeBlock("BASSA_VAH",
2801 "BASSA VAH",
2802 "BASSAVAH");
2803
2804 /**
2805 * Constant for the "Pahawh Hmong" Unicode character block.
2806 * @since 9
2807 */
2808 public static final UnicodeBlock PAHAWH_HMONG =
2809 new UnicodeBlock("PAHAWH_HMONG",
2810 "PAHAWH HMONG",
2811 "PAHAWHHMONG");
2812
2813 /**
2814 * Constant for the "Duployan" Unicode character block.
2815 * @since 9
2816 */
2817 public static final UnicodeBlock DUPLOYAN =
2818 new UnicodeBlock("DUPLOYAN");
2819
2820 /**
2821 * Constant for the "Shorthand Format Controls" Unicode character block.
2822 * @since 9
2823 */
2824 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2825 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2826 "SHORTHAND FORMAT CONTROLS",
2827 "SHORTHANDFORMATCONTROLS");
2828
2829 /**
2830 * Constant for the "Mende Kikakui" Unicode character block.
2831 * @since 9
2832 */
2833 public static final UnicodeBlock MENDE_KIKAKUI =
2834 new UnicodeBlock("MENDE_KIKAKUI",
2835 "MENDE KIKAKUI",
2836 "MENDEKIKAKUI");
2837
2838 /**
2839 * Constant for the "Ornamental Dingbats" Unicode character block.
2840 * @since 9
2841 */
2842 public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2843 new UnicodeBlock("ORNAMENTAL_DINGBATS",
2844 "ORNAMENTAL DINGBATS",
2845 "ORNAMENTALDINGBATS");
2846
2847 /**
2848 * Constant for the "Geometric Shapes Extended" Unicode character block.
2849 * @since 9
2850 */
2851 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2852 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2853 "GEOMETRIC SHAPES EXTENDED",
2854 "GEOMETRICSHAPESEXTENDED");
2855
2856 /**
2857 * Constant for the "Supplemental Arrows-C" Unicode character block.
2858 * @since 9
2859 */
2860 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2861 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2862 "SUPPLEMENTAL ARROWS-C",
2863 "SUPPLEMENTALARROWS-C");
2864
2865 /**
2866 * Constant for the "Cherokee Supplement" Unicode character block.
2867 * @since 9
2868 */
2869 public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2870 new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2871 "CHEROKEE SUPPLEMENT",
2872 "CHEROKEESUPPLEMENT");
2873
2874 /**
2875 * Constant for the "Hatran" Unicode character block.
2876 * @since 9
2877 */
2878 public static final UnicodeBlock HATRAN =
2879 new UnicodeBlock("HATRAN");
2880
2881 /**
2882 * Constant for the "Old Hungarian" Unicode character block.
2883 * @since 9
2884 */
2885 public static final UnicodeBlock OLD_HUNGARIAN =
2886 new UnicodeBlock("OLD_HUNGARIAN",
2887 "OLD HUNGARIAN",
2888 "OLDHUNGARIAN");
2889
2890 /**
2891 * Constant for the "Multani" Unicode character block.
2892 * @since 9
2893 */
2894 public static final UnicodeBlock MULTANI =
2895 new UnicodeBlock("MULTANI");
2896
2897 /**
2898 * Constant for the "Ahom" Unicode character block.
2899 * @since 9
2900 */
2901 public static final UnicodeBlock AHOM =
2902 new UnicodeBlock("AHOM");
2903
2904 /**
2905 * Constant for the "Early Dynastic Cuneiform" Unicode character block.
2906 * @since 9
2907 */
2908 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2909 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
2910 "EARLY DYNASTIC CUNEIFORM",
2911 "EARLYDYNASTICCUNEIFORM");
2912
2913 /**
2914 * Constant for the "Anatolian Hieroglyphs" Unicode character block.
2915 * @since 9
2916 */
2917 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2918 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
2919 "ANATOLIAN HIEROGLYPHS",
2920 "ANATOLIANHIEROGLYPHS");
2921
2922 /**
2923 * Constant for the "Sutton SignWriting" Unicode character block.
2924 * @since 9
2925 */
2926 public static final UnicodeBlock SUTTON_SIGNWRITING =
2927 new UnicodeBlock("SUTTON_SIGNWRITING",
2928 "SUTTON SIGNWRITING",
2929 "SUTTONSIGNWRITING");
2930
2931 /**
2932 * Constant for the "Supplemental Symbols and Pictographs" Unicode
2933 * character block.
2934 * @since 9
2935 */
2936 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2937 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2938 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
2939 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
2940
2941 /**
2942 * Constant for the "CJK Unified Ideographs Extension E" Unicode
2943 * character block.
2944 * @since 9
2945 */
2946 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2947 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2948 "CJK UNIFIED IDEOGRAPHS EXTENSION E",
2949 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
2950
2951 /**
2952 * Constant for the "Syriac Supplement" Unicode
2953 * character block.
2954 * @since 11
2955 */
2956 public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2957 new UnicodeBlock("SYRIAC_SUPPLEMENT",
2958 "SYRIAC SUPPLEMENT",
2959 "SYRIACSUPPLEMENT");
2960
2961 /**
2962 * Constant for the "Cyrillic Extended-C" Unicode
2963 * character block.
2964 * @since 11
2965 */
2966 public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2967 new UnicodeBlock("CYRILLIC_EXTENDED_C",
2968 "CYRILLIC EXTENDED-C",
2969 "CYRILLICEXTENDED-C");
2970
2971 /**
2972 * Constant for the "Osage" Unicode
2973 * character block.
2974 * @since 11
2975 */
2976 public static final UnicodeBlock OSAGE =
2977 new UnicodeBlock("OSAGE");
2978
2979 /**
2980 * Constant for the "Newa" Unicode
2981 * character block.
2982 * @since 11
2983 */
2984 public static final UnicodeBlock NEWA =
2985 new UnicodeBlock("NEWA");
2986
2987 /**
2988 * Constant for the "Mongolian Supplement" Unicode
2989 * character block.
2990 * @since 11
2991 */
2992 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2993 new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
2994 "MONGOLIAN SUPPLEMENT",
2995 "MONGOLIANSUPPLEMENT");
2996
2997 /**
2998 * Constant for the "Marchen" Unicode
2999 * character block.
3000 * @since 11
3001 */
3002 public static final UnicodeBlock MARCHEN =
3003 new UnicodeBlock("MARCHEN");
3004
3005 /**
3006 * Constant for the "Ideographic Symbols and Punctuation" Unicode
3007 * character block.
3008 * @since 11
3009 */
3010 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3011 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3012 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3013 "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3014
3015 /**
3016 * Constant for the "Tangut" Unicode
3017 * character block.
3018 * @since 11
3019 */
3020 public static final UnicodeBlock TANGUT =
3021 new UnicodeBlock("TANGUT");
3022
3023 /**
3024 * Constant for the "Tangut Components" Unicode
3025 * character block.
3026 * @since 11
3027 */
3028 public static final UnicodeBlock TANGUT_COMPONENTS =
3029 new UnicodeBlock("TANGUT_COMPONENTS",
3030 "TANGUT COMPONENTS",
3031 "TANGUTCOMPONENTS");
3032
3033 /**
3034 * Constant for the "Kana Extended-A" Unicode
3035 * character block.
3036 * @since 11
3037 */
3038 public static final UnicodeBlock KANA_EXTENDED_A =
3039 new UnicodeBlock("KANA_EXTENDED_A",
3040 "KANA EXTENDED-A",
3041 "KANAEXTENDED-A");
3042 /**
3043 * Constant for the "Glagolitic Supplement" Unicode
3044 * character block.
3045 * @since 11
3046 */
3047 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3048 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3049 "GLAGOLITIC SUPPLEMENT",
3050 "GLAGOLITICSUPPLEMENT");
3051 /**
3052 * Constant for the "Adlam" Unicode
3053 * character block.
3054 * @since 11
3055 */
3056 public static final UnicodeBlock ADLAM =
3057 new UnicodeBlock("ADLAM");
3058
3059 /**
3060 * Constant for the "Masaram Gondi" Unicode
3061 * character block.
3062 * @since 11
3063 */
3064 public static final UnicodeBlock MASARAM_GONDI =
3065 new UnicodeBlock("MASARAM_GONDI",
3066 "MASARAM GONDI",
3067 "MASARAMGONDI");
3068
3069 /**
3070 * Constant for the "Zanabazar Square" Unicode
3071 * character block.
3072 * @since 11
3073 */
3074 public static final UnicodeBlock ZANABAZAR_SQUARE =
3075 new UnicodeBlock("ZANABAZAR_SQUARE",
3076 "ZANABAZAR SQUARE",
3077 "ZANABAZARSQUARE");
3078
3079 /**
3080 * Constant for the "Nushu" Unicode
3081 * character block.
3082 * @since 11
3083 */
3084 public static final UnicodeBlock NUSHU =
3085 new UnicodeBlock("NUSHU");
3086
3087 /**
3088 * Constant for the "Soyombo" Unicode
3089 * character block.
3090 * @since 11
3091 */
3092 public static final UnicodeBlock SOYOMBO =
3093 new UnicodeBlock("SOYOMBO");
3094
3095 /**
3096 * Constant for the "Bhaiksuki" Unicode
3097 * character block.
3098 * @since 11
3099 */
3100 public static final UnicodeBlock BHAIKSUKI =
3101 new UnicodeBlock("BHAIKSUKI");
3102
3103 /**
3104 * Constant for the "CJK Unified Ideographs Extension F" Unicode
3105 * character block.
3106 * @since 11
3107 */
3108 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3109 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3110 "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3111 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3112
3113 private static final int blockStarts[] = {
3114 0x0000, // 0000..007F; Basic Latin
3115 0x0080, // 0080..00FF; Latin-1 Supplement
3116 0x0100, // 0100..017F; Latin Extended-A
3117 0x0180, // 0180..024F; Latin Extended-B
3118 0x0250, // 0250..02AF; IPA Extensions
3119 0x02B0, // 02B0..02FF; Spacing Modifier Letters
3120 0x0300, // 0300..036F; Combining Diacritical Marks
3121 0x0370, // 0370..03FF; Greek and Coptic
3122 0x0400, // 0400..04FF; Cyrillic
3123 0x0500, // 0500..052F; Cyrillic Supplement
3124 0x0530, // 0530..058F; Armenian
3125 0x0590, // 0590..05FF; Hebrew
3126 0x0600, // 0600..06FF; Arabic
3127 0x0700, // 0700..074F; Syriac
3128 0x0750, // 0750..077F; Arabic Supplement
3129 0x0780, // 0780..07BF; Thaana
3130 0x07C0, // 07C0..07FF; NKo
3131 0x0800, // 0800..083F; Samaritan
3132 0x0840, // 0840..085F; Mandaic
3133 0x0860, // 0860..086F; Syriac Supplement
3134 0x0870, // unassigned
3135 0x08A0, // 08A0..08FF; Arabic Extended-A
3136 0x0900, // 0900..097F; Devanagari
3137 0x0980, // 0980..09FF; Bengali
3138 0x0A00, // 0A00..0A7F; Gurmukhi
3139 0x0A80, // 0A80..0AFF; Gujarati
3140 0x0B00, // 0B00..0B7F; Oriya
3141 0x0B80, // 0B80..0BFF; Tamil
3142 0x0C00, // 0C00..0C7F; Telugu
3143 0x0C80, // 0C80..0CFF; Kannada
3144 0x0D00, // 0D00..0D7F; Malayalam
3145 0x0D80, // 0D80..0DFF; Sinhala
3146 0x0E00, // 0E00..0E7F; Thai
3147 0x0E80, // 0E80..0EFF; Lao
3148 0x0F00, // 0F00..0FFF; Tibetan
3149 0x1000, // 1000..109F; Myanmar
3150 0x10A0, // 10A0..10FF; Georgian
3151 0x1100, // 1100..11FF; Hangul Jamo
3152 0x1200, // 1200..137F; Ethiopic
3153 0x1380, // 1380..139F; Ethiopic Supplement
3154 0x13A0, // 13A0..13FF; Cherokee
3155 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics
3156 0x1680, // 1680..169F; Ogham
3157 0x16A0, // 16A0..16FF; Runic
3158 0x1700, // 1700..171F; Tagalog
3159 0x1720, // 1720..173F; Hanunoo
3160 0x1740, // 1740..175F; Buhid
3161 0x1760, // 1760..177F; Tagbanwa
3162 0x1780, // 1780..17FF; Khmer
3163 0x1800, // 1800..18AF; Mongolian
3164 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3165 0x1900, // 1900..194F; Limbu
3166 0x1950, // 1950..197F; Tai Le
3167 0x1980, // 1980..19DF; New Tai Lue
3168 0x19E0, // 19E0..19FF; Khmer Symbols
3169 0x1A00, // 1A00..1A1F; Buginese
3170 0x1A20, // 1A20..1AAF; Tai Tham
3171 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended
3172 0x1B00, // 1B00..1B7F; Balinese
3173 0x1B80, // 1B80..1BBF; Sundanese
3174 0x1BC0, // 1BC0..1BFF; Batak
3175 0x1C00, // 1C00..1C4F; Lepcha
3176 0x1C50, // 1C50..1C7F; Ol Chiki
3177 0x1C80, // 1C80..1C8F; Cyrillic Extended-C
3178 0x1C90, // unassigned
3179 0x1CC0, // 1CC0..1CCF; Sundanese Supplement
3180 0x1CD0, // 1CD0..1CFF; Vedic Extensions
3181 0x1D00, // 1D00..1D7F; Phonetic Extensions
3182 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement
3183 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement
3184 0x1E00, // 1E00..1EFF; Latin Extended Additional
3185 0x1F00, // 1F00..1FFF; Greek Extended
3186 0x2000, // 2000..206F; General Punctuation
3187 0x2070, // 2070..209F; Superscripts and Subscripts
3188 0x20A0, // 20A0..20CF; Currency Symbols
3189 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols
3190 0x2100, // 2100..214F; Letterlike Symbols
3191 0x2150, // 2150..218F; Number Forms
3192 0x2190, // 2190..21FF; Arrows
3193 0x2200, // 2200..22FF; Mathematical Operators
3194 0x2300, // 2300..23FF; Miscellaneous Technical
3195 0x2400, // 2400..243F; Control Pictures
3196 0x2440, // 2440..245F; Optical Character Recognition
3197 0x2460, // 2460..24FF; Enclosed Alphanumerics
3198 0x2500, // 2500..257F; Box Drawing
3199 0x2580, // 2580..259F; Block Elements
3200 0x25A0, // 25A0..25FF; Geometric Shapes
3201 0x2600, // 2600..26FF; Miscellaneous Symbols
3202 0x2700, // 2700..27BF; Dingbats
3203 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3204 0x27F0, // 27F0..27FF; Supplemental Arrows-A
3205 0x2800, // 2800..28FF; Braille Patterns
3206 0x2900, // 2900..297F; Supplemental Arrows-B
3207 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B
3208 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators
3209 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows
3210 0x2C00, // 2C00..2C5F; Glagolitic
3211 0x2C60, // 2C60..2C7F; Latin Extended-C
3212 0x2C80, // 2C80..2CFF; Coptic
3213 0x2D00, // 2D00..2D2F; Georgian Supplement
3214 0x2D30, // 2D30..2D7F; Tifinagh
3215 0x2D80, // 2D80..2DDF; Ethiopic Extended
3216 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A
3217 0x2E00, // 2E00..2E7F; Supplemental Punctuation
3218 0x2E80, // 2E80..2EFF; CJK Radicals Supplement
3219 0x2F00, // 2F00..2FDF; Kangxi Radicals
3220 0x2FE0, // unassigned
3221 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters
3222 0x3000, // 3000..303F; CJK Symbols and Punctuation
3223 0x3040, // 3040..309F; Hiragana
3224 0x30A0, // 30A0..30FF; Katakana
3225 0x3100, // 3100..312F; Bopomofo
3226 0x3130, // 3130..318F; Hangul Compatibility Jamo
3227 0x3190, // 3190..319F; Kanbun
3228 0x31A0, // 31A0..31BF; Bopomofo Extended
3229 0x31C0, // 31C0..31EF; CJK Strokes
3230 0x31F0, // 31F0..31FF; Katakana Phonetic Extensions
3231 0x3200, // 3200..32FF; Enclosed CJK Letters and Months
3232 0x3300, // 3300..33FF; CJK Compatibility
3233 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A
3234 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols
3235 0x4E00, // 4E00..9FFF; CJK Unified Ideographs
3236 0xA000, // A000..A48F; Yi Syllables
3237 0xA490, // A490..A4CF; Yi Radicals
3238 0xA4D0, // A4D0..A4FF; Lisu
3239 0xA500, // A500..A63F; Vai
3240 0xA640, // A640..A69F; Cyrillic Extended-B
3241 0xA6A0, // A6A0..A6FF; Bamum
3242 0xA700, // A700..A71F; Modifier Tone Letters
3243 0xA720, // A720..A7FF; Latin Extended-D
3244 0xA800, // A800..A82F; Syloti Nagri
3245 0xA830, // A830..A83F; Common Indic Number Forms
3246 0xA840, // A840..A87F; Phags-pa
3247 0xA880, // A880..A8DF; Saurashtra
3248 0xA8E0, // A8E0..A8FF; Devanagari Extended
3249 0xA900, // A900..A92F; Kayah Li
3250 0xA930, // A930..A95F; Rejang
3251 0xA960, // A960..A97F; Hangul Jamo Extended-A
3252 0xA980, // A980..A9DF; Javanese
3253 0xA9E0, // A9E0..A9FF; Myanmar Extended-B
3254 0xAA00, // AA00..AA5F; Cham
3255 0xAA60, // AA60..AA7F; Myanmar Extended-A
3256 0xAA80, // AA80..AADF; Tai Viet
3257 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions
3258 0xAB00, // AB00..AB2F; Ethiopic Extended-A
3259 0xAB30, // AB30..AB6F; Latin Extended-E
3260 0xAB70, // AB70..ABBF; Cherokee Supplement
3261 0xABC0, // ABC0..ABFF; Meetei Mayek
3262 0xAC00, // AC00..D7AF; Hangul Syllables
3263 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B
3264 0xD800, // D800..DB7F; High Surrogates
3265 0xDB80, // DB80..DBFF; High Private Use Surrogates
3266 0xDC00, // DC00..DFFF; Low Surrogates
3267 0xE000, // E000..F8FF; Private Use Area
3268 0xF900, // F900..FAFF; CJK Compatibility Ideographs
3269 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms
3270 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A
3271 0xFE00, // FE00..FE0F; Variation Selectors
3272 0xFE10, // FE10..FE1F; Vertical Forms
3273 0xFE20, // FE20..FE2F; Combining Half Marks
3274 0xFE30, // FE30..FE4F; CJK Compatibility Forms
3275 0xFE50, // FE50..FE6F; Small Form Variants
3276 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B
3277 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms
3278 0xFFF0, // FFF0..FFFF; Specials
3279 0x10000, // 10000..1007F; Linear B Syllabary
3280 0x10080, // 10080..100FF; Linear B Ideograms
3281 0x10100, // 10100..1013F; Aegean Numbers
3282 0x10140, // 10140..1018F; Ancient Greek Numbers
3283 0x10190, // 10190..101CF; Ancient Symbols
3284 0x101D0, // 101D0..101FF; Phaistos Disc
3285 0x10200, // unassigned
3286 0x10280, // 10280..1029F; Lycian
3287 0x102A0, // 102A0..102DF; Carian
3288 0x102E0, // 102E0..102FF; Coptic Epact Numbers
3289 0x10300, // 10300..1032F; Old Italic
3290 0x10330, // 10330..1034F; Gothic
3291 0x10350, // 10350..1037F; Old Permic
3292 0x10380, // 10380..1039F; Ugaritic
3293 0x103A0, // 103A0..103DF; Old Persian
3294 0x103E0, // unassigned
3295 0x10400, // 10400..1044F; Deseret
3296 0x10450, // 10450..1047F; Shavian
3297 0x10480, // 10480..104AF; Osmanya
3298 0x104B0, // 104B0..104FF; Osage
3299 0x10500, // 10500..1052F; Elbasan
3300 0x10530, // 10530..1056F; Caucasian Albanian
3301 0x10570, // unassigned
3302 0x10600, // 10600..1077F; Linear A
3303 0x10780, // unassigned
3304 0x10800, // 10800..1083F; Cypriot Syllabary
3305 0x10840, // 10840..1085F; Imperial Aramaic
3306 0x10860, // 10860..1087F; Palmyrene
3307 0x10880, // 10880..108AF; Nabataean
3308 0x108B0, // unassigned
3309 0x108E0, // 108E0..108FF; Hatran
3310 0x10900, // 10900..1091F; Phoenician
3311 0x10920, // 10920..1093F; Lydian
3312 0x10940, // unassigned
3313 0x10980, // 10980..1099F; Meroitic Hieroglyphs
3314 0x109A0, // 109A0..109FF; Meroitic Cursive
3315 0x10A00, // 10A00..10A5F; Kharoshthi
3316 0x10A60, // 10A60..10A7F; Old South Arabian
3317 0x10A80, // 10A80..10A9F; Old North Arabian
3318 0x10AA0, // unassigned
3319 0x10AC0, // 10AC0..10AFF; Manichaean
3320 0x10B00, // 10B00..10B3F; Avestan
3321 0x10B40, // 10B40..10B5F; Inscriptional Parthian
3322 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi
3323 0x10B80, // 10B80..10BAF; Psalter Pahlavi
3324 0x10BB0, // unassigned
3325 0x10C00, // 10C00..10C4F; Old Turkic
3326 0x10C50, // unassigned
3327 0x10C80, // 10C80..10CFF; Old Hungarian
3328 0x10D00, // unassigned
3329 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols
3330 0x10E80, // unassigned
3331 0x11000, // 11000..1107F; Brahmi
3332 0x11080, // 11080..110CF; Kaithi
3333 0x110D0, // 110D0..110FF; Sora Sompeng
3334 0x11100, // 11100..1114F; Chakma
3335 0x11150, // 11150..1117F; Mahajani
3336 0x11180, // 11180..111DF; Sharada
3337 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers
3338 0x11200, // 11200..1124F; Khojki
3339 0x11250, // unassigned
3340 0x11280, // 11280..112AF; Multani
3341 0x112B0, // 112B0..112FF; Khudawadi
3342 0x11300, // 11300..1137F; Grantha
3343 0x11380, // unassigned
3344 0x11400, // 11400..1147F; Newa
3345 0x11480, // 11480..114DF; Tirhuta
3346 0x114E0, // unassigned
3347 0x11580, // 11580..115FF; Siddham
3348 0x11600, // 11600..1165F; Modi
3349 0x11660, // 11660..1167F; Mongolian Supplement
3350 0x11680, // 11680..116CF; Takri
3351 0x116D0, // unassigned
3352 0x11700, // 11700..1173F; Ahom
3353 0x11740, // unassigned
3354 0x118A0, // 118A0..118FF; Warang Citi
3355 0x11900, // unassigned
3356 0x11A00, // 11A00..11A4F; Zanabazar Square
3357 0x11A50, // 11A50..11AAF; Soyombo
3358 0x11AB0, // unassigned
3359 0x11AC0, // 11AC0..11AFF; Pau Cin Hau
3360 0x11B00, // unassigned
3361 0x11C00, // 11C00..11C6F; Bhaiksuki
3362 0x11C70, // 11C70..11CBF; Marchen
3363 0x11CC0, // unassigned
3364 0x11D00, // 11D00..11D5F; Masaram Gondi
3365 0x11D60, // unassigned
3366 0x12000, // 12000..123FF; Cuneiform
3367 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation
3368 0x12480, // 12480..1254F; Early Dynastic Cuneiform
3369 0x12550, // unassigned
3370 0x13000, // 13000..1342F; Egyptian Hieroglyphs
3371 0x13430, // unassigned
3372 0x14400, // 14400..1467F; Anatolian Hieroglyphs
3373 0x14680, // unassigned
3374 0x16800, // 16800..16A3F; Bamum Supplement
3375 0x16A40, // 16A40..16A6F; Mro
3376 0x16A70, // unassigned
3377 0x16AD0, // 16AD0..16AFF; Bassa Vah
3378 0x16B00, // 16B00..16B8F; Pahawh Hmong
3379 0x16B90, // unassigned
3380 0x16F00, // 16F00..16F9F; Miao
3381 0x16FA0, // unassigned
3382 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation
3383 0x17000, // 17000..187FF; Tangut
3384 0x18800, // 18800..18AFF; Tangut Components
3385 0x18B00, // unassigned
3386 0x1B000, // 1B000..1B0FF; Kana Supplement
3387 0x1B100, // 1B100..1B12F; Kana Extended-A
3388 0x1B130, // unassigned
3389 0x1B170, // 1B170..1B2FF; Nushu
3390 0x1B300, // unassigned
3391 0x1BC00, // 1BC00..1BC9F; Duployan
3392 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls
3393 0x1BCB0, // unassigned
3394 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols
3395 0x1D100, // 1D100..1D1FF; Musical Symbols
3396 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation
3397 0x1D250, // unassigned
3398 0x1D300, // 1D300..1D35F; Tai Xuan Jing Symbols
3399 0x1D360, // 1D360..1D37F; Counting Rod Numerals
3400 0x1D380, // unassigned
3401 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3402 0x1D800, // 1D800..1DAAF; Sutton SignWriting
3403 0x1DAB0, // unassigned
3404 0x1E000, // 1E000..1E02F; Glagolitic Supplement
3405 0x1E030, // unassigned
3406 0x1E800, // 1E800..1E8DF; Mende Kikakui
3407 0x1E8E0, // unassigned
3408 0x1E900, // 1E900..1E95F; Adlam
3409 0x1E960, // unassigned
3410 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
3411 0x1EF00, // unassigned
3412 0x1F000, // 1F000..1F02F; Mahjong Tiles
3413 0x1F030, // 1F030..1F09F; Domino Tiles
3414 0x1F0A0, // 1F0A0..1F0FF; Playing Cards
3415 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement
3416 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement
3417 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
3418 0x1F600, // 1F600..1F64F; Emoticons
3419 0x1F650, // 1F650..1F67F; Ornamental Dingbats
3420 0x1F680, // 1F680..1F6FF; Transport and Map Symbols
3421 0x1F700, // 1F700..1F77F; Alchemical Symbols
3422 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended
3423 0x1F800, // 1F800..1F8FF; Supplemental Arrows-C
3424 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs
3425 0x1FA00, // unassigned
3426 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B
3427 0x2A6E0, // unassigned
3428 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C
3429 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D
3430 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E
3431 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
3432 0x2EBF0, // unassigned
3433 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
3434 0x2FA20, // unassigned
3435 0xE0000, // E0000..E007F; Tags
3436 0xE0080, // unassigned
3437 0xE0100, // E0100..E01EF; Variation Selectors Supplement
3438 0xE01F0, // unassigned
3439 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A
3440 0x100000 // 100000..10FFFF; Supplementary Private Use Area-B
3441 };
3442
// Lookup table used by of(int): of(int) binary-searches blockStarts and
// returns the element of this array at the resulting index, so entry i
// here is the block whose range begins at blockStarts[i]. The two arrays
// must therefore be kept in step (same length, same order) whenever
// either one is edited.
//
// A null entry marks a gap between defined blocks (the matching
// blockStarts entry is commented "unassigned"); of(int) returns null for
// code points that fall into such a gap.
private static final UnicodeBlock[] blocks = {
    BASIC_LATIN,
    LATIN_1_SUPPLEMENT,
    LATIN_EXTENDED_A,
    LATIN_EXTENDED_B,
    IPA_EXTENSIONS,
    SPACING_MODIFIER_LETTERS,
    COMBINING_DIACRITICAL_MARKS,
    GREEK,
    CYRILLIC,
    CYRILLIC_SUPPLEMENTARY,
    ARMENIAN,
    HEBREW,
    ARABIC,
    SYRIAC,
    ARABIC_SUPPLEMENT,
    THAANA,
    NKO,
    SAMARITAN,
    MANDAIC,
    SYRIAC_SUPPLEMENT,
    null,
    ARABIC_EXTENDED_A,
    DEVANAGARI,
    BENGALI,
    GURMUKHI,
    GUJARATI,
    ORIYA,
    TAMIL,
    TELUGU,
    KANNADA,
    MALAYALAM,
    SINHALA,
    THAI,
    LAO,
    TIBETAN,
    MYANMAR,
    GEORGIAN,
    HANGUL_JAMO,
    ETHIOPIC,
    ETHIOPIC_SUPPLEMENT,
    CHEROKEE,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
    OGHAM,
    RUNIC,
    TAGALOG,
    HANUNOO,
    BUHID,
    TAGBANWA,
    KHMER,
    MONGOLIAN,
    UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
    LIMBU,
    TAI_LE,
    NEW_TAI_LUE,
    KHMER_SYMBOLS,
    BUGINESE,
    TAI_THAM,
    COMBINING_DIACRITICAL_MARKS_EXTENDED,
    BALINESE,
    SUNDANESE,
    BATAK,
    LEPCHA,
    OL_CHIKI,
    CYRILLIC_EXTENDED_C,
    null,
    SUNDANESE_SUPPLEMENT,
    VEDIC_EXTENSIONS,
    PHONETIC_EXTENSIONS,
    PHONETIC_EXTENSIONS_SUPPLEMENT,
    COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
    LATIN_EXTENDED_ADDITIONAL,
    GREEK_EXTENDED,
    GENERAL_PUNCTUATION,
    SUPERSCRIPTS_AND_SUBSCRIPTS,
    CURRENCY_SYMBOLS,
    COMBINING_MARKS_FOR_SYMBOLS,
    LETTERLIKE_SYMBOLS,
    NUMBER_FORMS,
    ARROWS,
    MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_TECHNICAL,
    CONTROL_PICTURES,
    OPTICAL_CHARACTER_RECOGNITION,
    ENCLOSED_ALPHANUMERICS,
    BOX_DRAWING,
    BLOCK_ELEMENTS,
    GEOMETRIC_SHAPES,
    MISCELLANEOUS_SYMBOLS,
    DINGBATS,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
    SUPPLEMENTAL_ARROWS_A,
    BRAILLE_PATTERNS,
    SUPPLEMENTAL_ARROWS_B,
    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
    SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
    MISCELLANEOUS_SYMBOLS_AND_ARROWS,
    GLAGOLITIC,
    LATIN_EXTENDED_C,
    COPTIC,
    GEORGIAN_SUPPLEMENT,
    TIFINAGH,
    ETHIOPIC_EXTENDED,
    CYRILLIC_EXTENDED_A,
    SUPPLEMENTAL_PUNCTUATION,
    CJK_RADICALS_SUPPLEMENT,
    KANGXI_RADICALS,
    null,
    IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
    CJK_SYMBOLS_AND_PUNCTUATION,
    HIRAGANA,
    KATAKANA,
    BOPOMOFO,
    HANGUL_COMPATIBILITY_JAMO,
    KANBUN,
    BOPOMOFO_EXTENDED,
    CJK_STROKES,
    KATAKANA_PHONETIC_EXTENSIONS,
    ENCLOSED_CJK_LETTERS_AND_MONTHS,
    CJK_COMPATIBILITY,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    YIJING_HEXAGRAM_SYMBOLS,
    CJK_UNIFIED_IDEOGRAPHS,
    YI_SYLLABLES,
    YI_RADICALS,
    LISU,
    VAI,
    CYRILLIC_EXTENDED_B,
    BAMUM,
    MODIFIER_TONE_LETTERS,
    LATIN_EXTENDED_D,
    SYLOTI_NAGRI,
    COMMON_INDIC_NUMBER_FORMS,
    PHAGS_PA,
    SAURASHTRA,
    DEVANAGARI_EXTENDED,
    KAYAH_LI,
    REJANG,
    HANGUL_JAMO_EXTENDED_A,
    JAVANESE,
    MYANMAR_EXTENDED_B,
    CHAM,
    MYANMAR_EXTENDED_A,
    TAI_VIET,
    MEETEI_MAYEK_EXTENSIONS,
    ETHIOPIC_EXTENDED_A,
    LATIN_EXTENDED_E,
    CHEROKEE_SUPPLEMENT,
    MEETEI_MAYEK,
    HANGUL_SYLLABLES,
    HANGUL_JAMO_EXTENDED_B,
    HIGH_SURROGATES,
    HIGH_PRIVATE_USE_SURROGATES,
    LOW_SURROGATES,
    PRIVATE_USE_AREA,
    CJK_COMPATIBILITY_IDEOGRAPHS,
    ALPHABETIC_PRESENTATION_FORMS,
    ARABIC_PRESENTATION_FORMS_A,
    VARIATION_SELECTORS,
    VERTICAL_FORMS,
    COMBINING_HALF_MARKS,
    CJK_COMPATIBILITY_FORMS,
    SMALL_FORM_VARIANTS,
    ARABIC_PRESENTATION_FORMS_B,
    HALFWIDTH_AND_FULLWIDTH_FORMS,
    SPECIALS,
    LINEAR_B_SYLLABARY,
    LINEAR_B_IDEOGRAMS,
    AEGEAN_NUMBERS,
    ANCIENT_GREEK_NUMBERS,
    ANCIENT_SYMBOLS,
    PHAISTOS_DISC,
    null,
    LYCIAN,
    CARIAN,
    COPTIC_EPACT_NUMBERS,
    OLD_ITALIC,
    GOTHIC,
    OLD_PERMIC,
    UGARITIC,
    OLD_PERSIAN,
    null,
    DESERET,
    SHAVIAN,
    OSMANYA,
    OSAGE,
    ELBASAN,
    CAUCASIAN_ALBANIAN,
    null,
    LINEAR_A,
    null,
    CYPRIOT_SYLLABARY,
    IMPERIAL_ARAMAIC,
    PALMYRENE,
    NABATAEAN,
    null,
    HATRAN,
    PHOENICIAN,
    LYDIAN,
    null,
    MEROITIC_HIEROGLYPHS,
    MEROITIC_CURSIVE,
    KHAROSHTHI,
    OLD_SOUTH_ARABIAN,
    OLD_NORTH_ARABIAN,
    null,
    MANICHAEAN,
    AVESTAN,
    INSCRIPTIONAL_PARTHIAN,
    INSCRIPTIONAL_PAHLAVI,
    PSALTER_PAHLAVI,
    null,
    OLD_TURKIC,
    null,
    OLD_HUNGARIAN,
    null,
    RUMI_NUMERAL_SYMBOLS,
    null,
    BRAHMI,
    KAITHI,
    SORA_SOMPENG,
    CHAKMA,
    MAHAJANI,
    SHARADA,
    SINHALA_ARCHAIC_NUMBERS,
    KHOJKI,
    null,
    MULTANI,
    KHUDAWADI,
    GRANTHA,
    null,
    NEWA,
    TIRHUTA,
    null,
    SIDDHAM,
    MODI,
    MONGOLIAN_SUPPLEMENT,
    TAKRI,
    null,
    AHOM,
    null,
    WARANG_CITI,
    null,
    ZANABAZAR_SQUARE,
    SOYOMBO,
    null,
    PAU_CIN_HAU,
    null,
    BHAIKSUKI,
    MARCHEN,
    null,
    MASARAM_GONDI,
    null,
    CUNEIFORM,
    CUNEIFORM_NUMBERS_AND_PUNCTUATION,
    EARLY_DYNASTIC_CUNEIFORM,
    null,
    EGYPTIAN_HIEROGLYPHS,
    null,
    ANATOLIAN_HIEROGLYPHS,
    null,
    BAMUM_SUPPLEMENT,
    MRO,
    null,
    BASSA_VAH,
    PAHAWH_HMONG,
    null,
    MIAO,
    null,
    IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
    TANGUT,
    TANGUT_COMPONENTS,
    null,
    KANA_SUPPLEMENT,
    KANA_EXTENDED_A,
    null,
    NUSHU,
    null,
    DUPLOYAN,
    SHORTHAND_FORMAT_CONTROLS,
    null,
    BYZANTINE_MUSICAL_SYMBOLS,
    MUSICAL_SYMBOLS,
    ANCIENT_GREEK_MUSICAL_NOTATION,
    null,
    TAI_XUAN_JING_SYMBOLS,
    COUNTING_ROD_NUMERALS,
    null,
    MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
    SUTTON_SIGNWRITING,
    null,
    GLAGOLITIC_SUPPLEMENT,
    null,
    MENDE_KIKAKUI,
    null,
    ADLAM,
    null,
    ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
    null,
    MAHJONG_TILES,
    DOMINO_TILES,
    PLAYING_CARDS,
    ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
    ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
    MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
    EMOTICONS,
    ORNAMENTAL_DINGBATS,
    TRANSPORT_AND_MAP_SYMBOLS,
    ALCHEMICAL_SYMBOLS,
    GEOMETRIC_SHAPES_EXTENDED,
    SUPPLEMENTAL_ARROWS_C,
    SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    null,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
    CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
    null,
    CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
    null,
    TAGS,
    null,
    VARIATION_SELECTORS_SUPPLEMENT,
    null,
    SUPPLEMENTARY_PRIVATE_USE_AREA_A,
    SUPPLEMENTARY_PRIVATE_USE_AREA_B
};
3772
3773
3774 /**
3775 * Returns the object representing the Unicode block containing the
3776 * given character, or {@code null} if the character is not a
3777 * member of a defined block.
3778 *
3779 * <p><b>Note:</b> This method cannot handle
3780 * <a href="Character.html#supplementary"> supplementary
3781 * characters</a>. To support all Unicode characters, including
3782 * supplementary characters, use the {@link #of(int)} method.
3783 *
3784 * @param c The character in question
3785 * @return The {@code UnicodeBlock} instance representing the
3786 * Unicode block of which this character is a member, or
3787 * {@code null} if the character is not a member of any
3788 * Unicode block
3789 */
3790 public static UnicodeBlock of(char c) {
3791 return of((int)c);
3792 }
3793
3794 /**
3795 * Returns the object representing the Unicode block
3796 * containing the given character (Unicode code point), or
3797 * {@code null} if the character is not a member of a
3798 * defined block.
3799 *
3800 * @param codePoint the character (Unicode code point) in question.
3801 * @return The {@code UnicodeBlock} instance representing the
3802 * Unicode block of which this character is a member, or
3803 * {@code null} if the character is not a member of any
3804 * Unicode block
3805 * @throws IllegalArgumentException if the specified
3806 * {@code codePoint} is an invalid Unicode code point.
3807 * @see Character#isValidCodePoint(int)
3808 * @since 1.5
3809 */
3810 public static UnicodeBlock of(int codePoint) {
3811 if (!isValidCodePoint(codePoint)) {
3812 throw new IllegalArgumentException(
3813 String.format("Not a valid Unicode code point: 0x%X", codePoint));
3814 }
3815
3816 int top, bottom, current;
3817 bottom = 0;
3818 top = blockStarts.length;
3819 current = top/2;
3820
3821 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3822 while (top - bottom > 1) {
3823 if (codePoint >= blockStarts[current]) {
3824 bottom = current;
3825 } else {
3826 top = current;
3827 }
3828 current = (top + bottom) / 2;
3829 }
3830 return blocks[current];
3831 }
3832
3833 /**
3834 * Returns the UnicodeBlock with the given name. Block
3835 * names are determined by The Unicode Standard. The file
3836 * {@code Blocks-<version>.txt} defines blocks for a particular
3837 * version of the standard. The {@link Character} class specifies
3838 * the version of the standard that it supports.
3839 * <p>
3840 * This method accepts block names in the following forms:
3841 * <ol>
3842 * <li> Canonical block names as defined by the Unicode Standard.
3843 * For example, the standard defines a "Basic Latin" block. Therefore, this
3844 * method accepts "Basic Latin" as a valid block name. The documentation of
3845 * each UnicodeBlock provides the canonical name.
3846 * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3847 * is a valid block name for the "Basic Latin" block.
3848 * <li>The text representation of each constant UnicodeBlock identifier.
3849 * For example, this method will return the {@link #BASIC_LATIN} block if
3850 * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3851 * hyphens in the canonical name with underscores.
3852 * </ol>
3853 * Finally, character case is ignored for all of the valid block name forms.
3854 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3855 * The en_US locale's case mapping rules are used to provide case-insensitive
3856 * string comparisons for block name validation.
3857 * <p>
3858 * If the Unicode Standard changes block names, both the previous and
3859 * current names will be accepted.
3860 *
3861 * @param blockName A {@code UnicodeBlock} name.
3862 * @return The {@code UnicodeBlock} instance identified
3863 * by {@code blockName}
3864 * @throws IllegalArgumentException if {@code blockName} is an
3865 * invalid name
3866 * @throws NullPointerException if {@code blockName} is null
3867 * @since 1.5
3868 */
3869 public static final UnicodeBlock forName(String blockName) {
3870 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3871 if (block == null) {
3872 throw new IllegalArgumentException("Not a valid block name: "
3873 + blockName);
3874 }
3875 return block;
3876 }
3877 }
3878
3879
3880 /**
3881 * A family of character subsets representing the character scripts
3882 * defined in the <a href="http://www.unicode.org/reports/tr24/">
3883 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3884 * character is assigned to a single Unicode script, either a specific
3885 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3886 * one of the following three special values,
3887 * {@link Character.UnicodeScript#INHERITED Inherited},
3888 * {@link Character.UnicodeScript#COMMON Common} or
3889 * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3890 *
3891 * @since 1.7
3892 */
3893 public static enum UnicodeScript {
3894 /**
3895 * Unicode script "Common".
3896 */
3897 COMMON,
3898
3899 /**
3900 * Unicode script "Latin".
3901 */
3902 LATIN,
3903
3904 /**
3905 * Unicode script "Greek".
3906 */
3907 GREEK,
3908
3909 /**
3910 * Unicode script "Cyrillic".
3911 */
3912 CYRILLIC,
3913
3914 /**
3915 * Unicode script "Armenian".
3916 */
3917 ARMENIAN,
3918
3919 /**
3920 * Unicode script "Hebrew".
3921 */
3922 HEBREW,
3923
3924 /**
3925 * Unicode script "Arabic".
3926 */
3927 ARABIC,
3928
3929 /**
3930 * Unicode script "Syriac".
3931 */
3932 SYRIAC,
3933
3934 /**
3935 * Unicode script "Thaana".
3936 */
3937 THAANA,
3938
3939 /**
3940 * Unicode script "Devanagari".
3941 */
3942 DEVANAGARI,
3943
3944 /**
3945 * Unicode script "Bengali".
3946 */
3947 BENGALI,
3948
3949 /**
3950 * Unicode script "Gurmukhi".
3951 */
3952 GURMUKHI,
3953
3954 /**
3955 * Unicode script "Gujarati".
3956 */
3957 GUJARATI,
3958
3959 /**
3960 * Unicode script "Oriya".
3961 */
3962 ORIYA,
3963
3964 /**
3965 * Unicode script "Tamil".
3966 */
3967 TAMIL,
3968
3969 /**
3970 * Unicode script "Telugu".
3971 */
3972 TELUGU,
3973
3974 /**
3975 * Unicode script "Kannada".
3976 */
3977 KANNADA,
3978
3979 /**
3980 * Unicode script "Malayalam".
3981 */
3982 MALAYALAM,
3983
3984 /**
3985 * Unicode script "Sinhala".
3986 */
3987 SINHALA,
3988
3989 /**
3990 * Unicode script "Thai".
3991 */
3992 THAI,
3993
3994 /**
3995 * Unicode script "Lao".
3996 */
3997 LAO,
3998
3999 /**
4000 * Unicode script "Tibetan".
4001 */
4002 TIBETAN,
4003
4004 /**
4005 * Unicode script "Myanmar".
4006 */
4007 MYANMAR,
4008
4009 /**
4010 * Unicode script "Georgian".
4011 */
4012 GEORGIAN,
4013
4014 /**
4015 * Unicode script "Hangul".
4016 */
4017 HANGUL,
4018
4019 /**
4020 * Unicode script "Ethiopic".
4021 */
4022 ETHIOPIC,
4023
4024 /**
4025 * Unicode script "Cherokee".
4026 */
4027 CHEROKEE,
4028
4029 /**
4030 * Unicode script "Canadian_Aboriginal".
4031 */
4032 CANADIAN_ABORIGINAL,
4033
4034 /**
4035 * Unicode script "Ogham".
4036 */
4037 OGHAM,
4038
4039 /**
4040 * Unicode script "Runic".
4041 */
4042 RUNIC,
4043
4044 /**
4045 * Unicode script "Khmer".
4046 */
4047 KHMER,
4048
4049 /**
4050 * Unicode script "Mongolian".
4051 */
4052 MONGOLIAN,
4053
4054 /**
4055 * Unicode script "Hiragana".
4056 */
4057 HIRAGANA,
4058
4059 /**
4060 * Unicode script "Katakana".
4061 */
4062 KATAKANA,
4063
4064 /**
4065 * Unicode script "Bopomofo".
4066 */
4067 BOPOMOFO,
4068
4069 /**
4070 * Unicode script "Han".
4071 */
4072 HAN,
4073
4074 /**
4075 * Unicode script "Yi".
4076 */
4077 YI,
4078
4079 /**
4080 * Unicode script "Old_Italic".
4081 */
4082 OLD_ITALIC,
4083
4084 /**
4085 * Unicode script "Gothic".
4086 */
4087 GOTHIC,
4088
4089 /**
4090 * Unicode script "Deseret".
4091 */
4092 DESERET,
4093
4094 /**
4095 * Unicode script "Inherited".
4096 */
4097 INHERITED,
4098
4099 /**
4100 * Unicode script "Tagalog".
4101 */
4102 TAGALOG,
4103
4104 /**
4105 * Unicode script "Hanunoo".
4106 */
4107 HANUNOO,
4108
4109 /**
4110 * Unicode script "Buhid".
4111 */
4112 BUHID,
4113
4114 /**
4115 * Unicode script "Tagbanwa".
4116 */
4117 TAGBANWA,
4118
4119 /**
4120 * Unicode script "Limbu".
4121 */
4122 LIMBU,
4123
4124 /**
4125 * Unicode script "Tai_Le".
4126 */
4127 TAI_LE,
4128
4129 /**
4130 * Unicode script "Linear_B".
4131 */
4132 LINEAR_B,
4133
4134 /**
4135 * Unicode script "Ugaritic".
4136 */
4137 UGARITIC,
4138
4139 /**
4140 * Unicode script "Shavian".
4141 */
4142 SHAVIAN,
4143
4144 /**
4145 * Unicode script "Osmanya".
4146 */
4147 OSMANYA,
4148
4149 /**
4150 * Unicode script "Cypriot".
4151 */
4152 CYPRIOT,
4153
4154 /**
4155 * Unicode script "Braille".
4156 */
4157 BRAILLE,
4158
4159 /**
4160 * Unicode script "Buginese".
4161 */
4162 BUGINESE,
4163
4164 /**
4165 * Unicode script "Coptic".
4166 */
4167 COPTIC,
4168
4169 /**
4170 * Unicode script "New_Tai_Lue".
4171 */
4172 NEW_TAI_LUE,
4173
4174 /**
4175 * Unicode script "Glagolitic".
4176 */
4177 GLAGOLITIC,
4178
4179 /**
4180 * Unicode script "Tifinagh".
4181 */
4182 TIFINAGH,
4183
4184 /**
4185 * Unicode script "Syloti_Nagri".
4186 */
4187 SYLOTI_NAGRI,
4188
4189 /**
4190 * Unicode script "Old_Persian".
4191 */
4192 OLD_PERSIAN,
4193
4194 /**
4195 * Unicode script "Kharoshthi".
4196 */
4197 KHAROSHTHI,
4198
4199 /**
4200 * Unicode script "Balinese".
4201 */
4202 BALINESE,
4203
4204 /**
4205 * Unicode script "Cuneiform".
4206 */
4207 CUNEIFORM,
4208
4209 /**
4210 * Unicode script "Phoenician".
4211 */
4212 PHOENICIAN,
4213
4214 /**
4215 * Unicode script "Phags_Pa".
4216 */
4217 PHAGS_PA,
4218
4219 /**
4220 * Unicode script "Nko".
4221 */
4222 NKO,
4223
4224 /**
4225 * Unicode script "Sundanese".
4226 */
4227 SUNDANESE,
4228
4229 /**
4230 * Unicode script "Batak".
4231 */
4232 BATAK,
4233
4234 /**
4235 * Unicode script "Lepcha".
4236 */
4237 LEPCHA,
4238
4239 /**
4240 * Unicode script "Ol_Chiki".
4241 */
4242 OL_CHIKI,
4243
4244 /**
4245 * Unicode script "Vai".
4246 */
4247 VAI,
4248
4249 /**
4250 * Unicode script "Saurashtra".
4251 */
4252 SAURASHTRA,
4253
4254 /**
4255 * Unicode script "Kayah_Li".
4256 */
4257 KAYAH_LI,
4258
4259 /**
4260 * Unicode script "Rejang".
4261 */
4262 REJANG,
4263
4264 /**
4265 * Unicode script "Lycian".
4266 */
4267 LYCIAN,
4268
4269 /**
4270 * Unicode script "Carian".
4271 */
4272 CARIAN,
4273
4274 /**
4275 * Unicode script "Lydian".
4276 */
4277 LYDIAN,
4278
4279 /**
4280 * Unicode script "Cham".
4281 */
4282 CHAM,
4283
4284 /**
4285 * Unicode script "Tai_Tham".
4286 */
4287 TAI_THAM,
4288
4289 /**
4290 * Unicode script "Tai_Viet".
4291 */
4292 TAI_VIET,
4293
4294 /**
4295 * Unicode script "Avestan".
4296 */
4297 AVESTAN,
4298
4299 /**
4300 * Unicode script "Egyptian_Hieroglyphs".
4301 */
4302 EGYPTIAN_HIEROGLYPHS,
4303
4304 /**
4305 * Unicode script "Samaritan".
4306 */
4307 SAMARITAN,
4308
4309 /**
4310 * Unicode script "Mandaic".
4311 */
4312 MANDAIC,
4313
4314 /**
4315 * Unicode script "Lisu".
4316 */
4317 LISU,
4318
4319 /**
4320 * Unicode script "Bamum".
4321 */
4322 BAMUM,
4323
4324 /**
4325 * Unicode script "Javanese".
4326 */
4327 JAVANESE,
4328
4329 /**
4330 * Unicode script "Meetei_Mayek".
4331 */
4332 MEETEI_MAYEK,
4333
4334 /**
4335 * Unicode script "Imperial_Aramaic".
4336 */
4337 IMPERIAL_ARAMAIC,
4338
4339 /**
4340 * Unicode script "Old_South_Arabian".
4341 */
4342 OLD_SOUTH_ARABIAN,
4343
4344 /**
4345 * Unicode script "Inscriptional_Parthian".
4346 */
4347 INSCRIPTIONAL_PARTHIAN,
4348
4349 /**
4350 * Unicode script "Inscriptional_Pahlavi".
4351 */
4352 INSCRIPTIONAL_PAHLAVI,
4353
4354 /**
4355 * Unicode script "Old_Turkic".
4356 */
4357 OLD_TURKIC,
4358
4359 /**
4360 * Unicode script "Brahmi".
4361 */
4362 BRAHMI,
4363
4364 /**
4365 * Unicode script "Kaithi".
4366 */
4367 KAITHI,
4368
4369 /**
4370 * Unicode script "Meroitic Hieroglyphs".
4371 * @since 1.8
4372 */
4373 MEROITIC_HIEROGLYPHS,
4374
4375 /**
4376 * Unicode script "Meroitic Cursive".
4377 * @since 1.8
4378 */
4379 MEROITIC_CURSIVE,
4380
4381 /**
4382 * Unicode script "Sora Sompeng".
4383 * @since 1.8
4384 */
4385 SORA_SOMPENG,
4386
4387 /**
4388 * Unicode script "Chakma".
4389 * @since 1.8
4390 */
4391 CHAKMA,
4392
4393 /**
4394 * Unicode script "Sharada".
4395 * @since 1.8
4396 */
4397 SHARADA,
4398
4399 /**
4400 * Unicode script "Takri".
4401 * @since 1.8
4402 */
4403 TAKRI,
4404
4405 /**
4406 * Unicode script "Miao".
4407 * @since 1.8
4408 */
4409 MIAO,
4410
4411 /**
4412 * Unicode script "Caucasian Albanian".
4413 * @since 9
4414 */
4415 CAUCASIAN_ALBANIAN,
4416
4417 /**
4418 * Unicode script "Bassa Vah".
4419 * @since 9
4420 */
4421 BASSA_VAH,
4422
4423 /**
4424 * Unicode script "Duployan".
4425 * @since 9
4426 */
4427 DUPLOYAN,
4428
4429 /**
4430 * Unicode script "Elbasan".
4431 * @since 9
4432 */
4433 ELBASAN,
4434
4435 /**
4436 * Unicode script "Grantha".
4437 * @since 9
4438 */
4439 GRANTHA,
4440
4441 /**
4442 * Unicode script "Pahawh Hmong".
4443 * @since 9
4444 */
4445 PAHAWH_HMONG,
4446
4447 /**
4448 * Unicode script "Khojki".
4449 * @since 9
4450 */
4451 KHOJKI,
4452
4453 /**
4454 * Unicode script "Linear A".
4455 * @since 9
4456 */
4457 LINEAR_A,
4458
4459 /**
4460 * Unicode script "Mahajani".
4461 * @since 9
4462 */
4463 MAHAJANI,
4464
4465 /**
4466 * Unicode script "Manichaean".
4467 * @since 9
4468 */
4469 MANICHAEAN,
4470
4471 /**
4472 * Unicode script "Mende Kikakui".
4473 * @since 9
4474 */
4475 MENDE_KIKAKUI,
4476
4477 /**
4478 * Unicode script "Modi".
4479 * @since 9
4480 */
4481 MODI,
4482
4483 /**
4484 * Unicode script "Mro".
4485 * @since 9
4486 */
4487 MRO,
4488
4489 /**
4490 * Unicode script "Old North Arabian".
4491 * @since 9
4492 */
4493 OLD_NORTH_ARABIAN,
4494
4495 /**
4496 * Unicode script "Nabataean".
4497 * @since 9
4498 */
4499 NABATAEAN,
4500
4501 /**
4502 * Unicode script "Palmyrene".
4503 * @since 9
4504 */
4505 PALMYRENE,
4506
4507 /**
4508 * Unicode script "Pau Cin Hau".
4509 * @since 9
4510 */
4511 PAU_CIN_HAU,
4512
4513 /**
4514 * Unicode script "Old Permic".
4515 * @since 9
4516 */
4517 OLD_PERMIC,
4518
4519 /**
4520 * Unicode script "Psalter Pahlavi".
4521 * @since 9
4522 */
4523 PSALTER_PAHLAVI,
4524
4525 /**
4526 * Unicode script "Siddham".
4527 * @since 9
4528 */
4529 SIDDHAM,
4530
4531 /**
4532 * Unicode script "Khudawadi".
4533 * @since 9
4534 */
4535 KHUDAWADI,
4536
4537 /**
4538 * Unicode script "Tirhuta".
4539 * @since 9
4540 */
4541 TIRHUTA,
4542
4543 /**
4544 * Unicode script "Warang Citi".
4545 * @since 9
4546 */
4547 WARANG_CITI,
4548
4549 /**
4550 * Unicode script "Ahom".
4551 * @since 9
4552 */
4553 AHOM,
4554
4555 /**
4556 * Unicode script "Anatolian Hieroglyphs".
4557 * @since 9
4558 */
4559 ANATOLIAN_HIEROGLYPHS,
4560
4561 /**
4562 * Unicode script "Hatran".
4563 * @since 9
4564 */
4565 HATRAN,
4566
4567 /**
4568 * Unicode script "Multani".
4569 * @since 9
4570 */
4571 MULTANI,
4572
4573 /**
4574 * Unicode script "Old Hungarian".
4575 * @since 9
4576 */
4577 OLD_HUNGARIAN,
4578
4579 /**
4580 * Unicode script "SignWriting".
4581 * @since 9
4582 */
4583 SIGNWRITING,
4584
4585 /**
4586 * Unicode script "Adlam".
4587 * @since 11
4588 */
4589 ADLAM,
4590
4591 /**
4592 * Unicode script "Bhaiksuki".
4593 * @since 11
4594 */
4595 BHAIKSUKI,
4596
4597 /**
4598 * Unicode script "Marchen".
4599 * @since 11
4600 */
4601 MARCHEN,
4602
4603 /**
4604 * Unicode script "Newa".
4605 * @since 11
4606 */
4607 NEWA,
4608
4609 /**
4610 * Unicode script "Osage".
4611 * @since 11
4612 */
4613 OSAGE,
4614
4615 /**
4616 * Unicode script "Tangut".
4617 * @since 11
4618 */
4619 TANGUT,
4620
4621 /**
4622 * Unicode script "Masaram Gondi".
4623 * @since 11
4624 */
4625 MASARAM_GONDI,
4626
4627 /**
4628 * Unicode script "Nushu".
4629 * @since 11
4630 */
4631 NUSHU,
4632
4633 /**
4634 * Unicode script "Soyombo".
4635 * @since 11
4636 */
4637 SOYOMBO,
4638
4639 /**
4640 * Unicode script "Zanabazar Square".
4641 * @since 11
4642 */
4643 ZANABAZAR_SQUARE,
4644
4645 /**
4646 * Unicode script "Unknown".
4647 */
4648 UNKNOWN;
4649
4650 private static final int[] scriptStarts = {
4651 0x0000, // 0000..0040; COMMON
4652 0x0041, // 0041..005A; LATIN
4653 0x005B, // 005B..0060; COMMON
4654 0x0061, // 0061..007A; LATIN
4655 0x007B, // 007B..00A9; COMMON
4656 0x00AA, // 00AA ; LATIN
4657 0x00AB, // 00AB..00B9; COMMON
4658 0x00BA, // 00BA ; LATIN
4659 0x00BB, // 00BB..00BF; COMMON
4660 0x00C0, // 00C0..00D6; LATIN
4661 0x00D7, // 00D7 ; COMMON
4662 0x00D8, // 00D8..00F6; LATIN
4663 0x00F7, // 00F7 ; COMMON
4664 0x00F8, // 00F8..02B8; LATIN
4665 0x02B9, // 02B9..02DF; COMMON
4666 0x02E0, // 02E0..02E4; LATIN
4667 0x02E5, // 02E5..02E9; COMMON
4668 0x02EA, // 02EA..02EB; BOPOMOFO
4669 0x02EC, // 02EC..02FF; COMMON
4670 0x0300, // 0300..036F; INHERITED
4671 0x0370, // 0370..0373; GREEK
4672 0x0374, // 0374 ; COMMON
4673 0x0375, // 0375..0377; GREEK
4674 0x0378, // 0378..0379; UNKNOWN
4675 0x037A, // 037A..037D; GREEK
4676 0x037E, // 037E ; COMMON
4677 0x037F, // 037F ; GREEK
4678 0x0380, // 0380..0383; UNKNOWN
4679 0x0384, // 0384 ; GREEK
4680 0x0385, // 0385 ; COMMON
4681 0x0386, // 0386 ; GREEK
4682 0x0387, // 0387 ; COMMON
4683 0x0388, // 0388..038A; GREEK
4684 0x038B, // 038B ; UNKNOWN
4685 0x038C, // 038C ; GREEK
4686 0x038D, // 038D ; UNKNOWN
4687 0x038E, // 038E..03A1; GREEK
4688 0x03A2, // 03A2 ; UNKNOWN
4689 0x03A3, // 03A3..03E1; GREEK
4690 0x03E2, // 03E2..03EF; COPTIC
4691 0x03F0, // 03F0..03FF; GREEK
4692 0x0400, // 0400..0484; CYRILLIC
4693 0x0485, // 0485..0486; INHERITED
4694 0x0487, // 0487..052F; CYRILLIC
4695 0x0530, // 0530 ; UNKNOWN
4696 0x0531, // 0531..0556; ARMENIAN
4697 0x0557, // 0557..0558; UNKNOWN
4698 0x0559, // 0559..055F; ARMENIAN
4699 0x0560, // 0560 ; UNKNOWN
4700 0x0561, // 0561..0587; ARMENIAN
4701 0x0588, // 0588 ; UNKNOWN
4702 0x0589, // 0589 ; COMMON
4703 0x058A, // 058A ; ARMENIAN
4704 0x058B, // 058B..058C; UNKNOWN
4705 0x058D, // 058D..058F; ARMENIAN
4706 0x0590, // 0590 ; UNKNOWN
4707 0x0591, // 0591..05C7; HEBREW
4708 0x05C8, // 05C8..05CF; UNKNOWN
4709 0x05D0, // 05D0..05EA; HEBREW
4710 0x05EB, // 05EB..05EF; UNKNOWN
4711 0x05F0, // 05F0..05F4; HEBREW
4712 0x05F5, // 05F5..05FF; UNKNOWN
4713 0x0600, // 0600..0604; ARABIC
4714 0x0605, // 0605 ; COMMON
4715 0x0606, // 0606..060B; ARABIC
4716 0x060C, // 060C ; COMMON
4717 0x060D, // 060D..061A; ARABIC
4718 0x061B, // 061B ; COMMON
4719 0x061C, // 061C ; ARABIC
4720 0x061D, // 061D ; UNKNOWN
4721 0x061E, // 061E ; ARABIC
4722 0x061F, // 061F ; COMMON
4723 0x0620, // 0620..063F; ARABIC
4724 0x0640, // 0640 ; COMMON
4725 0x0641, // 0641..064A; ARABIC
4726 0x064B, // 064B..0655; INHERITED
4727 0x0656, // 0656..066F; ARABIC
4728 0x0670, // 0670 ; INHERITED
4729 0x0671, // 0671..06DC; ARABIC
4730 0x06DD, // 06DD ; COMMON
4731 0x06DE, // 06DE..06FF; ARABIC
4732 0x0700, // 0700..070D; SYRIAC
4733 0x070E, // 070E ; UNKNOWN
4734 0x070F, // 070F..074A; SYRIAC
4735 0x074B, // 074B..074C; UNKNOWN
4736 0x074D, // 074D..074F; SYRIAC
4737 0x0750, // 0750..077F; ARABIC
4738 0x0780, // 0780..07B1; THAANA
4739 0x07B2, // 07B2..07BF; UNKNOWN
4740 0x07C0, // 07C0..07FA; NKO
4741 0x07FB, // 07FB..07FF; UNKNOWN
4742 0x0800, // 0800..082D; SAMARITAN
4743 0x082E, // 082E..082F; UNKNOWN
4744 0x0830, // 0830..083E; SAMARITAN
4745 0x083F, // 083F ; UNKNOWN
4746 0x0840, // 0840..085B; MANDAIC
4747 0x085C, // 085C..085D; UNKNOWN
4748 0x085E, // 085E ; MANDAIC
4749 0x085F, // 085F ; UNKNOWN
4750 0x0860, // 0860..086A; SYRIAC
4751 0x086B, // 086B..089F; UNKNOWN
4752 0x08A0, // 08A0..08B4; ARABIC
4753 0x08B5, // 08B5 ; UNKNOWN
4754 0x08B6, // 08B6..08BD; ARABIC
4755 0x08BE, // 08BE..08D3; UNKNOWN
4756 0x08D4, // 08D4..08E1; ARABIC
4757 0x08E2, // 08E2 ; COMMON
4758 0x08E3, // 08E3..08FF; ARABIC
4759 0x0900, // 0900..0950; DEVANAGARI
4760 0x0951, // 0951..0952; INHERITED
4761 0x0953, // 0953..0963; DEVANAGARI
4762 0x0964, // 0964..0965; COMMON
4763 0x0966, // 0966..097F; DEVANAGARI
4764 0x0980, // 0980..0983; BENGALI
4765 0x0984, // 0984 ; UNKNOWN
4766 0x0985, // 0985..098C; BENGALI
4767 0x098D, // 098D..098E; UNKNOWN
4768 0x098F, // 098F..0990; BENGALI
4769 0x0991, // 0991..0992; UNKNOWN
4770 0x0993, // 0993..09A8; BENGALI
4771 0x09A9, // 09A9 ; UNKNOWN
4772 0x09AA, // 09AA..09B0; BENGALI
4773 0x09B1, // 09B1 ; UNKNOWN
4774 0x09B2, // 09B2 ; BENGALI
4775 0x09B3, // 09B3..09B5; UNKNOWN
4776 0x09B6, // 09B6..09B9; BENGALI
4777 0x09BA, // 09BA..09BB; UNKNOWN
4778 0x09BC, // 09BC..09C4; BENGALI
4779 0x09C5, // 09C5..09C6; UNKNOWN
4780 0x09C7, // 09C7..09C8; BENGALI
4781 0x09C9, // 09C9..09CA; UNKNOWN
4782 0x09CB, // 09CB..09CE; BENGALI
4783 0x09CF, // 09CF..09D6; UNKNOWN
4784 0x09D7, // 09D7 ; BENGALI
4785 0x09D8, // 09D8..09DB; UNKNOWN
4786 0x09DC, // 09DC..09DD; BENGALI
4787 0x09DE, // 09DE ; UNKNOWN
4788 0x09DF, // 09DF..09E3; BENGALI
4789 0x09E4, // 09E4..09E5; UNKNOWN
4790 0x09E6, // 09E6..09FD; BENGALI
4791 0x09FE, // 09FE..0A00; UNKNOWN
4792 0x0A01, // 0A01..0A03; GURMUKHI
4793 0x0A04, // 0A04 ; UNKNOWN
4794 0x0A05, // 0A05..0A0A; GURMUKHI
4795 0x0A0B, // 0A0B..0A0E; UNKNOWN
4796 0x0A0F, // 0A0F..0A10; GURMUKHI
4797 0x0A11, // 0A11..0A12; UNKNOWN
4798 0x0A13, // 0A13..0A28; GURMUKHI
4799 0x0A29, // 0A29 ; UNKNOWN
4800 0x0A2A, // 0A2A..0A30; GURMUKHI
4801 0x0A31, // 0A31 ; UNKNOWN
4802 0x0A32, // 0A32..0A33; GURMUKHI
4803 0x0A34, // 0A34 ; UNKNOWN
4804 0x0A35, // 0A35..0A36; GURMUKHI
4805 0x0A37, // 0A37 ; UNKNOWN
4806 0x0A38, // 0A38..0A39; GURMUKHI
4807 0x0A3A, // 0A3A..0A3B; UNKNOWN
4808 0x0A3C, // 0A3C ; GURMUKHI
4809 0x0A3D, // 0A3D ; UNKNOWN
4810 0x0A3E, // 0A3E..0A42; GURMUKHI
4811 0x0A43, // 0A43..0A46; UNKNOWN
4812 0x0A47, // 0A47..0A48; GURMUKHI
4813 0x0A49, // 0A49..0A4A; UNKNOWN
4814 0x0A4B, // 0A4B..0A4D; GURMUKHI
4815 0x0A4E, // 0A4E..0A50; UNKNOWN
4816 0x0A51, // 0A51 ; GURMUKHI
4817 0x0A52, // 0A52..0A58; UNKNOWN
4818 0x0A59, // 0A59..0A5C; GURMUKHI
4819 0x0A5D, // 0A5D ; UNKNOWN
4820 0x0A5E, // 0A5E ; GURMUKHI
4821 0x0A5F, // 0A5F..0A65; UNKNOWN
4822 0x0A66, // 0A66..0A75; GURMUKHI
4823 0x0A76, // 0A76..0A80; UNKNOWN
4824 0x0A81, // 0A81..0A83; GUJARATI
4825 0x0A84, // 0A84 ; UNKNOWN
4826 0x0A85, // 0A85..0A8D; GUJARATI
4827 0x0A8E, // 0A8E ; UNKNOWN
4828 0x0A8F, // 0A8F..0A91; GUJARATI
4829 0x0A92, // 0A92 ; UNKNOWN
4830 0x0A93, // 0A93..0AA8; GUJARATI
4831 0x0AA9, // 0AA9 ; UNKNOWN
4832 0x0AAA, // 0AAA..0AB0; GUJARATI
4833 0x0AB1, // 0AB1 ; UNKNOWN
4834 0x0AB2, // 0AB2..0AB3; GUJARATI
4835 0x0AB4, // 0AB4 ; UNKNOWN
4836 0x0AB5, // 0AB5..0AB9; GUJARATI
4837 0x0ABA, // 0ABA..0ABB; UNKNOWN
4838 0x0ABC, // 0ABC..0AC5; GUJARATI
4839 0x0AC6, // 0AC6 ; UNKNOWN
4840 0x0AC7, // 0AC7..0AC9; GUJARATI
4841 0x0ACA, // 0ACA ; UNKNOWN
4842 0x0ACB, // 0ACB..0ACD; GUJARATI
4843 0x0ACE, // 0ACE..0ACF; UNKNOWN
4844 0x0AD0, // 0AD0 ; GUJARATI
4845 0x0AD1, // 0AD1..0ADF; UNKNOWN
4846 0x0AE0, // 0AE0..0AE3; GUJARATI
4847 0x0AE4, // 0AE4..0AE5; UNKNOWN
4848 0x0AE6, // 0AE6..0AF1; GUJARATI
4849 0x0AF2, // 0AF2..0AF8; UNKNOWN
4850 0x0AF9, // 0AF9..0AFF; GUJARATI
4851 0x0B00, // 0B00 ; UNKNOWN
4852 0x0B01, // 0B01..0B03; ORIYA
4853 0x0B04, // 0B04 ; UNKNOWN
4854 0x0B05, // 0B05..0B0C; ORIYA
4855 0x0B0D, // 0B0D..0B0E; UNKNOWN
4856 0x0B0F, // 0B0F..0B10; ORIYA
4857 0x0B11, // 0B11..0B12; UNKNOWN
4858 0x0B13, // 0B13..0B28; ORIYA
4859 0x0B29, // 0B29 ; UNKNOWN
4860 0x0B2A, // 0B2A..0B30; ORIYA
4861 0x0B31, // 0B31 ; UNKNOWN
4862 0x0B32, // 0B32..0B33; ORIYA
4863 0x0B34, // 0B34 ; UNKNOWN
4864 0x0B35, // 0B35..0B39; ORIYA
4865 0x0B3A, // 0B3A..0B3B; UNKNOWN
4866 0x0B3C, // 0B3C..0B44; ORIYA
4867 0x0B45, // 0B45..0B46; UNKNOWN
4868 0x0B47, // 0B47..0B48; ORIYA
4869 0x0B49, // 0B49..0B4A; UNKNOWN
4870 0x0B4B, // 0B4B..0B4D; ORIYA
4871 0x0B4E, // 0B4E..0B55; UNKNOWN
4872 0x0B56, // 0B56..0B57; ORIYA
4873 0x0B58, // 0B58..0B5B; UNKNOWN
4874 0x0B5C, // 0B5C..0B5D; ORIYA
4875 0x0B5E, // 0B5E ; UNKNOWN
4876 0x0B5F, // 0B5F..0B63; ORIYA
4877 0x0B64, // 0B64..0B65; UNKNOWN
4878 0x0B66, // 0B66..0B77; ORIYA
4879 0x0B78, // 0B78..0B81; UNKNOWN
4880 0x0B82, // 0B82..0B83; TAMIL
4881 0x0B84, // 0B84 ; UNKNOWN
4882 0x0B85, // 0B85..0B8A; TAMIL
4883 0x0B8B, // 0B8B..0B8D; UNKNOWN
4884 0x0B8E, // 0B8E..0B90; TAMIL
4885 0x0B91, // 0B91 ; UNKNOWN
4886 0x0B92, // 0B92..0B95; TAMIL
4887 0x0B96, // 0B96..0B98; UNKNOWN
4888 0x0B99, // 0B99..0B9A; TAMIL
4889 0x0B9B, // 0B9B ; UNKNOWN
4890 0x0B9C, // 0B9C ; TAMIL
4891 0x0B9D, // 0B9D ; UNKNOWN
4892 0x0B9E, // 0B9E..0B9F; TAMIL
4893 0x0BA0, // 0BA0..0BA2; UNKNOWN
4894 0x0BA3, // 0BA3..0BA4; TAMIL
4895 0x0BA5, // 0BA5..0BA7; UNKNOWN
4896 0x0BA8, // 0BA8..0BAA; TAMIL
4897 0x0BAB, // 0BAB..0BAD; UNKNOWN
4898 0x0BAE, // 0BAE..0BB9; TAMIL
4899 0x0BBA, // 0BBA..0BBD; UNKNOWN
4900 0x0BBE, // 0BBE..0BC2; TAMIL
4901 0x0BC3, // 0BC3..0BC5; UNKNOWN
4902 0x0BC6, // 0BC6..0BC8; TAMIL
4903 0x0BC9, // 0BC9 ; UNKNOWN
4904 0x0BCA, // 0BCA..0BCD; TAMIL
4905 0x0BCE, // 0BCE..0BCF; UNKNOWN
4906 0x0BD0, // 0BD0 ; TAMIL
4907 0x0BD1, // 0BD1..0BD6; UNKNOWN
4908 0x0BD7, // 0BD7 ; TAMIL
4909 0x0BD8, // 0BD8..0BE5; UNKNOWN
4910 0x0BE6, // 0BE6..0BFA; TAMIL
4911 0x0BFB, // 0BFB..0BFF; UNKNOWN
4912 0x0C00, // 0C00..0C03; TELUGU
4913 0x0C04, // 0C04 ; UNKNOWN
4914 0x0C05, // 0C05..0C0C; TELUGU
4915 0x0C0D, // 0C0D ; UNKNOWN
4916 0x0C0E, // 0C0E..0C10; TELUGU
4917 0x0C11, // 0C11 ; UNKNOWN
4918 0x0C12, // 0C12..0C28; TELUGU
4919 0x0C29, // 0C29 ; UNKNOWN
4920 0x0C2A, // 0C2A..0C39; TELUGU
4921 0x0C3A, // 0C3A..0C3C; UNKNOWN
4922 0x0C3D, // 0C3D..0C44; TELUGU
4923 0x0C45, // 0C45 ; UNKNOWN
4924 0x0C46, // 0C46..0C48; TELUGU
4925 0x0C49, // 0C49 ; UNKNOWN
4926 0x0C4A, // 0C4A..0C4D; TELUGU
4927 0x0C4E, // 0C4E..0C54; UNKNOWN
4928 0x0C55, // 0C55..0C56; TELUGU
4929 0x0C57, // 0C57 ; UNKNOWN
4930 0x0C58, // 0C58..0C5A; TELUGU
4931 0x0C5B, // 0C5B..0C5F; UNKNOWN
4932 0x0C60, // 0C60..0C63; TELUGU
4933 0x0C64, // 0C64..0C65; UNKNOWN
4934 0x0C66, // 0C66..0C6F; TELUGU
4935 0x0C70, // 0C70..0C77; UNKNOWN
4936 0x0C78, // 0C78..0C7F; TELUGU
4937 0x0C80, // 0C80..0C83; KANNADA
4938 0x0C84, // 0C84 ; UNKNOWN
4939 0x0C85, // 0C85..0C8C; KANNADA
4940 0x0C8D, // 0C8D ; UNKNOWN
4941 0x0C8E, // 0C8E..0C90; KANNADA
4942 0x0C91, // 0C91 ; UNKNOWN
4943 0x0C92, // 0C92..0CA8; KANNADA
4944 0x0CA9, // 0CA9 ; UNKNOWN
4945 0x0CAA, // 0CAA..0CB3; KANNADA
4946 0x0CB4, // 0CB4 ; UNKNOWN
4947 0x0CB5, // 0CB5..0CB9; KANNADA
4948 0x0CBA, // 0CBA..0CBB; UNKNOWN
4949 0x0CBC, // 0CBC..0CC4; KANNADA
4950 0x0CC5, // 0CC5 ; UNKNOWN
4951 0x0CC6, // 0CC6..0CC8; KANNADA
4952 0x0CC9, // 0CC9 ; UNKNOWN
4953 0x0CCA, // 0CCA..0CCD; KANNADA
4954 0x0CCE, // 0CCE..0CD4; UNKNOWN
4955 0x0CD5, // 0CD5..0CD6; KANNADA
4956 0x0CD7, // 0CD7..0CDD; UNKNOWN
4957 0x0CDE, // 0CDE ; KANNADA
4958 0x0CDF, // 0CDF ; UNKNOWN
4959 0x0CE0, // 0CE0..0CE3; KANNADA
4960 0x0CE4, // 0CE4..0CE5; UNKNOWN
4961 0x0CE6, // 0CE6..0CEF; KANNADA
4962 0x0CF0, // 0CF0 ; UNKNOWN
4963 0x0CF1, // 0CF1..0CF2; KANNADA
4964 0x0CF3, // 0CF3..0CFF; UNKNOWN
4965 0x0D00, // 0D00..0D03; MALAYALAM
4966 0x0D04, // 0D04 ; UNKNOWN
4967 0x0D05, // 0D05..0D0C; MALAYALAM
4968 0x0D0D, // 0D0D ; UNKNOWN
4969 0x0D0E, // 0D0E..0D10; MALAYALAM
4970 0x0D11, // 0D11 ; UNKNOWN
4971 0x0D12, // 0D12..0D44; MALAYALAM
4972 0x0D45, // 0D45 ; UNKNOWN
4973 0x0D46, // 0D46..0D48; MALAYALAM
4974 0x0D49, // 0D49 ; UNKNOWN
4975 0x0D4A, // 0D4A..0D4F; MALAYALAM
4976 0x0D50, // 0D50..0D53; UNKNOWN
4977 0x0D54, // 0D54..0D63; MALAYALAM
4978 0x0D64, // 0D64..0D65; UNKNOWN
4979 0x0D66, // 0D66..0D7F; MALAYALAM
4980 0x0D80, // 0D80..0D81; UNKNOWN
4981 0x0D82, // 0D82..0D83; SINHALA
4982 0x0D84, // 0D84 ; UNKNOWN
4983 0x0D85, // 0D85..0D96; SINHALA
4984 0x0D97, // 0D97..0D99; UNKNOWN
4985 0x0D9A, // 0D9A..0DB1; SINHALA
4986 0x0DB2, // 0DB2 ; UNKNOWN
4987 0x0DB3, // 0DB3..0DBB; SINHALA
4988 0x0DBC, // 0DBC ; UNKNOWN
4989 0x0DBD, // 0DBD ; SINHALA
4990 0x0DBE, // 0DBE..0DBF; UNKNOWN
4991 0x0DC0, // 0DC0..0DC6; SINHALA
4992 0x0DC7, // 0DC7..0DC9; UNKNOWN
4993 0x0DCA, // 0DCA ; SINHALA
4994 0x0DCB, // 0DCB..0DCE; UNKNOWN
4995 0x0DCF, // 0DCF..0DD4; SINHALA
4996 0x0DD5, // 0DD5 ; UNKNOWN
4997 0x0DD6, // 0DD6 ; SINHALA
4998 0x0DD7, // 0DD7 ; UNKNOWN
4999 0x0DD8, // 0DD8..0DDF; SINHALA
5000 0x0DE0, // 0DE0..0DE5; UNKNOWN
5001 0x0DE6, // 0DE6..0DEF; SINHALA
5002 0x0DF0, // 0DF0..0DF1; UNKNOWN
5003 0x0DF2, // 0DF2..0DF4; SINHALA
5004 0x0DF5, // 0DF5..0E00; UNKNOWN
5005 0x0E01, // 0E01..0E3A; THAI
5006 0x0E3B, // 0E3B..0E3E; UNKNOWN
5007 0x0E3F, // 0E3F ; COMMON
5008 0x0E40, // 0E40..0E5B; THAI
5009 0x0E5C, // 0E5C..0E80; UNKNOWN
5010 0x0E81, // 0E81..0E82; LAO
5011 0x0E83, // 0E83 ; UNKNOWN
5012 0x0E84, // 0E84 ; LAO
5013 0x0E85, // 0E85..0E86; UNKNOWN
5014 0x0E87, // 0E87..0E88; LAO
5015 0x0E89, // 0E89 ; UNKNOWN
5016 0x0E8A, // 0E8A ; LAO
5017 0x0E8B, // 0E8B..0E8C; UNKNOWN
5018 0x0E8D, // 0E8D ; LAO
5019 0x0E8E, // 0E8E..0E93; UNKNOWN
5020 0x0E94, // 0E94..0E97; LAO
5021 0x0E98, // 0E98 ; UNKNOWN
5022 0x0E99, // 0E99..0E9F; LAO
5023 0x0EA0, // 0EA0 ; UNKNOWN
5024 0x0EA1, // 0EA1..0EA3; LAO
5025 0x0EA4, // 0EA4 ; UNKNOWN
5026 0x0EA5, // 0EA5 ; LAO
5027 0x0EA6, // 0EA6 ; UNKNOWN
5028 0x0EA7, // 0EA7 ; LAO
5029 0x0EA8, // 0EA8..0EA9; UNKNOWN
5030 0x0EAA, // 0EAA..0EAB; LAO
5031 0x0EAC, // 0EAC ; UNKNOWN
5032 0x0EAD, // 0EAD..0EB9; LAO
5033 0x0EBA, // 0EBA ; UNKNOWN
5034 0x0EBB, // 0EBB..0EBD; LAO
5035 0x0EBE, // 0EBE..0EBF; UNKNOWN
5036 0x0EC0, // 0EC0..0EC4; LAO
5037 0x0EC5, // 0EC5 ; UNKNOWN
5038 0x0EC6, // 0EC6 ; LAO
5039 0x0EC7, // 0EC7 ; UNKNOWN
5040 0x0EC8, // 0EC8..0ECD; LAO
5041 0x0ECE, // 0ECE..0ECF; UNKNOWN
5042 0x0ED0, // 0ED0..0ED9; LAO
5043 0x0EDA, // 0EDA..0EDB; UNKNOWN
5044 0x0EDC, // 0EDC..0EDF; LAO
5045 0x0EE0, // 0EE0..0EFF; UNKNOWN
5046 0x0F00, // 0F00..0F47; TIBETAN
5047 0x0F48, // 0F48 ; UNKNOWN
5048 0x0F49, // 0F49..0F6C; TIBETAN
5049 0x0F6D, // 0F6D..0F70; UNKNOWN
5050 0x0F71, // 0F71..0F97; TIBETAN
5051 0x0F98, // 0F98 ; UNKNOWN
5052 0x0F99, // 0F99..0FBC; TIBETAN
5053 0x0FBD, // 0FBD ; UNKNOWN
5054 0x0FBE, // 0FBE..0FCC; TIBETAN
5055 0x0FCD, // 0FCD ; UNKNOWN
5056 0x0FCE, // 0FCE..0FD4; TIBETAN
5057 0x0FD5, // 0FD5..0FD8; COMMON
5058 0x0FD9, // 0FD9..0FDA; TIBETAN
5059 0x0FDB, // 0FDB..0FFF; UNKNOWN
5060 0x1000, // 1000..109F; MYANMAR
5061 0x10A0, // 10A0..10C5; GEORGIAN
5062 0x10C6, // 10C6 ; UNKNOWN
5063 0x10C7, // 10C7 ; GEORGIAN
5064 0x10C8, // 10C8..10CC; UNKNOWN
5065 0x10CD, // 10CD ; GEORGIAN
5066 0x10CE, // 10CE..10CF; UNKNOWN
5067 0x10D0, // 10D0..10FA; GEORGIAN
5068 0x10FB, // 10FB ; COMMON
5069 0x10FC, // 10FC..10FF; GEORGIAN
5070 0x1100, // 1100..11FF; HANGUL
5071 0x1200, // 1200..1248; ETHIOPIC
5072 0x1249, // 1249 ; UNKNOWN
5073 0x124A, // 124A..124D; ETHIOPIC
5074 0x124E, // 124E..124F; UNKNOWN
5075 0x1250, // 1250..1256; ETHIOPIC
5076 0x1257, // 1257 ; UNKNOWN
5077 0x1258, // 1258 ; ETHIOPIC
5078 0x1259, // 1259 ; UNKNOWN
5079 0x125A, // 125A..125D; ETHIOPIC
5080 0x125E, // 125E..125F; UNKNOWN
5081 0x1260, // 1260..1288; ETHIOPIC
5082 0x1289, // 1289 ; UNKNOWN
5083 0x128A, // 128A..128D; ETHIOPIC
5084 0x128E, // 128E..128F; UNKNOWN
5085 0x1290, // 1290..12B0; ETHIOPIC
5086 0x12B1, // 12B1 ; UNKNOWN
5087 0x12B2, // 12B2..12B5; ETHIOPIC
5088 0x12B6, // 12B6..12B7; UNKNOWN
5089 0x12B8, // 12B8..12BE; ETHIOPIC
5090 0x12BF, // 12BF ; UNKNOWN
5091 0x12C0, // 12C0 ; ETHIOPIC
5092 0x12C1, // 12C1 ; UNKNOWN
5093 0x12C2, // 12C2..12C5; ETHIOPIC
5094 0x12C6, // 12C6..12C7; UNKNOWN
5095 0x12C8, // 12C8..12D6; ETHIOPIC
5096 0x12D7, // 12D7 ; UNKNOWN
5097 0x12D8, // 12D8..1310; ETHIOPIC
5098 0x1311, // 1311 ; UNKNOWN
5099 0x1312, // 1312..1315; ETHIOPIC
5100 0x1316, // 1316..1317; UNKNOWN
5101 0x1318, // 1318..135A; ETHIOPIC
5102 0x135B, // 135B..135C; UNKNOWN
5103 0x135D, // 135D..137C; ETHIOPIC
5104 0x137D, // 137D..137F; UNKNOWN
5105 0x1380, // 1380..1399; ETHIOPIC
5106 0x139A, // 139A..139F; UNKNOWN
5107 0x13A0, // 13A0..13F5; CHEROKEE
5108 0x13F6, // 13F6..13F7; UNKNOWN
5109 0x13F8, // 13F8..13FD; CHEROKEE
5110 0x13FE, // 13FE..13FF; UNKNOWN
5111 0x1400, // 1400..167F; CANADIAN_ABORIGINAL
5112 0x1680, // 1680..169C; OGHAM
5113 0x169D, // 169D..169F; UNKNOWN
5114 0x16A0, // 16A0..16EA; RUNIC
5115 0x16EB, // 16EB..16ED; COMMON
5116 0x16EE, // 16EE..16F8; RUNIC
5117 0x16F9, // 16F9..16FF; UNKNOWN
5118 0x1700, // 1700..170C; TAGALOG
5119 0x170D, // 170D ; UNKNOWN
5120 0x170E, // 170E..1714; TAGALOG
5121 0x1715, // 1715..171F; UNKNOWN
5122 0x1720, // 1720..1734; HANUNOO
5123 0x1735, // 1735..1736; COMMON
5124 0x1737, // 1737..173F; UNKNOWN
5125 0x1740, // 1740..1753; BUHID
5126 0x1754, // 1754..175F; UNKNOWN
5127 0x1760, // 1760..176C; TAGBANWA
5128 0x176D, // 176D ; UNKNOWN
5129 0x176E, // 176E..1770; TAGBANWA
5130 0x1771, // 1771 ; UNKNOWN
5131 0x1772, // 1772..1773; TAGBANWA
5132 0x1774, // 1774..177F; UNKNOWN
5133 0x1780, // 1780..17DD; KHMER
5134 0x17DE, // 17DE..17DF; UNKNOWN
5135 0x17E0, // 17E0..17E9; KHMER
5136 0x17EA, // 17EA..17EF; UNKNOWN
5137 0x17F0, // 17F0..17F9; KHMER
5138 0x17FA, // 17FA..17FF; UNKNOWN
5139 0x1800, // 1800..1801; MONGOLIAN
5140 0x1802, // 1802..1803; COMMON
5141 0x1804, // 1804 ; MONGOLIAN
5142 0x1805, // 1805 ; COMMON
5143 0x1806, // 1806..180E; MONGOLIAN
5144 0x180F, // 180F ; UNKNOWN
5145 0x1810, // 1810..1819; MONGOLIAN
5146 0x181A, // 181A..181F; UNKNOWN
5147 0x1820, // 1820..1877; MONGOLIAN
5148 0x1878, // 1878..187F; UNKNOWN
5149 0x1880, // 1880..18AA; MONGOLIAN
5150 0x18AB, // 18AB..18AF; UNKNOWN
5151 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL
5152 0x18F6, // 18F6..18FF; UNKNOWN
5153 0x1900, // 1900..191E; LIMBU
5154 0x191F, // 191F ; UNKNOWN
5155 0x1920, // 1920..192B; LIMBU
5156 0x192C, // 192C..192F; UNKNOWN
5157 0x1930, // 1930..193B; LIMBU
5158 0x193C, // 193C..193F; UNKNOWN
5159 0x1940, // 1940 ; LIMBU
5160 0x1941, // 1941..1943; UNKNOWN
5161 0x1944, // 1944..194F; LIMBU
5162 0x1950, // 1950..196D; TAI_LE
5163 0x196E, // 196E..196F; UNKNOWN
5164 0x1970, // 1970..1974; TAI_LE
5165 0x1975, // 1975..197F; UNKNOWN
5166 0x1980, // 1980..19AB; NEW_TAI_LUE
5167 0x19AC, // 19AC..19AF; UNKNOWN
5168 0x19B0, // 19B0..19C9; NEW_TAI_LUE
5169 0x19CA, // 19CA..19CF; UNKNOWN
5170 0x19D0, // 19D0..19DA; NEW_TAI_LUE
5171 0x19DB, // 19DB..19DD; UNKNOWN
5172 0x19DE, // 19DE..19DF; NEW_TAI_LUE
5173 0x19E0, // 19E0..19FF; KHMER
5174 0x1A00, // 1A00..1A1B; BUGINESE
5175 0x1A1C, // 1A1C..1A1D; UNKNOWN
5176 0x1A1E, // 1A1E..1A1F; BUGINESE
5177 0x1A20, // 1A20..1A5E; TAI_THAM
5178 0x1A5F, // 1A5F ; UNKNOWN
5179 0x1A60, // 1A60..1A7C; TAI_THAM
5180 0x1A7D, // 1A7D..1A7E; UNKNOWN
5181 0x1A7F, // 1A7F..1A89; TAI_THAM
5182 0x1A8A, // 1A8A..1A8F; UNKNOWN
5183 0x1A90, // 1A90..1A99; TAI_THAM
5184 0x1A9A, // 1A9A..1A9F; UNKNOWN
5185 0x1AA0, // 1AA0..1AAD; TAI_THAM
5186 0x1AAE, // 1AAE..1AAF; UNKNOWN
5187 0x1AB0, // 1AB0..1ABE; INHERITED
5188 0x1ABF, // 1ABF..1AFF; UNKNOWN
5189 0x1B00, // 1B00..1B4B; BALINESE
5190 0x1B4C, // 1B4C..1B4F; UNKNOWN
5191 0x1B50, // 1B50..1B7C; BALINESE
5192 0x1B7D, // 1B7D..1B7F; UNKNOWN
5193 0x1B80, // 1B80..1BBF; SUNDANESE
5194 0x1BC0, // 1BC0..1BF3; BATAK
5195 0x1BF4, // 1BF4..1BFB; UNKNOWN
5196 0x1BFC, // 1BFC..1BFF; BATAK
5197 0x1C00, // 1C00..1C37; LEPCHA
5198 0x1C38, // 1C38..1C3A; UNKNOWN
5199 0x1C3B, // 1C3B..1C49; LEPCHA
5200 0x1C4A, // 1C4A..1C4C; UNKNOWN
5201 0x1C4D, // 1C4D..1C4F; LEPCHA
5202 0x1C50, // 1C50..1C7F; OL_CHIKI
5203 0x1C80, // 1C80..1C88; CYRILLIC
5204 0x1C89, // 1C89..1CBF; UNKNOWN
5205 0x1CC0, // 1CC0..1CC7; SUNDANESE
5206 0x1CC8, // 1CC8..1CCF; UNKNOWN
5207 0x1CD0, // 1CD0..1CD2; INHERITED
5208 0x1CD3, // 1CD3 ; COMMON
5209 0x1CD4, // 1CD4..1CE0; INHERITED
5210 0x1CE1, // 1CE1 ; COMMON
5211 0x1CE2, // 1CE2..1CE8; INHERITED
5212 0x1CE9, // 1CE9..1CEC; COMMON
5213 0x1CED, // 1CED ; INHERITED
5214 0x1CEE, // 1CEE..1CF3; COMMON
5215 0x1CF4, // 1CF4 ; INHERITED
5216 0x1CF5, // 1CF5..1CF7; COMMON
5217 0x1CF8, // 1CF8..1CF9; INHERITED
5218 0x1CFA, // 1CFA..1CFF; UNKNOWN
5219 0x1D00, // 1D00..1D25; LATIN
5220 0x1D26, // 1D26..1D2A; GREEK
5221 0x1D2B, // 1D2B ; CYRILLIC
5222 0x1D2C, // 1D2C..1D5C; LATIN
5223 0x1D5D, // 1D5D..1D61; GREEK
5224 0x1D62, // 1D62..1D65; LATIN
5225 0x1D66, // 1D66..1D6A; GREEK
5226 0x1D6B, // 1D6B..1D77; LATIN
5227 0x1D78, // 1D78 ; CYRILLIC
5228 0x1D79, // 1D79..1DBE; LATIN
5229 0x1DBF, // 1DBF ; GREEK
5230 0x1DC0, // 1DC0..1DF9; INHERITED
5231 0x1DFA, // 1DFA ; UNKNOWN
5232 0x1DFB, // 1DFB..1DFF; INHERITED
5233 0x1E00, // 1E00..1EFF; LATIN
5234 0x1F00, // 1F00..1F15; GREEK
5235 0x1F16, // 1F16..1F17; UNKNOWN
5236 0x1F18, // 1F18..1F1D; GREEK
5237 0x1F1E, // 1F1E..1F1F; UNKNOWN
5238 0x1F20, // 1F20..1F45; GREEK
5239 0x1F46, // 1F46..1F47; UNKNOWN
5240 0x1F48, // 1F48..1F4D; GREEK
5241 0x1F4E, // 1F4E..1F4F; UNKNOWN
5242 0x1F50, // 1F50..1F57; GREEK
5243 0x1F58, // 1F58 ; UNKNOWN
5244 0x1F59, // 1F59 ; GREEK
5245 0x1F5A, // 1F5A ; UNKNOWN
5246 0x1F5B, // 1F5B ; GREEK
5247 0x1F5C, // 1F5C ; UNKNOWN
5248 0x1F5D, // 1F5D ; GREEK
5249 0x1F5E, // 1F5E ; UNKNOWN
5250 0x1F5F, // 1F5F..1F7D; GREEK
5251 0x1F7E, // 1F7E..1F7F; UNKNOWN
5252 0x1F80, // 1F80..1FB4; GREEK
5253 0x1FB5, // 1FB5 ; UNKNOWN
5254 0x1FB6, // 1FB6..1FC4; GREEK
5255 0x1FC5, // 1FC5 ; UNKNOWN
5256 0x1FC6, // 1FC6..1FD3; GREEK
5257 0x1FD4, // 1FD4..1FD5; UNKNOWN
5258 0x1FD6, // 1FD6..1FDB; GREEK
5259 0x1FDC, // 1FDC ; UNKNOWN
5260 0x1FDD, // 1FDD..1FEF; GREEK
5261 0x1FF0, // 1FF0..1FF1; UNKNOWN
5262 0x1FF2, // 1FF2..1FF4; GREEK
5263 0x1FF5, // 1FF5 ; UNKNOWN
5264 0x1FF6, // 1FF6..1FFE; GREEK
5265 0x1FFF, // 1FFF ; UNKNOWN
5266 0x2000, // 2000..200B; COMMON
5267 0x200C, // 200C..200D; INHERITED
5268 0x200E, // 200E..2064; COMMON
5269 0x2065, // 2065 ; UNKNOWN
5270 0x2066, // 2066..2070; COMMON
5271 0x2071, // 2071 ; LATIN
5272 0x2072, // 2072..2073; UNKNOWN
5273 0x2074, // 2074..207E; COMMON
5274 0x207F, // 207F ; LATIN
5275 0x2080, // 2080..208E; COMMON
5276 0x208F, // 208F ; UNKNOWN
5277 0x2090, // 2090..209C; LATIN
5278 0x209D, // 209D..209F; UNKNOWN
5279 0x20A0, // 20A0..20BF; COMMON
5280 0x20C0, // 20C0..20CF; UNKNOWN
5281 0x20D0, // 20D0..20F0; INHERITED
5282 0x20F1, // 20F1..20FF; UNKNOWN
5283 0x2100, // 2100..2125; COMMON
5284 0x2126, // 2126 ; GREEK
5285 0x2127, // 2127..2129; COMMON
5286 0x212A, // 212A..212B; LATIN
5287 0x212C, // 212C..2131; COMMON
5288 0x2132, // 2132 ; LATIN
5289 0x2133, // 2133..214D; COMMON
5290 0x214E, // 214E ; LATIN
5291 0x214F, // 214F..215F; COMMON
5292 0x2160, // 2160..2188; LATIN
5293 0x2189, // 2189..218B; COMMON
5294 0x218C, // 218C..218F; UNKNOWN
5295 0x2190, // 2190..2426; COMMON
5296 0x2427, // 2427..243F; UNKNOWN
5297 0x2440, // 2440..244A; COMMON
5298 0x244B, // 244B..245F; UNKNOWN
5299 0x2460, // 2460..27FF; COMMON
5300 0x2800, // 2800..28FF; BRAILLE
5301 0x2900, // 2900..2B73; COMMON
5302 0x2B74, // 2B74..2B75; UNKNOWN
5303 0x2B76, // 2B76..2B95; COMMON
5304 0x2B96, // 2B96..2B97; UNKNOWN
5305 0x2B98, // 2B98..2BB9; COMMON
5306 0x2BBA, // 2BBA..2BBC; UNKNOWN
5307 0x2BBD, // 2BBD..2BC8; COMMON
5308 0x2BC9, // 2BC9 ; UNKNOWN
5309 0x2BCA, // 2BCA..2BD2; COMMON
5310 0x2BD3, // 2BD3..2BEB; UNKNOWN
5311 0x2BEC, // 2BEC..2BEF; COMMON
5312 0x2BF0, // 2BF0..2BFF; UNKNOWN
5313 0x2C00, // 2C00..2C2E; GLAGOLITIC
5314 0x2C2F, // 2C2F ; UNKNOWN
5315 0x2C30, // 2C30..2C5E; GLAGOLITIC
5316 0x2C5F, // 2C5F ; UNKNOWN
5317 0x2C60, // 2C60..2C7F; LATIN
5318 0x2C80, // 2C80..2CF3; COPTIC
5319 0x2CF4, // 2CF4..2CF8; UNKNOWN
5320 0x2CF9, // 2CF9..2CFF; COPTIC
5321 0x2D00, // 2D00..2D25; GEORGIAN
5322 0x2D26, // 2D26 ; UNKNOWN
5323 0x2D27, // 2D27 ; GEORGIAN
5324 0x2D28, // 2D28..2D2C; UNKNOWN
5325 0x2D2D, // 2D2D ; GEORGIAN
5326 0x2D2E, // 2D2E..2D2F; UNKNOWN
5327 0x2D30, // 2D30..2D67; TIFINAGH
5328 0x2D68, // 2D68..2D6E; UNKNOWN
5329 0x2D6F, // 2D6F..2D70; TIFINAGH
5330 0x2D71, // 2D71..2D7E; UNKNOWN
5331 0x2D7F, // 2D7F ; TIFINAGH
5332 0x2D80, // 2D80..2D96; ETHIOPIC
5333 0x2D97, // 2D97..2D9F; UNKNOWN
5334 0x2DA0, // 2DA0..2DA6; ETHIOPIC
5335 0x2DA7, // 2DA7 ; UNKNOWN
5336 0x2DA8, // 2DA8..2DAE; ETHIOPIC
5337 0x2DAF, // 2DAF ; UNKNOWN
5338 0x2DB0, // 2DB0..2DB6; ETHIOPIC
5339 0x2DB7, // 2DB7 ; UNKNOWN
5340 0x2DB8, // 2DB8..2DBE; ETHIOPIC
5341 0x2DBF, // 2DBF ; UNKNOWN
5342 0x2DC0, // 2DC0..2DC6; ETHIOPIC
5343 0x2DC7, // 2DC7 ; UNKNOWN
5344 0x2DC8, // 2DC8..2DCE; ETHIOPIC
5345 0x2DCF, // 2DCF ; UNKNOWN
5346 0x2DD0, // 2DD0..2DD6; ETHIOPIC
5347 0x2DD7, // 2DD7 ; UNKNOWN
5348 0x2DD8, // 2DD8..2DDE; ETHIOPIC
5349 0x2DDF, // 2DDF ; UNKNOWN
5350 0x2DE0, // 2DE0..2DFF; CYRILLIC
5351 0x2E00, // 2E00..2E49; COMMON
5352 0x2E4A, // 2E4A..2E7F; UNKNOWN
5353 0x2E80, // 2E80..2E99; HAN
5354 0x2E9A, // 2E9A ; UNKNOWN
5355 0x2E9B, // 2E9B..2EF3; HAN
5356 0x2EF4, // 2EF4..2EFF; UNKNOWN
5357 0x2F00, // 2F00..2FD5; HAN
5358 0x2FD6, // 2FD6..2FEF; UNKNOWN
5359 0x2FF0, // 2FF0..2FFB; COMMON
5360 0x2FFC, // 2FFC..2FFF; UNKNOWN
5361 0x3000, // 3000..3004; COMMON
5362 0x3005, // 3005 ; HAN
5363 0x3006, // 3006 ; COMMON
5364 0x3007, // 3007 ; HAN
5365 0x3008, // 3008..3020; COMMON
5366 0x3021, // 3021..3029; HAN
5367 0x302A, // 302A..302D; INHERITED
5368 0x302E, // 302E..302F; HANGUL
5369 0x3030, // 3030..3037; COMMON
5370 0x3038, // 3038..303B; HAN
5371 0x303C, // 303C..303F; COMMON
5372 0x3040, // 3040 ; UNKNOWN
5373 0x3041, // 3041..3096; HIRAGANA
5374 0x3097, // 3097..3098; UNKNOWN
5375 0x3099, // 3099..309A; INHERITED
5376 0x309B, // 309B..309C; COMMON
5377 0x309D, // 309D..309F; HIRAGANA
5378 0x30A0, // 30A0 ; COMMON
5379 0x30A1, // 30A1..30FA; KATAKANA
5380 0x30FB, // 30FB..30FC; COMMON
5381 0x30FD, // 30FD..30FF; KATAKANA
5382 0x3100, // 3100..3104; UNKNOWN
5383 0x3105, // 3105..312E; BOPOMOFO
5384 0x312F, // 312F..3130; UNKNOWN
5385 0x3131, // 3131..318E; HANGUL
5386 0x318F, // 318F ; UNKNOWN
5387 0x3190, // 3190..319F; COMMON
5388 0x31A0, // 31A0..31BA; BOPOMOFO
5389 0x31BB, // 31BB..31BF; UNKNOWN
5390 0x31C0, // 31C0..31E3; COMMON
5391 0x31E4, // 31E4..31EF; UNKNOWN
5392 0x31F0, // 31F0..31FF; KATAKANA
5393 0x3200, // 3200..321E; HANGUL
5394 0x321F, // 321F ; UNKNOWN
5395 0x3220, // 3220..325F; COMMON
5396 0x3260, // 3260..327E; HANGUL
5397 0x327F, // 327F..32CF; COMMON
5398 0x32D0, // 32D0..32FE; KATAKANA
5399 0x32FF, // 32FF ; COMMON
5400 0x3300, // 3300..3357; KATAKANA
5401 0x3358, // 3358..33FF; COMMON
5402 0x3400, // 3400..4DB5; HAN
5403 0x4DB6, // 4DB6..4DBF; UNKNOWN
5404 0x4DC0, // 4DC0..4DFF; COMMON
5405 0x4E00, // 4E00..9FEA; HAN
5406 0x9FEB, // 9FEB..9FFF; UNKNOWN
5407 0xA000, // A000..A48C; YI
5408 0xA48D, // A48D..A48F; UNKNOWN
5409 0xA490, // A490..A4C6; YI
5410 0xA4C7, // A4C7..A4CF; UNKNOWN
5411 0xA4D0, // A4D0..A4FF; LISU
5412 0xA500, // A500..A62B; VAI
5413 0xA62C, // A62C..A63F; UNKNOWN
5414 0xA640, // A640..A69F; CYRILLIC
5415 0xA6A0, // A6A0..A6F7; BAMUM
5416 0xA6F8, // A6F8..A6FF; UNKNOWN
5417 0xA700, // A700..A721; COMMON
5418 0xA722, // A722..A787; LATIN
5419 0xA788, // A788..A78A; COMMON
5420 0xA78B, // A78B..A7AE; LATIN
5421 0xA7AF, // A7AF ; UNKNOWN
5422 0xA7B0, // A7B0..A7B7; LATIN
5423 0xA7B8, // A7B8..A7F6; UNKNOWN
5424 0xA7F7, // A7F7..A7FF; LATIN
5425 0xA800, // A800..A82B; SYLOTI_NAGRI
5426 0xA82C, // A82C..A82F; UNKNOWN
5427 0xA830, // A830..A839; COMMON
5428 0xA83A, // A83A..A83F; UNKNOWN
5429 0xA840, // A840..A877; PHAGS_PA
5430 0xA878, // A878..A87F; UNKNOWN
5431 0xA880, // A880..A8C5; SAURASHTRA
5432 0xA8C6, // A8C6..A8CD; UNKNOWN
5433 0xA8CE, // A8CE..A8D9; SAURASHTRA
5434 0xA8DA, // A8DA..A8DF; UNKNOWN
5435 0xA8E0, // A8E0..A8FD; DEVANAGARI
5436 0xA8FE, // A8FE..A8FF; UNKNOWN
5437 0xA900, // A900..A92D; KAYAH_LI
5438 0xA92E, // A92E ; COMMON
5439 0xA92F, // A92F ; KAYAH_LI
5440 0xA930, // A930..A953; REJANG
5441 0xA954, // A954..A95E; UNKNOWN
5442 0xA95F, // A95F ; REJANG
5443 0xA960, // A960..A97C; HANGUL
5444 0xA97D, // A97D..A97F; UNKNOWN
5445 0xA980, // A980..A9CD; JAVANESE
5446 0xA9CE, // A9CE ; UNKNOWN
5447 0xA9CF, // A9CF ; COMMON
5448 0xA9D0, // A9D0..A9D9; JAVANESE
5449 0xA9DA, // A9DA..A9DD; UNKNOWN
5450 0xA9DE, // A9DE..A9DF; JAVANESE
5451 0xA9E0, // A9E0..A9FE; MYANMAR
5452 0xA9FF, // A9FF ; UNKNOWN
5453 0xAA00, // AA00..AA36; CHAM
5454 0xAA37, // AA37..AA3F; UNKNOWN
5455 0xAA40, // AA40..AA4D; CHAM
5456 0xAA4E, // AA4E..AA4F; UNKNOWN
5457 0xAA50, // AA50..AA59; CHAM
5458 0xAA5A, // AA5A..AA5B; UNKNOWN
5459 0xAA5C, // AA5C..AA5F; CHAM
5460 0xAA60, // AA60..AA7F; MYANMAR
5461 0xAA80, // AA80..AAC2; TAI_VIET
5462 0xAAC3, // AAC3..AADA; UNKNOWN
5463 0xAADB, // AADB..AADF; TAI_VIET
5464 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK
5465 0xAAF7, // AAF7..AB00; UNKNOWN
5466 0xAB01, // AB01..AB06; ETHIOPIC
5467 0xAB07, // AB07..AB08; UNKNOWN
5468 0xAB09, // AB09..AB0E; ETHIOPIC
5469 0xAB0F, // AB0F..AB10; UNKNOWN
5470 0xAB11, // AB11..AB16; ETHIOPIC
5471 0xAB17, // AB17..AB1F; UNKNOWN
5472 0xAB20, // AB20..AB26; ETHIOPIC
5473 0xAB27, // AB27 ; UNKNOWN
5474 0xAB28, // AB28..AB2E; ETHIOPIC
5475 0xAB2F, // AB2F ; UNKNOWN
5476 0xAB30, // AB30..AB5A; LATIN
5477 0xAB5B, // AB5B ; COMMON
5478 0xAB5C, // AB5C..AB64; LATIN
5479 0xAB65, // AB65 ; GREEK
5480 0xAB66, // AB66..AB6F; UNKNOWN
5481 0xAB70, // AB70..ABBF; CHEROKEE
5482 0xABC0, // ABC0..ABED; MEETEI_MAYEK
5483 0xABEE, // ABEE..ABEF; UNKNOWN
5484 0xABF0, // ABF0..ABF9; MEETEI_MAYEK
5485 0xABFA, // ABFA..ABFF; UNKNOWN
5486 0xAC00, // AC00..D7A3; HANGUL
5487 0xD7A4, // D7A4..D7AF; UNKNOWN
5488 0xD7B0, // D7B0..D7C6; HANGUL
5489 0xD7C7, // D7C7..D7CA; UNKNOWN
5490 0xD7CB, // D7CB..D7FB; HANGUL
5491 0xD7FC, // D7FC..F8FF; UNKNOWN
5492 0xF900, // F900..FA6D; HAN
5493 0xFA6E, // FA6E..FA6F; UNKNOWN
5494 0xFA70, // FA70..FAD9; HAN
5495 0xFADA, // FADA..FAFF; UNKNOWN
5496 0xFB00, // FB00..FB06; LATIN
5497 0xFB07, // FB07..FB12; UNKNOWN
5498 0xFB13, // FB13..FB17; ARMENIAN
5499 0xFB18, // FB18..FB1C; UNKNOWN
5500 0xFB1D, // FB1D..FB36; HEBREW
5501 0xFB37, // FB37 ; UNKNOWN
5502 0xFB38, // FB38..FB3C; HEBREW
5503 0xFB3D, // FB3D ; UNKNOWN
5504 0xFB3E, // FB3E ; HEBREW
5505 0xFB3F, // FB3F ; UNKNOWN
5506 0xFB40, // FB40..FB41; HEBREW
5507 0xFB42, // FB42 ; UNKNOWN
5508 0xFB43, // FB43..FB44; HEBREW
5509 0xFB45, // FB45 ; UNKNOWN
5510 0xFB46, // FB46..FB4F; HEBREW
5511 0xFB50, // FB50..FBC1; ARABIC
5512 0xFBC2, // FBC2..FBD2; UNKNOWN
5513 0xFBD3, // FBD3..FD3D; ARABIC
5514 0xFD3E, // FD3E..FD3F; COMMON
5515 0xFD40, // FD40..FD4F; UNKNOWN
5516 0xFD50, // FD50..FD8F; ARABIC
5517 0xFD90, // FD90..FD91; UNKNOWN
5518 0xFD92, // FD92..FDC7; ARABIC
5519 0xFDC8, // FDC8..FDEF; UNKNOWN
5520 0xFDF0, // FDF0..FDFD; ARABIC
5521 0xFDFE, // FDFE..FDFF; UNKNOWN
5522 0xFE00, // FE00..FE0F; INHERITED
5523 0xFE10, // FE10..FE19; COMMON
5524 0xFE1A, // FE1A..FE1F; UNKNOWN
5525 0xFE20, // FE20..FE2D; INHERITED
5526 0xFE2E, // FE2E..FE2F; CYRILLIC
5527 0xFE30, // FE30..FE52; COMMON
5528 0xFE53, // FE53 ; UNKNOWN
5529 0xFE54, // FE54..FE66; COMMON
5530 0xFE67, // FE67 ; UNKNOWN
5531 0xFE68, // FE68..FE6B; COMMON
5532 0xFE6C, // FE6C..FE6F; UNKNOWN
5533 0xFE70, // FE70..FE74; ARABIC
5534 0xFE75, // FE75 ; UNKNOWN
5535 0xFE76, // FE76..FEFC; ARABIC
5536 0xFEFD, // FEFD..FEFE; UNKNOWN
5537 0xFEFF, // FEFF ; COMMON
5538 0xFF00, // FF00 ; UNKNOWN
5539 0xFF01, // FF01..FF20; COMMON
5540 0xFF21, // FF21..FF3A; LATIN
5541 0xFF3B, // FF3B..FF40; COMMON
5542 0xFF41, // FF41..FF5A; LATIN
5543 0xFF5B, // FF5B..FF65; COMMON
5544 0xFF66, // FF66..FF6F; KATAKANA
5545 0xFF70, // FF70 ; COMMON
5546 0xFF71, // FF71..FF9D; KATAKANA
5547 0xFF9E, // FF9E..FF9F; COMMON
5548 0xFFA0, // FFA0..FFBE; HANGUL
5549 0xFFBF, // FFBF..FFC1; UNKNOWN
5550 0xFFC2, // FFC2..FFC7; HANGUL
5551 0xFFC8, // FFC8..FFC9; UNKNOWN
5552 0xFFCA, // FFCA..FFCF; HANGUL
5553 0xFFD0, // FFD0..FFD1; UNKNOWN
5554 0xFFD2, // FFD2..FFD7; HANGUL
5555 0xFFD8, // FFD8..FFD9; UNKNOWN
5556 0xFFDA, // FFDA..FFDC; HANGUL
5557 0xFFDD, // FFDD..FFDF; UNKNOWN
5558 0xFFE0, // FFE0..FFE6; COMMON
5559 0xFFE7, // FFE7 ; UNKNOWN
5560 0xFFE8, // FFE8..FFEE; COMMON
5561 0xFFEF, // FFEF..FFF8; UNKNOWN
5562 0xFFF9, // FFF9..FFFD; COMMON
5563 0xFFFE, // FFFE..FFFF; UNKNOWN
5564 0x10000, // 10000..1000B; LINEAR_B
5565 0x1000C, // 1000C ; UNKNOWN
5566 0x1000D, // 1000D..10026; LINEAR_B
5567 0x10027, // 10027 ; UNKNOWN
5568 0x10028, // 10028..1003A; LINEAR_B
5569 0x1003B, // 1003B ; UNKNOWN
5570 0x1003C, // 1003C..1003D; LINEAR_B
5571 0x1003E, // 1003E ; UNKNOWN
5572 0x1003F, // 1003F..1004D; LINEAR_B
5573 0x1004E, // 1004E..1004F; UNKNOWN
5574 0x10050, // 10050..1005D; LINEAR_B
5575 0x1005E, // 1005E..1007F; UNKNOWN
5576 0x10080, // 10080..100FA; LINEAR_B
5577 0x100FB, // 100FB..100FF; UNKNOWN
5578 0x10100, // 10100..10102; COMMON
5579 0x10103, // 10103..10106; UNKNOWN
5580 0x10107, // 10107..10133; COMMON
5581 0x10134, // 10134..10136; UNKNOWN
5582 0x10137, // 10137..1013F; COMMON
5583 0x10140, // 10140..1018E; GREEK
5584 0x1018F, // 1018F ; UNKNOWN
5585 0x10190, // 10190..1019B; COMMON
5586 0x1019C, // 1019C..1019F; UNKNOWN
5587 0x101A0, // 101A0 ; GREEK
5588 0x101A1, // 101A1..101CF; UNKNOWN
5589 0x101D0, // 101D0..101FC; COMMON
5590 0x101FD, // 101FD ; INHERITED
5591 0x101FE, // 101FE..1027F; UNKNOWN
5592 0x10280, // 10280..1029C; LYCIAN
5593 0x1029D, // 1029D..1029F; UNKNOWN
5594 0x102A0, // 102A0..102D0; CARIAN
5595 0x102D1, // 102D1..102DF; UNKNOWN
5596 0x102E0, // 102E0 ; INHERITED
5597 0x102E1, // 102E1..102FB; COMMON
5598 0x102FC, // 102FC..102FF; UNKNOWN
5599 0x10300, // 10300..10323; OLD_ITALIC
5600 0x10324, // 10324..1032C; UNKNOWN
5601 0x1032D, // 1032D..1032F; OLD_ITALIC
5602 0x10330, // 10330..1034A; GOTHIC
5603 0x1034B, // 1034B..1034F; UNKNOWN
5604 0x10350, // 10350..1037A; OLD_PERMIC
5605 0x1037B, // 1037B..1037F; UNKNOWN
5606 0x10380, // 10380..1039D; UGARITIC
5607 0x1039E, // 1039E ; UNKNOWN
5608 0x1039F, // 1039F ; UGARITIC
5609 0x103A0, // 103A0..103C3; OLD_PERSIAN
5610 0x103C4, // 103C4..103C7; UNKNOWN
5611 0x103C8, // 103C8..103D5; OLD_PERSIAN
5612 0x103D6, // 103D6..103FF; UNKNOWN
5613 0x10400, // 10400..1044F; DESERET
5614 0x10450, // 10450..1047F; SHAVIAN
5615 0x10480, // 10480..1049D; OSMANYA
5616 0x1049E, // 1049E..1049F; UNKNOWN
5617 0x104A0, // 104A0..104A9; OSMANYA
5618 0x104AA, // 104AA..104AF; UNKNOWN
5619 0x104B0, // 104B0..104D3; OSAGE
5620 0x104D4, // 104D4..104D7; UNKNOWN
5621 0x104D8, // 104D8..104FB; OSAGE
5622 0x104FC, // 104FC..104FF; UNKNOWN
5623 0x10500, // 10500..10527; ELBASAN
5624 0x10528, // 10528..1052F; UNKNOWN
5625 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN
5626 0x10564, // 10564..1056E; UNKNOWN
5627 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN
5628 0x10570, // 10570..105FF; UNKNOWN
5629 0x10600, // 10600..10736; LINEAR_A
5630 0x10737, // 10737..1073F; UNKNOWN
5631 0x10740, // 10740..10755; LINEAR_A
5632 0x10756, // 10756..1075F; UNKNOWN
5633 0x10760, // 10760..10767; LINEAR_A
5634 0x10768, // 10768..107FF; UNKNOWN
5635 0x10800, // 10800..10805; CYPRIOT
5636 0x10806, // 10806..10807; UNKNOWN
5637 0x10808, // 10808 ; CYPRIOT
5638 0x10809, // 10809 ; UNKNOWN
5639 0x1080A, // 1080A..10835; CYPRIOT
5640 0x10836, // 10836 ; UNKNOWN
5641 0x10837, // 10837..10838; CYPRIOT
5642 0x10839, // 10839..1083B; UNKNOWN
5643 0x1083C, // 1083C ; CYPRIOT
5644 0x1083D, // 1083D..1083E; UNKNOWN
5645 0x1083F, // 1083F ; CYPRIOT
5646 0x10840, // 10840..10855; IMPERIAL_ARAMAIC
5647 0x10856, // 10856 ; UNKNOWN
5648 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC
5649 0x10860, // 10860..1087F; PALMYRENE
5650 0x10880, // 10880..1089E; NABATAEAN
5651 0x1089F, // 1089F..108A6; UNKNOWN
5652 0x108A7, // 108A7..108AF; NABATAEAN
5653 0x108B0, // 108B0..108DF; UNKNOWN
5654 0x108E0, // 108E0..108F2; HATRAN
5655 0x108F3, // 108F3 ; UNKNOWN
5656 0x108F4, // 108F4..108F5; HATRAN
5657 0x108F6, // 108F6..108FA; UNKNOWN
5658 0x108FB, // 108FB..108FF; HATRAN
5659 0x10900, // 10900..1091B; PHOENICIAN
5660 0x1091C, // 1091C..1091E; UNKNOWN
5661 0x1091F, // 1091F ; PHOENICIAN
5662 0x10920, // 10920..10939; LYDIAN
5663 0x1093A, // 1093A..1093E; UNKNOWN
5664 0x1093F, // 1093F ; LYDIAN
5665 0x10940, // 10940..1097F; UNKNOWN
5666 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS
5667 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE
5668 0x109B8, // 109B8..109BB; UNKNOWN
5669 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE
5670 0x109D0, // 109D0..109D1; UNKNOWN
5671 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE
5672 0x10A00, // 10A00..10A03; KHAROSHTHI
5673 0x10A04, // 10A04 ; UNKNOWN
5674 0x10A05, // 10A05..10A06; KHAROSHTHI
5675 0x10A07, // 10A07..10A0B; UNKNOWN
5676 0x10A0C, // 10A0C..10A13; KHAROSHTHI
5677 0x10A14, // 10A14 ; UNKNOWN
5678 0x10A15, // 10A15..10A17; KHAROSHTHI
5679 0x10A18, // 10A18 ; UNKNOWN
5680 0x10A19, // 10A19..10A33; KHAROSHTHI
5681 0x10A34, // 10A34..10A37; UNKNOWN
5682 0x10A38, // 10A38..10A3A; KHAROSHTHI
5683 0x10A3B, // 10A3B..10A3E; UNKNOWN
5684 0x10A3F, // 10A3F..10A47; KHAROSHTHI
5685 0x10A48, // 10A48..10A4F; UNKNOWN
5686 0x10A50, // 10A50..10A58; KHAROSHTHI
5687 0x10A59, // 10A59..10A5F; UNKNOWN
5688 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN
5689 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN
5690 0x10AA0, // 10AA0..10ABF; UNKNOWN
5691 0x10AC0, // 10AC0..10AE6; MANICHAEAN
5692 0x10AE7, // 10AE7..10AEA; UNKNOWN
5693 0x10AEB, // 10AEB..10AF6; MANICHAEAN
5694 0x10AF7, // 10AF7..10AFF; UNKNOWN
5695 0x10B00, // 10B00..10B35; AVESTAN
5696 0x10B36, // 10B36..10B38; UNKNOWN
5697 0x10B39, // 10B39..10B3F; AVESTAN
5698 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
5699 0x10B56, // 10B56..10B57; UNKNOWN
5700 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
5701 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
5702 0x10B73, // 10B73..10B77; UNKNOWN
5703 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
5704 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI
5705 0x10B92, // 10B92..10B98; UNKNOWN
5706 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI
5707 0x10B9D, // 10B9D..10BA8; UNKNOWN
5708 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI
5709 0x10BB0, // 10BB0..10BFF; UNKNOWN
5710 0x10C00, // 10C00..10C48; OLD_TURKIC
5711 0x10C49, // 10C49..10C7F; UNKNOWN
5712 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN
5713 0x10CB3, // 10CB3..10CBF; UNKNOWN
5714 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN
5715 0x10CF3, // 10CF3..10CF9; UNKNOWN
5716 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN
5717 0x10D00, // 10D00..10E5F; UNKNOWN
5718 0x10E60, // 10E60..10E7E; ARABIC
5719 0x10E7F, // 10E7F..10FFF; UNKNOWN
5720 0x11000, // 11000..1104D; BRAHMI
5721 0x1104E, // 1104E..11051; UNKNOWN
5722 0x11052, // 11052..1106F; BRAHMI
5723 0x11070, // 11070..1107E; UNKNOWN
5724 0x1107F, // 1107F ; BRAHMI
5725 0x11080, // 11080..110C1; KAITHI
5726 0x110C2, // 110C2..110CF; UNKNOWN
5727 0x110D0, // 110D0..110E8; SORA_SOMPENG
5728 0x110E9, // 110E9..110EF; UNKNOWN
5729 0x110F0, // 110F0..110F9; SORA_SOMPENG
5730 0x110FA, // 110FA..110FF; UNKNOWN
5731 0x11100, // 11100..11134; CHAKMA
5732 0x11135, // 11135 ; UNKNOWN
5733 0x11136, // 11136..11143; CHAKMA
5734 0x11144, // 11144..1114F; UNKNOWN
5735 0x11150, // 11150..11176; MAHAJANI
5736 0x11177, // 11177..1117F; UNKNOWN
5737 0x11180, // 11180..111CD; SHARADA
5738 0x111CE, // 111CE..111CF; UNKNOWN
5739 0x111D0, // 111D0..111DF; SHARADA
5740 0x111E0, // 111E0 ; UNKNOWN
5741 0x111E1, // 111E1..111F4; SINHALA
5742 0x111F5, // 111F5..111FF; UNKNOWN
5743 0x11200, // 11200..11211; KHOJKI
5744 0x11212, // 11212 ; UNKNOWN
5745 0x11213, // 11213..1123E; KHOJKI
5746 0x1123F, // 1123F..1127F; UNKNOWN
5747 0x11280, // 11280..11286; MULTANI
5748 0x11287, // 11287 ; UNKNOWN
5749 0x11288, // 11288 ; MULTANI
5750 0x11289, // 11289 ; UNKNOWN
5751 0x1128A, // 1128A..1128D; MULTANI
5752 0x1128E, // 1128E ; UNKNOWN
5753 0x1128F, // 1128F..1129D; MULTANI
5754 0x1129E, // 1129E ; UNKNOWN
5755 0x1129F, // 1129F..112A9; MULTANI
5756 0x112AA, // 112AA..112AF; UNKNOWN
5757 0x112B0, // 112B0..112EA; KHUDAWADI
5758 0x112EB, // 112EB..112EF; UNKNOWN
5759 0x112F0, // 112F0..112F9; KHUDAWADI
5760 0x112FA, // 112FA..112FF; UNKNOWN
5761 0x11300, // 11300..11303; GRANTHA
5762 0x11304, // 11304 ; UNKNOWN
5763 0x11305, // 11305..1130C; GRANTHA
5764 0x1130D, // 1130D..1130E; UNKNOWN
5765 0x1130F, // 1130F..11310; GRANTHA
5766 0x11311, // 11311..11312; UNKNOWN
5767 0x11313, // 11313..11328; GRANTHA
5768 0x11329, // 11329 ; UNKNOWN
5769 0x1132A, // 1132A..11330; GRANTHA
5770 0x11331, // 11331 ; UNKNOWN
5771 0x11332, // 11332..11333; GRANTHA
5772 0x11334, // 11334 ; UNKNOWN
5773 0x11335, // 11335..11339; GRANTHA
5774 0x1133A, // 1133A..1133B; UNKNOWN
5775 0x1133C, // 1133C..11344; GRANTHA
5776 0x11345, // 11345..11346; UNKNOWN
5777 0x11347, // 11347..11348; GRANTHA
5778 0x11349, // 11349..1134A; UNKNOWN
5779 0x1134B, // 1134B..1134D; GRANTHA
5780 0x1134E, // 1134E..1134F; UNKNOWN
5781 0x11350, // 11350 ; GRANTHA
5782 0x11351, // 11351..11356; UNKNOWN
5783 0x11357, // 11357 ; GRANTHA
5784 0x11358, // 11358..1135C; UNKNOWN
5785 0x1135D, // 1135D..11363; GRANTHA
5786 0x11364, // 11364..11365; UNKNOWN
5787 0x11366, // 11366..1136C; GRANTHA
5788 0x1136D, // 1136D..1136F; UNKNOWN
5789 0x11370, // 11370..11374; GRANTHA
5790 0x11375, // 11375..113FF; UNKNOWN
5791 0x11400, // 11400..11459; NEWA
5792 0x1145A, // 1145A ; UNKNOWN
5793 0x1145B, // 1145B ; NEWA
5794 0x1145C, // 1145C ; UNKNOWN
5795 0x1145D, // 1145D ; NEWA
5796 0x1145E, // 1145E..1147F; UNKNOWN
5797 0x11480, // 11480..114C7; TIRHUTA
5798 0x114C8, // 114C8..114CF; UNKNOWN
5799 0x114D0, // 114D0..114D9; TIRHUTA
5800 0x114DA, // 114DA..1157F; UNKNOWN
5801 0x11580, // 11580..115B5; SIDDHAM
5802 0x115B6, // 115B6..115B7; UNKNOWN
5803 0x115B8, // 115B8..115DD; SIDDHAM
5804 0x115DE, // 115DE..115FF; UNKNOWN
5805 0x11600, // 11600..11644; MODI
5806 0x11645, // 11645..1164F; UNKNOWN
5807 0x11650, // 11650..11659; MODI
5808 0x1165A, // 1165A..1165F; UNKNOWN
5809 0x11660, // 11660..1166C; MONGOLIAN
5810 0X1166D, // 1166D..1167F; UNKNOWN
5811 0x11680, // 11680..116B7; TAKRI
5812 0x116B8, // 116B8..116BF; UNKNOWN
5813 0x116C0, // 116C0..116C9; TAKRI
5814 0x116CA, // 116CA..116FF; UNKNOWN
5815 0x11700, // 11700..11719; AHOM
5816 0x1171A, // 1171A..1171C; UNKNOWN
5817 0x1171D, // 1171D..1172B; AHOM
5818 0x1172C, // 1172C..1172F; UNKNOWN
5819 0x11730, // 11730..1173F; AHOM
5820 0x11740, // 11740..1189F; UNKNOWN
5821 0x118A0, // 118A0..118F2; WARANG_CITI
5822 0x118F3, // 118F3..118FE; UNKNOWN
5823 0x118FF, // 118FF ; WARANG_CITI
5824 0x11900, // 11900..119FF; UNKNOWN
5825 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE
5826 0X11A48, // 11A48..11A4F; UNKNOWN
5827 0x11A50, // 11A50..11A83; SOYOMBO
5828 0x11A84, // 11A84..11A85; UNKNOWN
5829 0x11A86, // 11A86..11A9C; SOYOMBO
5830 0x11A9D, // 11A9D ; UNKNOWN
5831 0x11A9E, // 11A9E..11AA2; SOYOMBO
5832 0x11AA3, // 11AA3..11ABF; UNKNOWN
5833 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU
5834 0x11AF9, // 11AF9..11BFF; UNKNOWN
5835 0x11C00, // 11C00..11C08; BHAIKSUKI
5836 0x11C09, // 11C09 ; UNKNOWN
5837 0x11C0A, // 11C0A..11C36; BHAIKSUKI
5838 0x11C37, // 11C37 ; UNKNOWN
5839 0x11C38, // 11C38..11C45; BHAIKSUKI
5840 0x11C46, // 11C46..11C4F; UNKNOWN
5841 0x11C50, // 11C50..11C6C; BHAIKSUKI
5842 0x11C6D, // 11C6D..11C6F; UNKNOWN
5843 0x11C70, // 11C70..11C8F; MARCHEN
5844 0x11C90, // 11C90..11C91; UNKNOWN
5845 0x11C92, // 11C92..11CA7; MARCHEN
5846 0x11CA8, // 11CA8 ; UNKNOWN
5847 0x11CA9, // 11CA9..11CB6; MARCHEN
5848 0x11CB7, // 11CB7..11CFF; UNKNOWN
5849 0x11D00, // 11D00..11D06; MASARAM_GONDI
5850 0x11D07, // 11D07 ; UNKNOWN
5851 0x11D08, // 11D08..11D09; MASARAM_GONDI
5852 0x11D0A, // 11D0A ; UNKNOWN
5853 0x11D0B, // 11D0B..11D36; MASARAM_GONDI
5854 0x11D37, // 11D37..11D39; UNKNOWN
5855 0x11D3A, // 11D3A ; MASARAM_GONDI
5856 0x11D3B, // 11D3B ; UNKNOWN
5857 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI
5858 0x11D3E, // 11D3E ; UNKNOWN
5859 0x11D3F, // 11D3F..11D47; MASARAM_GONDI
5860 0x11D48, // 11D48..11D4F; UNKNOWN
5861 0x11D50, // 11D50..11D59; MASARAM_GONDI
5862 0x11D5A, // 11D5A..11FFF; UNKNOWN
5863 0x12000, // 12000..12399; CUNEIFORM
5864 0x1239A, // 1239A..123FF; UNKNOWN
5865 0x12400, // 12400..1246E; CUNEIFORM
5866 0x1246F, // 1246F ; UNKNOWN
5867 0x12470, // 12470..12474; CUNEIFORM
5868 0x12475, // 12475..1247F; UNKNOWN
5869 0x12480, // 12480..12543; CUNEIFORM
5870 0x12544, // 12544..12FFF; UNKNOWN
5871 0x13000, // 13000..1342E; EGYPTIAN_HIEROGLYPHS
5872 0x1342F, // 1342F..143FF; UNKNOWN
5873 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS
5874 0x14647, // 14647..167FF; UNKNOWN
5875 0x16800, // 16800..16A38; BAMUM
5876 0x16A39, // 16A39..16A3F; UNKNOWN
5877 0x16A40, // 16A40..16A5E; MRO
5878 0x16A5F, // 16A5F ; UNKNOWN
5879 0x16A60, // 16A60..16A69; MRO
5880 0x16A6A, // 16A6A..16A6D; UNKNOWN
5881 0x16A6E, // 16A6E..16A6F; MRO
5882 0x16A70, // 16A70..16ACF; UNKNOWN
5883 0x16AD0, // 16AD0..16AED; BASSA_VAH
5884 0x16AEE, // 16AEE..16AEF; UNKNOWN
5885 0x16AF0, // 16AF0..16AF5; BASSA_VAH
5886 0x16AF6, // 16AF6..16AFF; UNKNOWN
5887 0x16B00, // 16B00..16B45; PAHAWH_HMONG
5888 0x16B46, // 16B46..16B4F; UNKNOWN
5889 0x16B50, // 16B50..16B59; PAHAWH_HMONG
5890 0x16B5A, // 16B5A ; UNKNOWN
5891 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG
5892 0x16B62, // 16B62 ; UNKNOWN
5893 0x16B63, // 16B63..16B77; PAHAWH_HMONG
5894 0x16B78, // 16B78..16B7C; UNKNOWN
5895 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG
5896 0x16B90, // 16B90..16EFF; UNKNOWN
5897 0x16F00, // 16F00..16F44; MIAO
5898 0x16F45, // 16F45..16F4F; UNKNOWN
5899 0x16F50, // 16F50..16F7E; MIAO
5900 0x16F7F, // 16F7F..16F8E; UNKNOWN
5901 0x16F8F, // 16F8F..16F9F; MIAO
5902 0x16FA0, // 16FA0..16FDF; UNKNOWN
5903 0x16FE0, // 16FE0 ; TANGUT
5904 0x16FE1, // 16FE1 ; NUSHU
5905 0x16FE2, // 16FE2..16FFF; UNKNOWN
5906 0x17000, // 17000..187EC; TANGUT
5907 0x187ED, // 187ED..187FF; UNKNOWN
5908 0x18800, // 18800..18AF2; TANGUT
5909 0x18AF3, // 18AF3..1AFFF; UNKNOWN
5910 0x1B000, // 1B000 ; KATAKANA
5911 0x1B001, // 1B001..1B11E; HIRAGANA
5912 0x1B11F, // 1B11F..1B16F; UNKNOWN
5913 0x1B170, // 1B170..1B2FB; NUSHU
5914 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN
5915 0x1BC00, // 1BC00..1BC6A; DUPLOYAN
5916 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN
5917 0x1BC70, // 1BC70..1BC7C; DUPLOYAN
5918 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN
5919 0x1BC80, // 1BC80..1BC88; DUPLOYAN
5920 0x1BC89, // 1BC89..1BC8F; UNKNOWN
5921 0x1BC90, // 1BC90..1BC99; DUPLOYAN
5922 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN
5923 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN
5924 0x1BCA0, // 1BCA0..1BCA3; COMMON
5925 0x1BCA4, // 1BCA4..1CFFF; UNKNOWN
5926 0x1D000, // 1D000..1D0F5; COMMON
5927 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN
5928 0x1D100, // 1D100..1D126; COMMON
5929 0x1D127, // 1D127..1D128; UNKNOWN
5930 0x1D129, // 1D129..1D166; COMMON
5931 0x1D167, // 1D167..1D169; INHERITED
5932 0x1D16A, // 1D16A..1D17A; COMMON
5933 0x1D17B, // 1D17B..1D182; INHERITED
5934 0x1D183, // 1D183..1D184; COMMON
5935 0x1D185, // 1D185..1D18B; INHERITED
5936 0x1D18C, // 1D18C..1D1A9; COMMON
5937 0x1D1AA, // 1D1AA..1D1AD; INHERITED
5938 0x1D1AE, // 1D1AE..1D1E8; COMMON
5939 0x1D1E9, // 1D1E9..1D1FF; UNKNOWN
5940 0x1D200, // 1D200..1D245; GREEK
5941 0x1D246, // 1D246..1D2FF; UNKNOWN
5942 0x1D300, // 1D300..1D356; COMMON
5943 0x1D357, // 1D357..1D35F; UNKNOWN
5944 0x1D360, // 1D360..1D371; COMMON
5945 0x1D372, // 1D372..1D3FF; UNKNOWN
5946 0x1D400, // 1D400..1D454; COMMON
5947 0x1D455, // 1D455 ; UNKNOWN
5948 0x1D456, // 1D456..1D49C; COMMON
5949 0x1D49D, // 1D49D ; UNKNOWN
5950 0x1D49E, // 1D49E..1D49F; COMMON
5951 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN
5952 0x1D4A2, // 1D4A2 ; COMMON
5953 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN
5954 0x1D4A5, // 1D4A5..1D4A6; COMMON
5955 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN
5956 0x1D4A9, // 1D4A9..1D4AC; COMMON
5957 0x1D4AD, // 1D4AD ; UNKNOWN
5958 0x1D4AE, // 1D4AE..1D4B9; COMMON
5959 0x1D4BA, // 1D4BA ; UNKNOWN
5960 0x1D4BB, // 1D4BB ; COMMON
5961 0x1D4BC, // 1D4BC ; UNKNOWN
5962 0x1D4BD, // 1D4BD..1D4C3; COMMON
5963 0x1D4C4, // 1D4C4 ; UNKNOWN
5964 0x1D4C5, // 1D4C5..1D505; COMMON
5965 0x1D506, // 1D506 ; UNKNOWN
5966 0x1D507, // 1D507..1D50A; COMMON
5967 0x1D50B, // 1D50B..1D50C; UNKNOWN
5968 0x1D50D, // 1D50D..1D514; COMMON
5969 0x1D515, // 1D515 ; UNKNOWN
5970 0x1D516, // 1D516..1D51C; COMMON
5971 0x1D51D, // 1D51D ; UNKNOWN
5972 0x1D51E, // 1D51E..1D539; COMMON
5973 0x1D53A, // 1D53A ; UNKNOWN
5974 0x1D53B, // 1D53B..1D53E; COMMON
5975 0x1D53F, // 1D53F ; UNKNOWN
5976 0x1D540, // 1D540..1D544; COMMON
5977 0x1D545, // 1D545 ; UNKNOWN
5978 0x1D546, // 1D546 ; COMMON
5979 0x1D547, // 1D547..1D549; UNKNOWN
5980 0x1D54A, // 1D54A..1D550; COMMON
5981 0x1D551, // 1D551 ; UNKNOWN
5982 0x1D552, // 1D552..1D6A5; COMMON
5983 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN
5984 0x1D6A8, // 1D6A8..1D7CB; COMMON
5985 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN
5986 0x1D7CE, // 1D7CE..1D7FF; COMMON
5987 0x1D800, // 1D800..1DA8B; SIGNWRITING
5988 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN
5989 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING
5990 0x1DAA0, // 1DAA0 ; UNKNOWN
5991 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING
5992 0x1DAB0, // 1DAB0..1DFFF; UNKNOWN
5993 0x1E000, // 1E000..1E006; GLAGOLITIC
5994 0x1E007, // 1E007 ; UNKNOWN
5995 0x1E008, // 1E008..1E018; GLAGOLITIC
5996 0x1E019, // 1E019..1E01A; UNKNOWN
5997 0x1E01B, // 1E01B..1E021; GLAGOLITIC
5998 0x1E022, // 1E022 ; UNKNOWN
5999 0x1E023, // 1E023..1E024; GLAGOLITIC
6000 0x1E025, // 1E025 ; UNKNOWN
6001 0x1E026, // 1E026..1E02A; GLAGOLITIC
6002 0x1E02B, // 1E02B..1E7FF; UNKNOWN
6003 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI
6004 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN
6005 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI
6006 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN
6007 0x1E900, // 1E900..1E94A; ADLAM
6008 0x1E94B, // 1E94B..1E94F; UNKNOWN
6009 0x1E950, // 1E950..1E959; ADLAM
6010 0x1E95A, // 1E95A..1E95D; UNKNOWN
6011 0x1E95E, // 1E95E..1E95F; ADLAM
6012 0x1E960, // 1E960..1EDFF; UNKNOWN
6013 0x1EE00, // 1EE00..1EE03; ARABIC
6014 0x1EE04, // 1EE04 ; UNKNOWN
6015 0x1EE05, // 1EE05..1EE1F; ARABIC
6016 0x1EE20, // 1EE20 ; UNKNOWN
6017 0x1EE21, // 1EE21..1EE22; ARABIC
6018 0x1EE23, // 1EE23 ; UNKNOWN
6019 0x1EE24, // 1EE24 ; ARABIC
6020 0x1EE25, // 1EE25..1EE26; UNKNOWN
6021 0x1EE27, // 1EE27 ; ARABIC
6022 0x1EE28, // 1EE28 ; UNKNOWN
6023 0x1EE29, // 1EE29..1EE32; ARABIC
6024 0x1EE33, // 1EE33 ; UNKNOWN
6025 0x1EE34, // 1EE34..1EE37; ARABIC
6026 0x1EE38, // 1EE38 ; UNKNOWN
6027 0x1EE39, // 1EE39 ; ARABIC
6028 0x1EE3A, // 1EE3A ; UNKNOWN
6029 0x1EE3B, // 1EE3B ; ARABIC
6030 0x1EE3C, // 1EE3C..1EE41; UNKNOWN
6031 0x1EE42, // 1EE42 ; ARABIC
6032 0x1EE43, // 1EE43..1EE46; UNKNOWN
6033 0x1EE47, // 1EE47 ; ARABIC
6034 0x1EE48, // 1EE48 ; UNKNOWN
6035 0x1EE49, // 1EE49 ; ARABIC
6036 0x1EE4A, // 1EE4A ; UNKNOWN
6037 0x1EE4B, // 1EE4B ; ARABIC
6038 0x1EE4C, // 1EE4C ; UNKNOWN
6039 0x1EE4D, // 1EE4D..1EE4F; ARABIC
6040 0x1EE50, // 1EE50 ; UNKNOWN
6041 0x1EE51, // 1EE51..1EE52; ARABIC
6042 0x1EE53, // 1EE53 ; UNKNOWN
6043 0x1EE54, // 1EE54 ; ARABIC
6044 0x1EE55, // 1EE55..1EE56; UNKNOWN
6045 0x1EE57, // 1EE57 ; ARABIC
6046 0x1EE58, // 1EE58 ; UNKNOWN
6047 0x1EE59, // 1EE59 ; ARABIC
6048 0x1EE5A, // 1EE5A ; UNKNOWN
6049 0x1EE5B, // 1EE5B ; ARABIC
6050 0x1EE5C, // 1EE5C ; UNKNOWN
6051 0x1EE5D, // 1EE5D ; ARABIC
6052 0x1EE5E, // 1EE5E ; UNKNOWN
6053 0x1EE5F, // 1EE5F ; ARABIC
6054 0x1EE60, // 1EE60 ; UNKNOWN
6055 0x1EE61, // 1EE61..1EE62; ARABIC
6056 0x1EE63, // 1EE63 ; UNKNOWN
6057 0x1EE64, // 1EE64 ; ARABIC
6058 0x1EE65, // 1EE65..1EE66; UNKNOWN
6059 0x1EE67, // 1EE67..1EE6A; ARABIC
6060 0x1EE6B, // 1EE6B ; UNKNOWN
6061 0x1EE6C, // 1EE6C..1EE72; ARABIC
6062 0x1EE73, // 1EE73 ; UNKNOWN
6063 0x1EE74, // 1EE74..1EE77; ARABIC
6064 0x1EE78, // 1EE78 ; UNKNOWN
6065 0x1EE79, // 1EE79..1EE7C; ARABIC
6066 0x1EE7D, // 1EE7D ; UNKNOWN
6067 0x1EE7E, // 1EE7E ; ARABIC
6068 0x1EE7F, // 1EE7F ; UNKNOWN
6069 0x1EE80, // 1EE80..1EE89; ARABIC
6070 0x1EE8A, // 1EE8A ; UNKNOWN
6071 0x1EE8B, // 1EE8B..1EE9B; ARABIC
6072 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN
6073 0x1EEA1, // 1EEA1..1EEA3; ARABIC
6074 0x1EEA4, // 1EEA4 ; UNKNOWN
6075 0x1EEA5, // 1EEA5..1EEA9; ARABIC
6076 0x1EEAA, // 1EEAA ; UNKNOWN
6077 0x1EEAB, // 1EEAB..1EEBB; ARABIC
6078 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN
6079 0x1EEF0, // 1EEF0..1EEF1; ARABIC
6080 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN
6081 0x1F000, // 1F000..1F02B; COMMON
6082 0x1F02C, // 1F02C..1F02F; UNKNOWN
6083 0x1F030, // 1F030..1F093; COMMON
6084 0x1F094, // 1F094..1F09F; UNKNOWN
6085 0x1F0A0, // 1F0A0..1F0AE; COMMON
6086 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN
6087 0x1F0B1, // 1F0B1..1F0BF; COMMON
6088 0x1F0C0, // 1F0C0 ; UNKNOWN
6089 0x1F0C1, // 1F0C1..1F0CF; COMMON
6090 0x1F0D0, // 1F0D0 ; UNKNOWN
6091 0x1F0D1, // 1F0D1..1F0F5; COMMON
6092 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN
6093 0x1F100, // 1F100..1F10C; COMMON
6094 0x1F10D, // 1F10D..1F10F; UNKNOWN
6095 0x1F110, // 1F110..1F12E; COMMON
6096 0x1F12F, // 1F12F ; UNKNOWN
6097 0x1F130, // 1F130..1F16B; COMMON
6098 0x1F16C, // 1F16C..1F16F; UNKNOWN
6099 0x1F170, // 1F170..1F1AC; COMMON
6100 0x1F1AD, // 1F1AD..1F1E5; UNKNOWN
6101 0x1F1E6, // 1F1E6..1F1FF; COMMON
6102 0x1F200, // 1F200 ; HIRAGANA
6103 0x1F201, // 1F201..1F202; COMMON
6104 0x1F203, // 1F203..1F20F; UNKNOWN
6105 0x1F210, // 1F210..1F23B; COMMON
6106 0x1F23C, // 1F23C..1F23F; UNKNOWN
6107 0x1F240, // 1F240..1F248; COMMON
6108 0x1F249, // 1F249..1F24F; UNKNOWN
6109 0x1F250, // 1F250..1F251; COMMON
6110 0x1F252, // 1F252..1F25F; UNKNOWN
6111 0x1F260, // 1F260..1F265; COMMON
6112 0x1F266, // 1F266..1F2FF; UNKNOWN
6113 0x1F300, // 1F300..1F6D4; COMMON
6114 0x1F6D5, // 1F6D5..1F6DF; UNKNOWN
6115 0x1F6E0, // 1F6E0..1F6EC; COMMON
6116 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN
6117 0x1F6F0, // 1F6F0..1F6F8; COMMON
6118 0x1F6F9, // 1F6F9..1F6FF; UNKNOWN
6119 0x1F700, // 1F700..1F773; COMMON
6120 0x1F774, // 1F774..1F77F; UNKNOWN
6121 0x1F780, // 1F780..1F7D4; COMMON
6122 0x1F7D5, // 1F7D5..1F7FF; UNKNOWN
6123 0x1F800, // 1F800..1F80B; COMMON
6124 0x1F80C, // 1F80C..1F80F; UNKNOWN
6125 0x1F810, // 1F810..1F847; COMMON
6126 0x1F848, // 1F848..1F84F; UNKNOWN
6127 0x1F850, // 1F850..1F859; COMMON
6128 0x1F85A, // 1F85A..1F85F; UNKNOWN
6129 0x1F860, // 1F860..1F887; COMMON
6130 0x1F888, // 1F888..1F88F; UNKNOWN
6131 0x1F890, // 1F890..1F8AD; COMMON
6132 0x1F8AE, // 1F8AE..1F8FF; UNKNOWN
6133 0x1F900, // 1F900..1F90B; COMMON
6134 0x1F90C, // 1F90C..1F90F; UNKNOWN
6135 0x1F910, // 1F910..1F93E; COMMON
6136 0x1F93F, // 1F93F ; UNKNOWN
6137 0x1F940, // 1F940..1F94C; COMMON
6138 0x1F94D, // 1F94D..1F94F; UNKNOWN
6139 0x1F950, // 1F950..1F96B; COMMON
6140 0x1F96C, // 1F96C..1F97F; UNKNOWN
6141 0x1F980, // 1F980..1F997; COMMON
6142 0x1F998, // 1F998..1F9BF; UNKNOWN
6143 0x1F9C0, // 1F9C0 ; COMMON
6144 0x1F9C1, // 1F9C1..1F9CF; UNKNOWN
6145 0x1F9D0, // 1F9D0..1F9E6; COMMON
6146 0x1F9E7, // 1F9E7..1FFFF; UNKNOWN
6147 0x20000, // 20000..2A6D6; HAN
6148 0x2A6D7, // 2A6D7..2A6FF; UNKNOWN
6149 0x2A700, // 2A700..2B734; HAN
6150 0x2B735, // 2B735..2B73F; UNKNOWN
6151 0x2B740, // 2B740..2B81D; HAN
6152 0x2B81E, // 2B81E..2B81F; UNKNOWN
6153 0x2B820, // 2B820..2CEA1; HAN
6154 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN
6155 0x2CEB0, // 2CEB0..2EBE0; HAN
6156 0x2EBE1, // 2EBE1..2F7FF; UNKNOWN
6157 0x2F800, // 2F800..2FA1D; HAN
6158 0x2FA1E, // 2FA1E..E0000; UNKNOWN
6159 0xE0001, // E0001 ; COMMON
6160 0xE0002, // E0002..E001F; UNKNOWN
6161 0xE0020, // E0020..E007F; COMMON
6162 0xE0080, // E0080..E00FF; UNKNOWN
6163 0xE0100, // E0100..E01EF; INHERITED
6164 0xE01F0 // E01F0..10FFFF; UNKNOWN
6165 };
6166
6167 private static final UnicodeScript[] scripts = {
6168 COMMON, // 0000..0040
6169 LATIN, // 0041..005A
6170 COMMON, // 005B..0060
6171 LATIN, // 0061..007A
6172 COMMON, // 007B..00A9
6173 LATIN, // 00AA
6174 COMMON, // 00AB..00B9
6175 LATIN, // 00BA
6176 COMMON, // 00BB..00BF
6177 LATIN, // 00C0..00D6
6178 COMMON, // 00D7
6179 LATIN, // 00D8..00F6
6180 COMMON, // 00F7
6181 LATIN, // 00F8..02B8
6182 COMMON, // 02B9..02DF
6183 LATIN, // 02E0..02E4
6184 COMMON, // 02E5..02E9
6185 BOPOMOFO, // 02EA..02EB
6186 COMMON, // 02EC..02FF
6187 INHERITED, // 0300..036F
6188 GREEK, // 0370..0373
6189 COMMON, // 0374
6190 GREEK, // 0375..0377
6191 UNKNOWN, // 0378..0379
6192 GREEK, // 037A..037D
6193 COMMON, // 037E
6194 GREEK, // 037F
6195 UNKNOWN, // 0380..0383
6196 GREEK, // 0384
6197 COMMON, // 0385
6198 GREEK, // 0386
6199 COMMON, // 0387
6200 GREEK, // 0388..038A
6201 UNKNOWN, // 038B
6202 GREEK, // 038C
6203 UNKNOWN, // 038D
6204 GREEK, // 038E..03A1
6205 UNKNOWN, // 03A2
6206 GREEK, // 03A3..03E1
6207 COPTIC, // 03E2..03EF
6208 GREEK, // 03F0..03FF
6209 CYRILLIC, // 0400..0484
6210 INHERITED, // 0485..0486
6211 CYRILLIC, // 0487..052F
6212 UNKNOWN, // 0530
6213 ARMENIAN, // 0531..0556
6214 UNKNOWN, // 0557..0558
6215 ARMENIAN, // 0559..055F
6216 UNKNOWN, // 0560
6217 ARMENIAN, // 0561..0587
6218 UNKNOWN, // 0588
6219 COMMON, // 0589
6220 ARMENIAN, // 058A
6221 UNKNOWN, // 058B..058C
6222 ARMENIAN, // 058D..058F
6223 UNKNOWN, // 0590
6224 HEBREW, // 0591..05C7
6225 UNKNOWN, // 05C8..05CF
6226 HEBREW, // 05D0..05EA
6227 UNKNOWN, // 05EB..05EF
6228 HEBREW, // 05F0..05F4
6229 UNKNOWN, // 05F5..05FF
6230 ARABIC, // 0600..0604
6231 COMMON, // 0605
6232 ARABIC, // 0606..060B
6233 COMMON, // 060C
6234 ARABIC, // 060D..061A
6235 COMMON, // 061B
6236 ARABIC, // 061C
6237 UNKNOWN, // 061D
6238 ARABIC, // 061E
6239 COMMON, // 061F
6240 ARABIC, // 0620..063F
6241 COMMON, // 0640
6242 ARABIC, // 0641..064A
6243 INHERITED, // 064B..0655
6244 ARABIC, // 0656..066F
6245 INHERITED, // 0670
6246 ARABIC, // 0671..06DC
6247 COMMON, // 06DD
6248 ARABIC, // 06DE..06FF
6249 SYRIAC, // 0700..070D
6250 UNKNOWN, // 070E
6251 SYRIAC, // 070F..074A
6252 UNKNOWN, // 074B..074C
6253 SYRIAC, // 074D..074F
6254 ARABIC, // 0750..077F
6255 THAANA, // 0780..07B1
6256 UNKNOWN, // 07B2..07BF
6257 NKO, // 07C0..07FA
6258 UNKNOWN, // 07FB..07FF
6259 SAMARITAN, // 0800..082D
6260 UNKNOWN, // 082E..082F
6261 SAMARITAN, // 0830..083E
6262 UNKNOWN, // 083F
6263 MANDAIC, // 0840..085B
6264 UNKNOWN, // 085C..085D
6265 MANDAIC, // 085E
6266 UNKNOWN, // 085F
6267 SYRIAC, // 0860..086A
6268 UNKNOWN, // 086B..089F
6269 ARABIC, // 08A0..08B4
6270 UNKNOWN, // 08B5
6271 ARABIC, // 08B6..08BD
6272 UNKNOWN, // 08BE..08D3
6273 ARABIC, // 08D4..08E1
6274 COMMON, // 08E2
6275 ARABIC, // 08E3..08FF
6276 DEVANAGARI, // 0900..0950
6277 INHERITED, // 0951..0952
6278 DEVANAGARI, // 0953..0963
6279 COMMON, // 0964..0965
6280 DEVANAGARI, // 0966..097F
6281 BENGALI, // 0980..0983
6282 UNKNOWN, // 0984
6283 BENGALI, // 0985..098C
6284 UNKNOWN, // 098D..098E
6285 BENGALI, // 098F..0990
6286 UNKNOWN, // 0991..0992
6287 BENGALI, // 0993..09A8
6288 UNKNOWN, // 09A9
6289 BENGALI, // 09AA..09B0
6290 UNKNOWN, // 09B1
6291 BENGALI, // 09B2
6292 UNKNOWN, // 09B3..09B5
6293 BENGALI, // 09B6..09B9
6294 UNKNOWN, // 09BA..09BB
6295 BENGALI, // 09BC..09C4
6296 UNKNOWN, // 09C5..09C6
6297 BENGALI, // 09C7..09C8
6298 UNKNOWN, // 09C9..09CA
6299 BENGALI, // 09CB..09CE
6300 UNKNOWN, // 09CF..09D6
6301 BENGALI, // 09D7
6302 UNKNOWN, // 09D8..09DB
6303 BENGALI, // 09DC..09DD
6304 UNKNOWN, // 09DE
6305 BENGALI, // 09DF..09E3
6306 UNKNOWN, // 09E4..09E5
6307 BENGALI, // 09E6..09FD
6308 UNKNOWN, // 09FE..0A00
6309 GURMUKHI, // 0A01..0A03
6310 UNKNOWN, // 0A04
6311 GURMUKHI, // 0A05..0A0A
6312 UNKNOWN, // 0A0B..0A0E
6313 GURMUKHI, // 0A0F..0A10
6314 UNKNOWN, // 0A11..0A12
6315 GURMUKHI, // 0A13..0A28
6316 UNKNOWN, // 0A29
6317 GURMUKHI, // 0A2A..0A30
6318 UNKNOWN, // 0A31
6319 GURMUKHI, // 0A32..0A33
6320 UNKNOWN, // 0A34
6321 GURMUKHI, // 0A35..0A36
6322 UNKNOWN, // 0A37
6323 GURMUKHI, // 0A38..0A39
6324 UNKNOWN, // 0A3A..0A3B
6325 GURMUKHI, // 0A3C
6326 UNKNOWN, // 0A3D
6327 GURMUKHI, // 0A3E..0A42
6328 UNKNOWN, // 0A43..0A46
6329 GURMUKHI, // 0A47..0A48
6330 UNKNOWN, // 0A49..0A4A
6331 GURMUKHI, // 0A4B..0A4D
6332 UNKNOWN, // 0A4E..0A50
6333 GURMUKHI, // 0A51
6334 UNKNOWN, // 0A52..0A58
6335 GURMUKHI, // 0A59..0A5C
6336 UNKNOWN, // 0A5D
6337 GURMUKHI, // 0A5E
6338 UNKNOWN, // 0A5F..0A65
6339 GURMUKHI, // 0A66..0A75
6340 UNKNOWN, // 0A76..0A80
6341 GUJARATI, // 0A81..0A83
6342 UNKNOWN, // 0A84
6343 GUJARATI, // 0A85..0A8D
6344 UNKNOWN, // 0A8E
6345 GUJARATI, // 0A8F..0A91
6346 UNKNOWN, // 0A92
6347 GUJARATI, // 0A93..0AA8
6348 UNKNOWN, // 0AA9
6349 GUJARATI, // 0AAA..0AB0
6350 UNKNOWN, // 0AB1
6351 GUJARATI, // 0AB2..0AB3
6352 UNKNOWN, // 0AB4
6353 GUJARATI, // 0AB5..0AB9
6354 UNKNOWN, // 0ABA..0ABB
6355 GUJARATI, // 0ABC..0AC5
6356 UNKNOWN, // 0AC6
6357 GUJARATI, // 0AC7..0AC9
6358 UNKNOWN, // 0ACA
6359 GUJARATI, // 0ACB..0ACD
6360 UNKNOWN, // 0ACE..0ACF
6361 GUJARATI, // 0AD0
6362 UNKNOWN, // 0AD1..0ADF
6363 GUJARATI, // 0AE0..0AE3
6364 UNKNOWN, // 0AE4..0AE5
6365 GUJARATI, // 0AE6..0AF1
6366 UNKNOWN, // 0AF2..0AF8
6367 GUJARATI, // 0AF9..0AFF
6368 UNKNOWN, // 0B00
6369 ORIYA, // 0B01..0B03
6370 UNKNOWN, // 0B04
6371 ORIYA, // 0B05..0B0C
6372 UNKNOWN, // 0B0D..0B0E
6373 ORIYA, // 0B0F..0B10
6374 UNKNOWN, // 0B11..0B12
6375 ORIYA, // 0B13..0B28
6376 UNKNOWN, // 0B29
6377 ORIYA, // 0B2A..0B30
6378 UNKNOWN, // 0B31
6379 ORIYA, // 0B32..0B33
6380 UNKNOWN, // 0B34
6381 ORIYA, // 0B35..0B39
6382 UNKNOWN, // 0B3A..0B3B
6383 ORIYA, // 0B3C..0B44
6384 UNKNOWN, // 0B45..0B46
6385 ORIYA, // 0B47..0B48
6386 UNKNOWN, // 0B49..0B4A
6387 ORIYA, // 0B4B..0B4D
6388 UNKNOWN, // 0B4E..0B55
6389 ORIYA, // 0B56..0B57
6390 UNKNOWN, // 0B58..0B5B
6391 ORIYA, // 0B5C..0B5D
6392 UNKNOWN, // 0B5E
6393 ORIYA, // 0B5F..0B63
6394 UNKNOWN, // 0B64..0B65
6395 ORIYA, // 0B66..0B77
6396 UNKNOWN, // 0B78..0B81
6397 TAMIL, // 0B82..0B83
6398 UNKNOWN, // 0B84
6399 TAMIL, // 0B85..0B8A
6400 UNKNOWN, // 0B8B..0B8D
6401 TAMIL, // 0B8E..0B90
6402 UNKNOWN, // 0B91
6403 TAMIL, // 0B92..0B95
6404 UNKNOWN, // 0B96..0B98
6405 TAMIL, // 0B99..0B9A
6406 UNKNOWN, // 0B9B
6407 TAMIL, // 0B9C
6408 UNKNOWN, // 0B9D
6409 TAMIL, // 0B9E..0B9F
6410 UNKNOWN, // 0BA0..0BA2
6411 TAMIL, // 0BA3..0BA4
6412 UNKNOWN, // 0BA5..0BA7
6413 TAMIL, // 0BA8..0BAA
6414 UNKNOWN, // 0BAB..0BAD
6415 TAMIL, // 0BAE..0BB9
6416 UNKNOWN, // 0BBA..0BBD
6417 TAMIL, // 0BBE..0BC2
6418 UNKNOWN, // 0BC3..0BC5
6419 TAMIL, // 0BC6..0BC8
6420 UNKNOWN, // 0BC9
6421 TAMIL, // 0BCA..0BCD
6422 UNKNOWN, // 0BCE..0BCF
6423 TAMIL, // 0BD0
6424 UNKNOWN, // 0BD1..0BD6
6425 TAMIL, // 0BD7
6426 UNKNOWN, // 0BD8..0BE5
6427 TAMIL, // 0BE6..0BFA
6428 UNKNOWN, // 0BFB..0BFF
6429 TELUGU, // 0C00..0C03
6430 UNKNOWN, // 0C04
6431 TELUGU, // 0C05..0C0C
6432 UNKNOWN, // 0C0D
6433 TELUGU, // 0C0E..0C10
6434 UNKNOWN, // 0C11
6435 TELUGU, // 0C12..0C28
6436 UNKNOWN, // 0C29
6437 TELUGU, // 0C2A..0C39
6438 UNKNOWN, // 0C3A..0C3C
6439 TELUGU, // 0C3D..0C44
6440 UNKNOWN, // 0C45
6441 TELUGU, // 0C46..0C48
6442 UNKNOWN, // 0C49
6443 TELUGU, // 0C4A..0C4D
6444 UNKNOWN, // 0C4E..0C54
6445 TELUGU, // 0C55..0C56
6446 UNKNOWN, // 0C57
6447 TELUGU, // 0C58..0C5A
6448 UNKNOWN, // 0C5B..0C5F
6449 TELUGU, // 0C60..0C63
6450 UNKNOWN, // 0C64..0C65
6451 TELUGU, // 0C66..0C6F
6452 UNKNOWN, // 0C70..0C77
6453 TELUGU, // 0C78..0C7F
6454 KANNADA, // 0C80..0C83
6455 UNKNOWN, // 0C84
6456 KANNADA, // 0C85..0C8C
6457 UNKNOWN, // 0C8D
6458 KANNADA, // 0C8E..0C90
6459 UNKNOWN, // 0C91
6460 KANNADA, // 0C92..0CA8
6461 UNKNOWN, // 0CA9
6462 KANNADA, // 0CAA..0CB3
6463 UNKNOWN, // 0CB4
6464 KANNADA, // 0CB5..0CB9
6465 UNKNOWN, // 0CBA..0CBB
6466 KANNADA, // 0CBC..0CC4
6467 UNKNOWN, // 0CC5
6468 KANNADA, // 0CC6..0CC8
6469 UNKNOWN, // 0CC9
6470 KANNADA, // 0CCA..0CCD
6471 UNKNOWN, // 0CCE..0CD4
6472 KANNADA, // 0CD5..0CD6
6473 UNKNOWN, // 0CD7..0CDD
6474 KANNADA, // 0CDE
6475 UNKNOWN, // 0CDF
6476 KANNADA, // 0CE0..0CE3
6477 UNKNOWN, // 0CE4..0CE5
6478 KANNADA, // 0CE6..0CEF
6479 UNKNOWN, // 0CF0
6480 KANNADA, // 0CF1..0CF2
6481 UNKNOWN, // 0CF3..0CFF
6482 MALAYALAM, // 0D00..0D03
6483 UNKNOWN, // 0D04
6484 MALAYALAM, // 0D05..0D0C
6485 UNKNOWN, // 0D0D
6486 MALAYALAM, // 0D0E..0D10
6487 UNKNOWN, // 0D11
6488 MALAYALAM, // 0D12..0D44
6489 UNKNOWN, // 0D45
6490 MALAYALAM, // 0D46..0D48
6491 UNKNOWN, // 0D49
6492 MALAYALAM, // 0D4A..0D4F
6493 UNKNOWN, // 0D50..0D53
6494 MALAYALAM, // 0D54..0D63
6495 UNKNOWN, // 0D64..0D65
6496 MALAYALAM, // 0D66..0D7F
6497 UNKNOWN, // 0D80..0D81
6498 SINHALA, // 0D82..0D83
6499 UNKNOWN, // 0D84
6500 SINHALA, // 0D85..0D96
6501 UNKNOWN, // 0D97..0D99
6502 SINHALA, // 0D9A..0DB1
6503 UNKNOWN, // 0DB2
6504 SINHALA, // 0DB3..0DBB
6505 UNKNOWN, // 0DBC
6506 SINHALA, // 0DBD
6507 UNKNOWN, // 0DBE..0DBF
6508 SINHALA, // 0DC0..0DC6
6509 UNKNOWN, // 0DC7..0DC9
6510 SINHALA, // 0DCA
6511 UNKNOWN, // 0DCB..0DCE
6512 SINHALA, // 0DCF..0DD4
6513 UNKNOWN, // 0DD5
6514 SINHALA, // 0DD6
6515 UNKNOWN, // 0DD7
6516 SINHALA, // 0DD8..0DDF
6517 UNKNOWN, // 0DE0..0DE5
6518 SINHALA, // 0DE6..0DEF
6519 UNKNOWN, // 0DF0..0DF1
6520 SINHALA, // 0DF2..0DF4
6521 UNKNOWN, // 0DF5..0E00
6522 THAI, // 0E01..0E3A
6523 UNKNOWN, // 0E3B..0E3E
6524 COMMON, // 0E3F
6525 THAI, // 0E40..0E5B
6526 UNKNOWN, // 0E5C..0E80
6527 LAO, // 0E81..0E82
6528 UNKNOWN, // 0E83
6529 LAO, // 0E84
6530 UNKNOWN, // 0E85..0E86
6531 LAO, // 0E87..0E88
6532 UNKNOWN, // 0E89
6533 LAO, // 0E8A
6534 UNKNOWN, // 0E8B..0E8C
6535 LAO, // 0E8D
6536 UNKNOWN, // 0E8E..0E93
6537 LAO, // 0E94..0E97
6538 UNKNOWN, // 0E98
6539 LAO, // 0E99..0E9F
6540 UNKNOWN, // 0EA0
6541 LAO, // 0EA1..0EA3
6542 UNKNOWN, // 0EA4
6543 LAO, // 0EA5
6544 UNKNOWN, // 0EA6
6545 LAO, // 0EA7
6546 UNKNOWN, // 0EA8..0EA9
6547 LAO, // 0EAA..0EAB
6548 UNKNOWN, // 0EAC
6549 LAO, // 0EAD..0EB9
6550 UNKNOWN, // 0EBA
6551 LAO, // 0EBB..0EBD
6552 UNKNOWN, // 0EBE..0EBF
6553 LAO, // 0EC0..0EC4
6554 UNKNOWN, // 0EC5
6555 LAO, // 0EC6
6556 UNKNOWN, // 0EC7
6557 LAO, // 0EC8..0ECD
6558 UNKNOWN, // 0ECE..0ECF
6559 LAO, // 0ED0..0ED9
6560 UNKNOWN, // 0EDA..0EDB
6561 LAO, // 0EDC..0EDF
6562 UNKNOWN, // 0EE0..0EFF
6563 TIBETAN, // 0F00..0F47
6564 UNKNOWN, // 0F48
6565 TIBETAN, // 0F49..0F6C
6566 UNKNOWN, // 0F6D..0F70
6567 TIBETAN, // 0F71..0F97
6568 UNKNOWN, // 0F98
6569 TIBETAN, // 0F99..0FBC
6570 UNKNOWN, // 0FBD
6571 TIBETAN, // 0FBE..0FCC
6572 UNKNOWN, // 0FCD
6573 TIBETAN, // 0FCE..0FD4
6574 COMMON, // 0FD5..0FD8
6575 TIBETAN, // 0FD9..0FDA
6576 UNKNOWN, // 0FDB..0FFF
6577 MYANMAR, // 1000..109F
6578 GEORGIAN, // 10A0..10C5
6579 UNKNOWN, // 10C6
6580 GEORGIAN, // 10C7
6581 UNKNOWN, // 10C8..10CC
6582 GEORGIAN, // 10CD
6583 UNKNOWN, // 10CE..10CF
6584 GEORGIAN, // 10D0..10FA
6585 COMMON, // 10FB
6586 GEORGIAN, // 10FC..10FF
6587 HANGUL, // 1100..11FF
6588 ETHIOPIC, // 1200..1248
6589 UNKNOWN, // 1249
6590 ETHIOPIC, // 124A..124D
6591 UNKNOWN, // 124E..124F
6592 ETHIOPIC, // 1250..1256
6593 UNKNOWN, // 1257
6594 ETHIOPIC, // 1258
6595 UNKNOWN, // 1259
6596 ETHIOPIC, // 125A..125D
6597 UNKNOWN, // 125E..125F
6598 ETHIOPIC, // 1260..1288
6599 UNKNOWN, // 1289
6600 ETHIOPIC, // 128A..128D
6601 UNKNOWN, // 128E..128F
6602 ETHIOPIC, // 1290..12B0
6603 UNKNOWN, // 12B1
6604 ETHIOPIC, // 12B2..12B5
6605 UNKNOWN, // 12B6..12B7
6606 ETHIOPIC, // 12B8..12BE
6607 UNKNOWN, // 12BF
6608 ETHIOPIC, // 12C0
6609 UNKNOWN, // 12C1
6610 ETHIOPIC, // 12C2..12C5
6611 UNKNOWN, // 12C6..12C7
6612 ETHIOPIC, // 12C8..12D6
6613 UNKNOWN, // 12D7
6614 ETHIOPIC, // 12D8..1310
6615 UNKNOWN, // 1311
6616 ETHIOPIC, // 1312..1315
6617 UNKNOWN, // 1316..1317
6618 ETHIOPIC, // 1318..135A
6619 UNKNOWN, // 135B..135C
6620 ETHIOPIC, // 135D..137C
6621 UNKNOWN, // 137D..137F
6622 ETHIOPIC, // 1380..1399
6623 UNKNOWN, // 139A..139F
6624 CHEROKEE, // 13A0..13F5
6625 UNKNOWN, // 13F6..13F7
6626 CHEROKEE, // 13F8..13FD
6627 UNKNOWN, // 13FE..13FF
6628 CANADIAN_ABORIGINAL, // 1400..167F
6629 OGHAM, // 1680..169C
6630 UNKNOWN, // 169D..169F
6631 RUNIC, // 16A0..16EA
6632 COMMON, // 16EB..16ED
6633 RUNIC, // 16EE..16F8
6634 UNKNOWN, // 16F9..16FF
6635 TAGALOG, // 1700..170C
6636 UNKNOWN, // 170D
6637 TAGALOG, // 170E..1714
6638 UNKNOWN, // 1715..171F
6639 HANUNOO, // 1720..1734
6640 COMMON, // 1735..1736
6641 UNKNOWN, // 1737..173F
6642 BUHID, // 1740..1753
6643 UNKNOWN, // 1754..175F
6644 TAGBANWA, // 1760..176C
6645 UNKNOWN, // 176D
6646 TAGBANWA, // 176E..1770
6647 UNKNOWN, // 1771
6648 TAGBANWA, // 1772..1773
6649 UNKNOWN, // 1774..177F
6650 KHMER, // 1780..17DD
6651 UNKNOWN, // 17DE..17DF
6652 KHMER, // 17E0..17E9
6653 UNKNOWN, // 17EA..17EF
6654 KHMER, // 17F0..17F9
6655 UNKNOWN, // 17FA..17FF
6656 MONGOLIAN, // 1800..1801
6657 COMMON, // 1802..1803
6658 MONGOLIAN, // 1804
6659 COMMON, // 1805
6660 MONGOLIAN, // 1806..180E
6661 UNKNOWN, // 180F
6662 MONGOLIAN, // 1810..1819
6663 UNKNOWN, // 181A..181F
6664 MONGOLIAN, // 1820..1877
6665 UNKNOWN, // 1878..187F
6666 MONGOLIAN, // 1880..18AA
6667 UNKNOWN, // 18AB..18AF
6668 CANADIAN_ABORIGINAL, // 18B0..18F5
6669 UNKNOWN, // 18F6..18FF
6670 LIMBU, // 1900..191E
6671 UNKNOWN, // 191F
6672 LIMBU, // 1920..192B
6673 UNKNOWN, // 192C..192F
6674 LIMBU, // 1930..193B
6675 UNKNOWN, // 193C..193F
6676 LIMBU, // 1940
6677 UNKNOWN, // 1941..1943
6678 LIMBU, // 1944..194F
6679 TAI_LE, // 1950..196D
6680 UNKNOWN, // 196E..196F
6681 TAI_LE, // 1970..1974
6682 UNKNOWN, // 1975..197F
6683 NEW_TAI_LUE, // 1980..19AB
6684 UNKNOWN, // 19AC..19AF
6685 NEW_TAI_LUE, // 19B0..19C9
6686 UNKNOWN, // 19CA..19CF
6687 NEW_TAI_LUE, // 19D0..19DA
6688 UNKNOWN, // 19DB..19DD
6689 NEW_TAI_LUE, // 19DE..19DF
6690 KHMER, // 19E0..19FF
6691 BUGINESE, // 1A00..1A1B
6692 UNKNOWN, // 1A1C..1A1D
6693 BUGINESE, // 1A1E..1A1F
6694 TAI_THAM, // 1A20..1A5E
6695 UNKNOWN, // 1A5F
6696 TAI_THAM, // 1A60..1A7C
6697 UNKNOWN, // 1A7D..1A7E
6698 TAI_THAM, // 1A7F..1A89
6699 UNKNOWN, // 1A8A..1A8F
6700 TAI_THAM, // 1A90..1A99
6701 UNKNOWN, // 1A9A..1A9F
6702 TAI_THAM, // 1AA0..1AAD
6703 UNKNOWN, // 1AAE..1AAF
6704 INHERITED, // 1AB0..1ABE
6705 UNKNOWN, // 1ABF..1AFF
6706 BALINESE, // 1B00..1B4B
6707 UNKNOWN, // 1B4C..1B4F
6708 BALINESE, // 1B50..1B7C
6709 UNKNOWN, // 1B7D..1B7F
6710 SUNDANESE, // 1B80..1BBF
6711 BATAK, // 1BC0..1BF3
6712 UNKNOWN, // 1BF4..1BFB
6713 BATAK, // 1BFC..1BFF
6714 LEPCHA, // 1C00..1C37
6715 UNKNOWN, // 1C38..1C3A
6716 LEPCHA, // 1C3B..1C49
6717 UNKNOWN, // 1C4A..1C4C
6718 LEPCHA, // 1C4D..1C4F
6719 OL_CHIKI, // 1C50..1C7F
6720 CYRILLIC, // 1C80..1C88
6721 UNKNOWN, // 1C89..1CBF
6722 SUNDANESE, // 1CC0..1CC7
6723 UNKNOWN, // 1CC8..1CCF
6724 INHERITED, // 1CD0..1CD2
6725 COMMON, // 1CD3
6726 INHERITED, // 1CD4..1CE0
6727 COMMON, // 1CE1
6728 INHERITED, // 1CE2..1CE8
6729 COMMON, // 1CE9..1CEC
6730 INHERITED, // 1CED
6731 COMMON, // 1CEE..1CF3
6732 INHERITED, // 1CF4
6733 COMMON, // 1CF5..1CF7
6734 INHERITED, // 1CF8..1CF9
6735 UNKNOWN, // 1CFA..1CFF
6736 LATIN, // 1D00..1D25
6737 GREEK, // 1D26..1D2A
6738 CYRILLIC, // 1D2B
6739 LATIN, // 1D2C..1D5C
6740 GREEK, // 1D5D..1D61
6741 LATIN, // 1D62..1D65
6742 GREEK, // 1D66..1D6A
6743 LATIN, // 1D6B..1D77
6744 CYRILLIC, // 1D78
6745 LATIN, // 1D79..1DBE
6746 GREEK, // 1DBF
6747 INHERITED, // 1DC0..1DF9
6748 UNKNOWN, // 1DFA
6749 INHERITED, // 1DFB..1DFF
6750 LATIN, // 1E00..1EFF
6751 GREEK, // 1F00..1F15
6752 UNKNOWN, // 1F16..1F17
6753 GREEK, // 1F18..1F1D
6754 UNKNOWN, // 1F1E..1F1F
6755 GREEK, // 1F20..1F45
6756 UNKNOWN, // 1F46..1F47
6757 GREEK, // 1F48..1F4D
6758 UNKNOWN, // 1F4E..1F4F
6759 GREEK, // 1F50..1F57
6760 UNKNOWN, // 1F58
6761 GREEK, // 1F59
6762 UNKNOWN, // 1F5A
6763 GREEK, // 1F5B
6764 UNKNOWN, // 1F5C
6765 GREEK, // 1F5D
6766 UNKNOWN, // 1F5E
6767 GREEK, // 1F5F..1F7D
6768 UNKNOWN, // 1F7E..1F7F
6769 GREEK, // 1F80..1FB4
6770 UNKNOWN, // 1FB5
6771 GREEK, // 1FB6..1FC4
6772 UNKNOWN, // 1FC5
6773 GREEK, // 1FC6..1FD3
6774 UNKNOWN, // 1FD4..1FD5
6775 GREEK, // 1FD6..1FDB
6776 UNKNOWN, // 1FDC
6777 GREEK, // 1FDD..1FEF
6778 UNKNOWN, // 1FF0..1FF1
6779 GREEK, // 1FF2..1FF4
6780 UNKNOWN, // 1FF5
6781 GREEK, // 1FF6..1FFE
6782 UNKNOWN, // 1FFF
6783 COMMON, // 2000..200B
6784 INHERITED, // 200C..200D
6785 COMMON, // 200E..2064
6786 UNKNOWN, // 2065
6787 COMMON, // 2066..2070
6788 LATIN, // 2071
6789 UNKNOWN, // 2072..2073
6790 COMMON, // 2074..207E
6791 LATIN, // 207F
6792 COMMON, // 2080..208E
6793 UNKNOWN, // 208F
6794 LATIN, // 2090..209C
6795 UNKNOWN, // 209D..209F
6796 COMMON, // 20A0..20BF
6797 UNKNOWN, // 20C0..20CF
6798 INHERITED, // 20D0..20F0
6799 UNKNOWN, // 20F1..20FF
6800 COMMON, // 2100..2125
6801 GREEK, // 2126
6802 COMMON, // 2127..2129
6803 LATIN, // 212A..212B
6804 COMMON, // 212C..2131
6805 LATIN, // 2132
6806 COMMON, // 2133..214D
6807 LATIN, // 214E
6808 COMMON, // 214F..215F
6809 LATIN, // 2160..2188
6810 COMMON, // 2189..218B
6811 UNKNOWN, // 218C..218F
6812 COMMON, // 2190..2426
6813 UNKNOWN, // 2427..243F
6814 COMMON, // 2440..244A
6815 UNKNOWN, // 244B..245F
6816 COMMON, // 2460..27FF
6817 BRAILLE, // 2800..28FF
6818 COMMON, // 2900..2B73
6819 UNKNOWN, // 2B74..2B75
6820 COMMON, // 2B76..2B95
6821 UNKNOWN, // 2B96..2B97
6822 COMMON, // 2B98..2BB9
6823 UNKNOWN, // 2BBA..2BBC
6824 COMMON, // 2BBD..2BC8
6825 UNKNOWN, // 2BC9
6826 COMMON, // 2BCA..2BD2
6827 UNKNOWN, // 2BD3..2BEB
6828 COMMON, // 2BEC..2BEF
6829 UNKNOWN, // 2BF0..2BFF
6830 GLAGOLITIC, // 2C00..2C2E
6831 UNKNOWN, // 2C2F
6832 GLAGOLITIC, // 2C30..2C5E
6833 UNKNOWN, // 2C5F
6834 LATIN, // 2C60..2C7F
6835 COPTIC, // 2C80..2CF3
6836 UNKNOWN, // 2CF4..2CF8
6837 COPTIC, // 2CF9..2CFF
6838 GEORGIAN, // 2D00..2D25
6839 UNKNOWN, // 2D26
6840 GEORGIAN, // 2D27
6841 UNKNOWN, // 2D28..2D2C
6842 GEORGIAN, // 2D2D
6843 UNKNOWN, // 2D2E..2D2F
6844 TIFINAGH, // 2D30..2D67
6845 UNKNOWN, // 2D68..2D6E
6846 TIFINAGH, // 2D6F..2D70
6847 UNKNOWN, // 2D71..2D7E
6848 TIFINAGH, // 2D7F
6849 ETHIOPIC, // 2D80..2D96
6850 UNKNOWN, // 2D97..2D9F
6851 ETHIOPIC, // 2DA0..2DA6
6852 UNKNOWN, // 2DA7
6853 ETHIOPIC, // 2DA8..2DAE
6854 UNKNOWN, // 2DAF
6855 ETHIOPIC, // 2DB0..2DB6
6856 UNKNOWN, // 2DB7
6857 ETHIOPIC, // 2DB8..2DBE
6858 UNKNOWN, // 2DBF
6859 ETHIOPIC, // 2DC0..2DC6
6860 UNKNOWN, // 2DC7
6861 ETHIOPIC, // 2DC8..2DCE
6862 UNKNOWN, // 2DCF
6863 ETHIOPIC, // 2DD0..2DD6
6864 UNKNOWN, // 2DD7
6865 ETHIOPIC, // 2DD8..2DDE
6866 UNKNOWN, // 2DDF
6867 CYRILLIC, // 2DE0..2DFF
6868 COMMON, // 2E00..2E49
6869 UNKNOWN, // 2E4A..2E7F
6870 HAN, // 2E80..2E99
6871 UNKNOWN, // 2E9A
6872 HAN, // 2E9B..2EF3
6873 UNKNOWN, // 2EF4..2EFF
6874 HAN, // 2F00..2FD5
6875 UNKNOWN, // 2FD6..2FEF
6876 COMMON, // 2FF0..2FFB
6877 UNKNOWN, // 2FFC..2FFF
6878 COMMON, // 3000..3004
6879 HAN, // 3005
6880 COMMON, // 3006
6881 HAN, // 3007
6882 COMMON, // 3008..3020
6883 HAN, // 3021..3029
6884 INHERITED, // 302A..302D
6885 HANGUL, // 302E..302F
6886 COMMON, // 3030..3037
6887 HAN, // 3038..303B
6888 COMMON, // 303C..303F
6889 UNKNOWN, // 3040
6890 HIRAGANA, // 3041..3096
6891 UNKNOWN, // 3097..3098
6892 INHERITED, // 3099..309A
6893 COMMON, // 309B..309C
6894 HIRAGANA, // 309D..309F
6895 COMMON, // 30A0
6896 KATAKANA, // 30A1..30FA
6897 COMMON, // 30FB..30FC
6898 KATAKANA, // 30FD..30FF
6899 UNKNOWN, // 3100..3104
6900 BOPOMOFO, // 3105..312E
6901 UNKNOWN, // 312F..3130
6902 HANGUL, // 3131..318E
6903 UNKNOWN, // 318F
6904 COMMON, // 3190..319F
6905 BOPOMOFO, // 31A0..31BA
6906 UNKNOWN, // 31BB..31BF
6907 COMMON, // 31C0..31E3
6908 UNKNOWN, // 31E4..31EF
6909 KATAKANA, // 31F0..31FF
6910 HANGUL, // 3200..321E
6911 UNKNOWN, // 321F
6912 COMMON, // 3220..325F
6913 HANGUL, // 3260..327E
6914 COMMON, // 327F..32CF
6915 KATAKANA, // 32D0..32FE
6916 COMMON, // 32FF
6917 KATAKANA, // 3300..3357
6918 COMMON, // 3358..33FF
6919 HAN, // 3400..4DB5
6920 UNKNOWN, // 4DB6..4DBF
6921 COMMON, // 4DC0..4DFF
6922 HAN, // 4E00..9FEA
6923 UNKNOWN, // 9FEB..9FFF
6924 YI, // A000..A48C
6925 UNKNOWN, // A48D..A48F
6926 YI, // A490..A4C6
6927 UNKNOWN, // A4C7..A4CF
6928 LISU, // A4D0..A4FF
6929 VAI, // A500..A62B
6930 UNKNOWN, // A62C..A63F
6931 CYRILLIC, // A640..A69F
6932 BAMUM, // A6A0..A6F7
6933 UNKNOWN, // A6F8..A6FF
6934 COMMON, // A700..A721
6935 LATIN, // A722..A787
6936 COMMON, // A788..A78A
6937 LATIN, // A78B..A7AE
6938 UNKNOWN, // A7AF
6939 LATIN, // A7B0..A7B7
6940 UNKNOWN, // A7B8..A7F6
6941 LATIN, // A7F7..A7FF
6942 SYLOTI_NAGRI, // A800..A82B
6943 UNKNOWN, // A82C..A82F
6944 COMMON, // A830..A839
6945 UNKNOWN, // A83A..A83F
6946 PHAGS_PA, // A840..A877
6947 UNKNOWN, // A878..A87F
6948 SAURASHTRA, // A880..A8C5
6949 UNKNOWN, // A8C6..A8CD
6950 SAURASHTRA, // A8CE..A8D9
6951 UNKNOWN, // A8DA..A8DF
6952 DEVANAGARI, // A8E0..A8FD
6953 UNKNOWN, // A8FE..A8FF
6954 KAYAH_LI, // A900..A92D
6955 COMMON, // A92E
6956 KAYAH_LI, // A92F
6957 REJANG, // A930..A953
6958 UNKNOWN, // A954..A95E
6959 REJANG, // A95F
6960 HANGUL, // A960..A97C
6961 UNKNOWN, // A97D..A97F
6962 JAVANESE, // A980..A9CD
6963 UNKNOWN, // A9CE
6964 COMMON, // A9CF
6965 JAVANESE, // A9D0..A9D9
6966 UNKNOWN, // A9DA..A9DD
6967 JAVANESE, // A9DE..A9DF
6968 MYANMAR, // A9E0..A9FE
6969 UNKNOWN, // A9FF
6970 CHAM, // AA00..AA36
6971 UNKNOWN, // AA37..AA3F
6972 CHAM, // AA40..AA4D
6973 UNKNOWN, // AA4E..AA4F
6974 CHAM, // AA50..AA59
6975 UNKNOWN, // AA5A..AA5B
6976 CHAM, // AA5C..AA5F
6977 MYANMAR, // AA60..AA7F
6978 TAI_VIET, // AA80..AAC2
6979 UNKNOWN, // AAC3..AADA
6980 TAI_VIET, // AADB..AADF
6981 MEETEI_MAYEK, // AAE0..AAF6
6982 UNKNOWN, // AAF7..AB00
6983 ETHIOPIC, // AB01..AB06
6984 UNKNOWN, // AB07..AB08
6985 ETHIOPIC, // AB09..AB0E
6986 UNKNOWN, // AB0F..AB10
6987 ETHIOPIC, // AB11..AB16
6988 UNKNOWN, // AB17..AB1F
6989 ETHIOPIC, // AB20..AB26
6990 UNKNOWN, // AB27
6991 ETHIOPIC, // AB28..AB2E
6992 UNKNOWN, // AB2F
6993 LATIN, // AB30..AB5A
6994 COMMON, // AB5B
6995 LATIN, // AB5C..AB64
6996 GREEK, // AB65
6997 UNKNOWN, // AB66..AB6F
6998 CHEROKEE, // AB70..ABBF
6999 MEETEI_MAYEK, // ABC0..ABED
7000 UNKNOWN, // ABEE..ABEF
7001 MEETEI_MAYEK, // ABF0..ABF9
7002 UNKNOWN, // ABFA..ABFF
7003 HANGUL, // AC00..D7A3
7004 UNKNOWN, // D7A4..D7AF
7005 HANGUL, // D7B0..D7C6
7006 UNKNOWN, // D7C7..D7CA
7007 HANGUL, // D7CB..D7FB
7008 UNKNOWN, // D7FC..F8FF
7009 HAN, // F900..FA6D
7010 UNKNOWN, // FA6E..FA6F
7011 HAN, // FA70..FAD9
7012 UNKNOWN, // FADA..FAFF
7013 LATIN, // FB00..FB06
7014 UNKNOWN, // FB07..FB12
7015 ARMENIAN, // FB13..FB17
7016 UNKNOWN, // FB18..FB1C
7017 HEBREW, // FB1D..FB36
7018 UNKNOWN, // FB37
7019 HEBREW, // FB38..FB3C
7020 UNKNOWN, // FB3D
7021 HEBREW, // FB3E
7022 UNKNOWN, // FB3F
7023 HEBREW, // FB40..FB41
7024 UNKNOWN, // FB42
7025 HEBREW, // FB43..FB44
7026 UNKNOWN, // FB45
7027 HEBREW, // FB46..FB4F
7028 ARABIC, // FB50..FBC1
7029 UNKNOWN, // FBC2..FBD2
7030 ARABIC, // FBD3..FD3D
7031 COMMON, // FD3E..FD3F
7032 UNKNOWN, // FD40..FD4F
7033 ARABIC, // FD50..FD8F
7034 UNKNOWN, // FD90..FD91
7035 ARABIC, // FD92..FDC7
7036 UNKNOWN, // FDC8..FDEF
7037 ARABIC, // FDF0..FDFD
7038 UNKNOWN, // FDFE..FDFF
7039 INHERITED, // FE00..FE0F
7040 COMMON, // FE10..FE19
7041 UNKNOWN, // FE1A..FE1F
7042 INHERITED, // FE20..FE2D
7043 CYRILLIC, // FE2E..FE2F
7044 COMMON, // FE30..FE52
7045 UNKNOWN, // FE53
7046 COMMON, // FE54..FE66
7047 UNKNOWN, // FE67
7048 COMMON, // FE68..FE6B
7049 UNKNOWN, // FE6C..FE6F
7050 ARABIC, // FE70..FE74
7051 UNKNOWN, // FE75
7052 ARABIC, // FE76..FEFC
7053 UNKNOWN, // FEFD..FEFE
7054 COMMON, // FEFF
7055 UNKNOWN, // FF00
7056 COMMON, // FF01..FF20
7057 LATIN, // FF21..FF3A
7058 COMMON, // FF3B..FF40
7059 LATIN, // FF41..FF5A
7060 COMMON, // FF5B..FF65
7061 KATAKANA, // FF66..FF6F
7062 COMMON, // FF70
7063 KATAKANA, // FF71..FF9D
7064 COMMON, // FF9E..FF9F
7065 HANGUL, // FFA0..FFBE
7066 UNKNOWN, // FFBF..FFC1
7067 HANGUL, // FFC2..FFC7
7068 UNKNOWN, // FFC8..FFC9
7069 HANGUL, // FFCA..FFCF
7070 UNKNOWN, // FFD0..FFD1
7071 HANGUL, // FFD2..FFD7
7072 UNKNOWN, // FFD8..FFD9
7073 HANGUL, // FFDA..FFDC
7074 UNKNOWN, // FFDD..FFDF
7075 COMMON, // FFE0..FFE6
7076 UNKNOWN, // FFE7
7077 COMMON, // FFE8..FFEE
7078 UNKNOWN, // FFEF..FFF8
7079 COMMON, // FFF9..FFFD
7080 UNKNOWN, // FFFE..FFFF
7081 LINEAR_B, // 10000..1000B
7082 UNKNOWN, // 1000C
7083 LINEAR_B, // 1000D..10026
7084 UNKNOWN, // 10027
7085 LINEAR_B, // 10028..1003A
7086 UNKNOWN, // 1003B
7087 LINEAR_B, // 1003C..1003D
7088 UNKNOWN, // 1003E
7089 LINEAR_B, // 1003F..1004D
7090 UNKNOWN, // 1004E..1004F
7091 LINEAR_B, // 10050..1005D
7092 UNKNOWN, // 1005E..1007F
7093 LINEAR_B, // 10080..100FA
7094 UNKNOWN, // 100FB..100FF
7095 COMMON, // 10100..10102
7096 UNKNOWN, // 10103..10106
7097 COMMON, // 10107..10133
7098 UNKNOWN, // 10134..10136
7099 COMMON, // 10137..1013F
7100 GREEK, // 10140..1018E
7101 UNKNOWN, // 1018F
7102 COMMON, // 10190..1019B
7103 UNKNOWN, // 1019C..1019F
7104 GREEK, // 101A0
7105 UNKNOWN, // 101A1..101CF
7106 COMMON, // 101D0..101FC
7107 INHERITED, // 101FD
7108 UNKNOWN, // 101FE..1027F
7109 LYCIAN, // 10280..1029C
7110 UNKNOWN, // 1029D..1029F
7111 CARIAN, // 102A0..102D0
7112 UNKNOWN, // 102D1..102DF
7113 INHERITED, // 102E0
7114 COMMON, // 102E1..102FB
7115 UNKNOWN, // 102FC..102FF
7116 OLD_ITALIC, // 10300..10323
7117 UNKNOWN, // 10324..1032C
7118 OLD_ITALIC, // 1032D..1032F
7119 GOTHIC, // 10330..1034A
7120 UNKNOWN, // 1034B..1034F
7121 OLD_PERMIC, // 10350..1037A
7122 UNKNOWN, // 1037B..1037F
7123 UGARITIC, // 10380..1039D
7124 UNKNOWN, // 1039E
7125 UGARITIC, // 1039F
7126 OLD_PERSIAN, // 103A0..103C3
7127 UNKNOWN, // 103C4..103C7
7128 OLD_PERSIAN, // 103C8..103D5
7129 UNKNOWN, // 103D6..103FF
7130 DESERET, // 10400..1044F
7131 SHAVIAN, // 10450..1047F
7132 OSMANYA, // 10480..1049D
7133 UNKNOWN, // 1049E..1049F
7134 OSMANYA, // 104A0..104A9
7135 UNKNOWN, // 104AA..104AF
7136 OSAGE, // 104B0..104D3
7137 UNKNOWN, // 104D4..104D7
7138 OSAGE, // 104D8..104FB
7139 UNKNOWN, // 104FC..104FF
7140 ELBASAN, // 10500..10527
7141 UNKNOWN, // 10528..1052F
7142 CAUCASIAN_ALBANIAN, // 10530..10563
7143 UNKNOWN, // 10564..1056E
7144 CAUCASIAN_ALBANIAN, // 1056F
7145 UNKNOWN, // 10570..105FF
7146 LINEAR_A, // 10600..10736
7147 UNKNOWN, // 10737..1073F
7148 LINEAR_A, // 10740..10755
7149 UNKNOWN, // 10756..1075F
7150 LINEAR_A, // 10760..10767
7151 UNKNOWN, // 10768..107FF
7152 CYPRIOT, // 10800..10805
7153 UNKNOWN, // 10806..10807
7154 CYPRIOT, // 10808
7155 UNKNOWN, // 10809
7156 CYPRIOT, // 1080A..10835
7157 UNKNOWN, // 10836
7158 CYPRIOT, // 10837..10838
7159 UNKNOWN, // 10839..1083B
7160 CYPRIOT, // 1083C
7161 UNKNOWN, // 1083D..1083E
7162 CYPRIOT, // 1083F
7163 IMPERIAL_ARAMAIC, // 10840..10855
7164 UNKNOWN, // 10856
7165 IMPERIAL_ARAMAIC, // 10857..1085F
7166 PALMYRENE, // 10860..1087F
7167 NABATAEAN, // 10880..1089E
7168 UNKNOWN, // 1089F..108A6
7169 NABATAEAN, // 108A7..108AF
7170 UNKNOWN, // 108B0..108DF
7171 HATRAN, // 108E0..108F2
7172 UNKNOWN, // 108F3
7173 HATRAN, // 108F4..108F5
7174 UNKNOWN, // 108F6..108FA
7175 HATRAN, // 108FB..108FF
7176 PHOENICIAN, // 10900..1091B
7177 UNKNOWN, // 1091C..1091E
7178 PHOENICIAN, // 1091F
7179 LYDIAN, // 10920..10939
7180 UNKNOWN, // 1093A..1093E
7181 LYDIAN, // 1093F
7182 UNKNOWN, // 10940..1097F
7183 MEROITIC_HIEROGLYPHS, // 10980..1099F
7184 MEROITIC_CURSIVE, // 109A0..109B7
7185 UNKNOWN, // 109B8..109BB
7186 MEROITIC_CURSIVE, // 109BC..109CF
7187 UNKNOWN, // 109D0..109D1
7188 MEROITIC_CURSIVE, // 109D2..109FF
7189 KHAROSHTHI, // 10A00..10A03
7190 UNKNOWN, // 10A04
7191 KHAROSHTHI, // 10A05..10A06
7192 UNKNOWN, // 10A07..10A0B
7193 KHAROSHTHI, // 10A0C..10A13
7194 UNKNOWN, // 10A14
7195 KHAROSHTHI, // 10A15..10A17
7196 UNKNOWN, // 10A18
7197 KHAROSHTHI, // 10A19..10A33
7198 UNKNOWN, // 10A34..10A37
7199 KHAROSHTHI, // 10A38..10A3A
7200 UNKNOWN, // 10A3B..10A3E
7201 KHAROSHTHI, // 10A3F..10A47
7202 UNKNOWN, // 10A48..10A4F
7203 KHAROSHTHI, // 10A50..10A58
7204 UNKNOWN, // 10A59..10A5F
7205 OLD_SOUTH_ARABIAN, // 10A60..10A7F
7206 OLD_NORTH_ARABIAN, // 10A80..10A9F
7207 UNKNOWN, // 10AA0..10ABF
7208 MANICHAEAN, // 10AC0..10AE6
7209 UNKNOWN, // 10AE7..10AEA
7210 MANICHAEAN, // 10AEB..10AF6
7211 UNKNOWN, // 10AF7..10AFF
7212 AVESTAN, // 10B00..10B35
7213 UNKNOWN, // 10B36..10B38
7214 AVESTAN, // 10B39..10B3F
7215 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55
7216 UNKNOWN, // 10B56..10B57
7217 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F
7218 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72
7219 UNKNOWN, // 10B73..10B77
7220 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F
7221 PSALTER_PAHLAVI, // 10B80..10B91
7222 UNKNOWN, // 10B92..10B98
7223 PSALTER_PAHLAVI, // 10B99..10B9C
7224 UNKNOWN, // 10B9D..10BA8
7225 PSALTER_PAHLAVI, // 10BA9..10BAF
7226 UNKNOWN, // 10BB0..10BFF
7227 OLD_TURKIC, // 10C00..10C48
7228 UNKNOWN, // 10C49..10C7F
7229 OLD_HUNGARIAN, // 10C80..10CB2
7230 UNKNOWN, // 10CB3..10CBF
7231 OLD_HUNGARIAN, // 10CC0..10CF2
7232 UNKNOWN, // 10CF3..10CF9
7233 OLD_HUNGARIAN, // 10CFA..10CFF
7234 UNKNOWN, // 10D00..10E5F
7235 ARABIC, // 10E60..10E7E
7236 UNKNOWN, // 10E7F..10FFF
7237 BRAHMI, // 11000..1104D
7238 UNKNOWN, // 1104E..11051
7239 BRAHMI, // 11052..1106F
7240 UNKNOWN, // 11070..1107E
7241 BRAHMI, // 1107F
7242 KAITHI, // 11080..110C1
7243 UNKNOWN, // 110C2..110CF
7244 SORA_SOMPENG, // 110D0..110E8
7245 UNKNOWN, // 110E9..110EF
7246 SORA_SOMPENG, // 110F0..110F9
7247 UNKNOWN, // 110FA..110FF
7248 CHAKMA, // 11100..11134
7249 UNKNOWN, // 11135
7250 CHAKMA, // 11136..11143
7251 UNKNOWN, // 11144..1114F
7252 MAHAJANI, // 11150..11176
7253 UNKNOWN, // 11177..1117F
7254 SHARADA, // 11180..111CD
7255 UNKNOWN, // 111CE..111CF
7256 SHARADA, // 111D0..111DF
7257 UNKNOWN, // 111E0
7258 SINHALA, // 111E1..111F4
7259 UNKNOWN, // 111F5..111FF
7260 KHOJKI, // 11200..11211
7261 UNKNOWN, // 11212
7262 KHOJKI, // 11213..1123E
7263 UNKNOWN, // 1123F..1127F
7264 MULTANI, // 11280..11286
7265 UNKNOWN, // 11287
7266 MULTANI, // 11288
7267 UNKNOWN, // 11289
7268 MULTANI, // 1128A..1128D
7269 UNKNOWN, // 1128E
7270 MULTANI, // 1128F..1129D
7271 UNKNOWN, // 1129E
7272 MULTANI, // 1129F..112A9
7273 UNKNOWN, // 112AA..112AF
7274 KHUDAWADI, // 112B0..112EA
7275 UNKNOWN, // 112EB..112EF
7276 KHUDAWADI, // 112F0..112F9
7277 UNKNOWN, // 112FA..112FF
7278 GRANTHA, // 11300..11303
7279 UNKNOWN, // 11304
7280 GRANTHA, // 11305..1130C
7281 UNKNOWN, // 1130D..1130E
7282 GRANTHA, // 1130F..11310
7283 UNKNOWN, // 11311..11312
7284 GRANTHA, // 11313..11328
7285 UNKNOWN, // 11329
7286 GRANTHA, // 1132A..11330
7287 UNKNOWN, // 11331
7288 GRANTHA, // 11332..11333
7289 UNKNOWN, // 11334
7290 GRANTHA, // 11335..11339
7291 UNKNOWN, // 1133A..1133B
7292 GRANTHA, // 1133C..11344
7293 UNKNOWN, // 11345..11346
7294 GRANTHA, // 11347..11348
7295 UNKNOWN, // 11349..1134A
7296 GRANTHA, // 1134B..1134D
7297 UNKNOWN, // 1134E..1134F
7298 GRANTHA, // 11350
7299 UNKNOWN, // 11351..11356
7300 GRANTHA, // 11357
7301 UNKNOWN, // 11358..1135C
7302 GRANTHA, // 1135D..11363
7303 UNKNOWN, // 11364..11365
7304 GRANTHA, // 11366..1136C
7305 UNKNOWN, // 1136D..1136F
7306 GRANTHA, // 11370..11374
7307 UNKNOWN, // 11375..113FF
7308 NEWA, // 11400..11459
7309 UNKNOWN, // 1145A
7310 NEWA, // 1145B
7311 UNKNOWN, // 1145C
7312 NEWA, // 1145D
7313 UNKNOWN, // 1145E..1147F
7314 TIRHUTA, // 11480..114C7
7315 UNKNOWN, // 114C8..114CF
7316 TIRHUTA, // 114D0..114D9
7317 UNKNOWN, // 114DA..1157F
7318 SIDDHAM, // 11580..115B5
7319 UNKNOWN, // 115B6..115B7
7320 SIDDHAM, // 115B8..115DD
7321 UNKNOWN, // 115DE..115FF
7322 MODI, // 11600..11644
7323 UNKNOWN, // 11645..1164F
7324 MODI, // 11650..11659
7325 UNKNOWN, // 1165A..1165F
7326 MONGOLIAN, // 11660..1166C
7327 UNKNOWN, // 1166D..1167F
7328 TAKRI, // 11680..116B7
7329 UNKNOWN, // 116B8..116BF
7330 TAKRI, // 116C0..116C9
7331 UNKNOWN, // 116CA..116FF
7332 AHOM, // 11700..11719
7333 UNKNOWN, // 1171A..1171C
7334 AHOM, // 1171D..1172B
7335 UNKNOWN, // 1172C..1172F
7336 AHOM, // 11730..1173F
7337 UNKNOWN, // 11740..1189F
7338 WARANG_CITI, // 118A0..118F2
7339 UNKNOWN, // 118F3..118FE
7340 WARANG_CITI, // 118FF
7341 UNKNOWN, // 11900..119FF
7342 ZANABAZAR_SQUARE, // 11A00..11A47
7343 UNKNOWN, // 11A48..11A4F
7344 SOYOMBO, // 11A50..11A83
7345 UNKNOWN, // 11A84..11A85
7346 SOYOMBO, // 11A86..11A9C
7347 UNKNOWN, // 11A9D
7348 SOYOMBO, // 11A9E..11AA2
7349 UNKNOWN, // 11AA3..11ABF
7350 PAU_CIN_HAU, // 11AC0..11AF8
7351 UNKNOWN, // 11AF9..11BFF
7352 BHAIKSUKI, // 11C00..11C08
7353 UNKNOWN, // 11C09
7354 BHAIKSUKI, // 11C0A..11C36
7355 UNKNOWN, // 11C37
7356 BHAIKSUKI, // 11C38..11C45
7357 UNKNOWN, // 11C46..11C4F
7358 BHAIKSUKI, // 11C50..11C6C
7359 UNKNOWN, // 11C6D..11C6F
7360 MARCHEN, // 11C70..11C8F
7361 UNKNOWN, // 11C90..11C91
7362 MARCHEN, // 11C92..11CA7
7363 UNKNOWN, // 11CA8
7364 MARCHEN, // 11CA9..11CB6
7365 UNKNOWN, // 11CB7..11CFF
7366 MASARAM_GONDI, // 11D00..11D06
7367 UNKNOWN, // 11D07
7368 MASARAM_GONDI, // 11D08..11D09
7369 UNKNOWN, // 11D0A
7370 MASARAM_GONDI, // 11D0B..11D36
7371 UNKNOWN, // 11D37..11D39
7372 MASARAM_GONDI, // 11D3A
7373 UNKNOWN, // 11D3B
7374 MASARAM_GONDI, // 11D3C..11D3D
7375 UNKNOWN, // 11D3E
7376 MASARAM_GONDI, // 11D3F..11D47
7377 UNKNOWN, // 11D48..11D4F
7378 MASARAM_GONDI, // 11D50..11D59
7379 UNKNOWN, // 11D5A..11FFF
7380 CUNEIFORM, // 12000..12399
7381 UNKNOWN, // 1239A..123FF
7382 CUNEIFORM, // 12400..1246E
7383 UNKNOWN, // 1246F
7384 CUNEIFORM, // 12470..12474
7385 UNKNOWN, // 12475..1247F
7386 CUNEIFORM, // 12480..12543
7387 UNKNOWN, // 12544..12FFF
7388 EGYPTIAN_HIEROGLYPHS, // 13000..1342E
7389 UNKNOWN, // 1342F..143FF
7390 ANATOLIAN_HIEROGLYPHS, // 14400..14646
7391 UNKNOWN, // 14647..167FF
7392 BAMUM, // 16800..16A38
7393 UNKNOWN, // 16A39..16A3F
7394 MRO, // 16A40..16A5E
7395 UNKNOWN, // 16A5F
7396 MRO, // 16A60..16A69
7397 UNKNOWN, // 16A6A..16A6D
7398 MRO, // 16A6E..16A6F
7399 UNKNOWN, // 16A70..16ACF
7400 BASSA_VAH, // 16AD0..16AED
7401 UNKNOWN, // 16AEE..16AEF
7402 BASSA_VAH, // 16AF0..16AF5
7403 UNKNOWN, // 16AF6..16AFF
7404 PAHAWH_HMONG, // 16B00..16B45
7405 UNKNOWN, // 16B46..16B4F
7406 PAHAWH_HMONG, // 16B50..16B59
7407 UNKNOWN, // 16B5A
7408 PAHAWH_HMONG, // 16B5B..16B61
7409 UNKNOWN, // 16B62
7410 PAHAWH_HMONG, // 16B63..16B77
7411 UNKNOWN, // 16B78..16B7C
7412 PAHAWH_HMONG, // 16B7D..16B8F
7413 UNKNOWN, // 16B90..16EFF
7414 MIAO, // 16F00..16F44
7415 UNKNOWN, // 16F45..16F4F
7416 MIAO, // 16F50..16F7E
7417 UNKNOWN, // 16F7F..16F8E
7418 MIAO, // 16F8F..16F9F
7419 UNKNOWN, // 16FA0..16FDF
7420 TANGUT, // 16FE0
7421 NUSHU, // 16FE1
7422 UNKNOWN, // 16FE2..16FFF
7423 TANGUT, // 17000..187EC
7424 UNKNOWN, // 187ED..187FF
7425 TANGUT, // 18800..18AF2
7426 UNKNOWN, // 18AF3..1AFFF
7427 KATAKANA, // 1B000
7428 HIRAGANA, // 1B001..1B11E
7429 UNKNOWN, // 1B11F..1B16F
7430 NUSHU, // 1B170..1B2FB
7431 UNKNOWN, // 1B2FC..1BBFF
7432 DUPLOYAN, // 1BC00..1BC6A
7433 UNKNOWN, // 1BC6B..1BC6F
7434 DUPLOYAN, // 1BC70..1BC7C
7435 UNKNOWN, // 1BC7D..1BC7F
7436 DUPLOYAN, // 1BC80..1BC88
7437 UNKNOWN, // 1BC89..1BC8F
7438 DUPLOYAN, // 1BC90..1BC99
7439 UNKNOWN, // 1BC9A..1BC9B
7440 DUPLOYAN, // 1BC9C..1BC9F
7441 COMMON, // 1BCA0..1BCA3
7442 UNKNOWN, // 1BCA4..1CFFF
7443 COMMON, // 1D000..1D0F5
7444 UNKNOWN, // 1D0F6..1D0FF
7445 COMMON, // 1D100..1D126
7446 UNKNOWN, // 1D127..1D128
7447 COMMON, // 1D129..1D166
7448 INHERITED, // 1D167..1D169
7449 COMMON, // 1D16A..1D17A
7450 INHERITED, // 1D17B..1D182
7451 COMMON, // 1D183..1D184
7452 INHERITED, // 1D185..1D18B
7453 COMMON, // 1D18C..1D1A9
7454 INHERITED, // 1D1AA..1D1AD
7455 COMMON, // 1D1AE..1D1E8
7456 UNKNOWN, // 1D1E9..1D1FF
7457 GREEK, // 1D200..1D245
7458 UNKNOWN, // 1D246..1D2FF
7459 COMMON, // 1D300..1D356
7460 UNKNOWN, // 1D357..1D35F
7461 COMMON, // 1D360..1D371
7462 UNKNOWN, // 1D372..1D3FF
7463 COMMON, // 1D400..1D454
7464 UNKNOWN, // 1D455
7465 COMMON, // 1D456..1D49C
7466 UNKNOWN, // 1D49D
7467 COMMON, // 1D49E..1D49F
7468 UNKNOWN, // 1D4A0..1D4A1
7469 COMMON, // 1D4A2
7470 UNKNOWN, // 1D4A3..1D4A4
7471 COMMON, // 1D4A5..1D4A6
7472 UNKNOWN, // 1D4A7..1D4A8
7473 COMMON, // 1D4A9..1D4AC
7474 UNKNOWN, // 1D4AD
7475 COMMON, // 1D4AE..1D4B9
7476 UNKNOWN, // 1D4BA
7477 COMMON, // 1D4BB
7478 UNKNOWN, // 1D4BC
7479 COMMON, // 1D4BD..1D4C3
7480 UNKNOWN, // 1D4C4
7481 COMMON, // 1D4C5..1D505
7482 UNKNOWN, // 1D506
7483 COMMON, // 1D507..1D50A
7484 UNKNOWN, // 1D50B..1D50C
7485 COMMON, // 1D50D..1D514
7486 UNKNOWN, // 1D515
7487 COMMON, // 1D516..1D51C
7488 UNKNOWN, // 1D51D
7489 COMMON, // 1D51E..1D539
7490 UNKNOWN, // 1D53A
7491 COMMON, // 1D53B..1D53E
7492 UNKNOWN, // 1D53F
7493 COMMON, // 1D540..1D544
7494 UNKNOWN, // 1D545
7495 COMMON, // 1D546
7496 UNKNOWN, // 1D547..1D549
7497 COMMON, // 1D54A..1D550
7498 UNKNOWN, // 1D551
7499 COMMON, // 1D552..1D6A5
7500 UNKNOWN, // 1D6A6..1D6A7
7501 COMMON, // 1D6A8..1D7CB
7502 UNKNOWN, // 1D7CC..1D7CD
7503 COMMON, // 1D7CE..1D7FF
7504 SIGNWRITING, // 1D800..1DA8B
7505 UNKNOWN, // 1DA8C..1DA9A
7506 SIGNWRITING, // 1DA9B..1DA9F
7507 UNKNOWN, // 1DAA0
7508 SIGNWRITING, // 1DAA1..1DAAF
7509 UNKNOWN, // 1DAB0..1DFFF
7510 GLAGOLITIC, // 1E000..1E006
7511 UNKNOWN, // 1E007
7512 GLAGOLITIC, // 1E008..1E018
7513 UNKNOWN, // 1E019..1E01A
7514 GLAGOLITIC, // 1E01B..1E021
7515 UNKNOWN, // 1E022
7516 GLAGOLITIC, // 1E023..1E024
7517 UNKNOWN, // 1E025
7518 GLAGOLITIC, // 1E026..1E02A
7519 UNKNOWN, // 1E02B..1E7FF
7520 MENDE_KIKAKUI, // 1E800..1E8C4
7521 UNKNOWN, // 1E8C5..1E8C6
7522 MENDE_KIKAKUI, // 1E8C7..1E8D6
7523 UNKNOWN, // 1E8D7..1E8FF
7524 ADLAM, // 1E900..1E94A
7525 UNKNOWN, // 1E94B..1E94F
7526 ADLAM, // 1E950..1E959
7527 UNKNOWN, // 1E95A..1E95D
7528 ADLAM, // 1E95E..1E95F
7529 UNKNOWN, // 1E960..1EDFF
7530 ARABIC, // 1EE00..1EE03
7531 UNKNOWN, // 1EE04
7532 ARABIC, // 1EE05..1EE1F
7533 UNKNOWN, // 1EE20
7534 ARABIC, // 1EE21..1EE22
7535 UNKNOWN, // 1EE23
7536 ARABIC, // 1EE24
7537 UNKNOWN, // 1EE25..1EE26
7538 ARABIC, // 1EE27
7539 UNKNOWN, // 1EE28
7540 ARABIC, // 1EE29..1EE32
7541 UNKNOWN, // 1EE33
7542 ARABIC, // 1EE34..1EE37
7543 UNKNOWN, // 1EE38
7544 ARABIC, // 1EE39
7545 UNKNOWN, // 1EE3A
7546 ARABIC, // 1EE3B
7547 UNKNOWN, // 1EE3C..1EE41
7548 ARABIC, // 1EE42
7549 UNKNOWN, // 1EE43..1EE46
7550 ARABIC, // 1EE47
7551 UNKNOWN, // 1EE48
7552 ARABIC, // 1EE49
7553 UNKNOWN, // 1EE4A
7554 ARABIC, // 1EE4B
7555 UNKNOWN, // 1EE4C
7556 ARABIC, // 1EE4D..1EE4F
7557 UNKNOWN, // 1EE50
7558 ARABIC, // 1EE51..1EE52
7559 UNKNOWN, // 1EE53
7560 ARABIC, // 1EE54
7561 UNKNOWN, // 1EE55..1EE56
7562 ARABIC, // 1EE57
7563 UNKNOWN, // 1EE58
7564 ARABIC, // 1EE59
7565 UNKNOWN, // 1EE5A
7566 ARABIC, // 1EE5B
7567 UNKNOWN, // 1EE5C
7568 ARABIC, // 1EE5D
7569 UNKNOWN, // 1EE5E
7570 ARABIC, // 1EE5F
7571 UNKNOWN, // 1EE60
7572 ARABIC, // 1EE61..1EE62
7573 UNKNOWN, // 1EE63
7574 ARABIC, // 1EE64
7575 UNKNOWN, // 1EE65..1EE66
7576 ARABIC, // 1EE67..1EE6A
7577 UNKNOWN, // 1EE6B
7578 ARABIC, // 1EE6C..1EE72
7579 UNKNOWN, // 1EE73
7580 ARABIC, // 1EE74..1EE77
7581 UNKNOWN, // 1EE78
7582 ARABIC, // 1EE79..1EE7C
7583 UNKNOWN, // 1EE7D
7584 ARABIC, // 1EE7E
7585 UNKNOWN, // 1EE7F
7586 ARABIC, // 1EE80..1EE89
7587 UNKNOWN, // 1EE8A
7588 ARABIC, // 1EE8B..1EE9B
7589 UNKNOWN, // 1EE9C..1EEA0
7590 ARABIC, // 1EEA1..1EEA3
7591 UNKNOWN, // 1EEA4
7592 ARABIC, // 1EEA5..1EEA9
7593 UNKNOWN, // 1EEAA
7594 ARABIC, // 1EEAB..1EEBB
7595 UNKNOWN, // 1EEBC..1EEEF
7596 ARABIC, // 1EEF0..1EEF1
7597 UNKNOWN, // 1EEF2..1EFFF
7598 COMMON, // 1F000..1F02B
7599 UNKNOWN, // 1F02C..1F02F
7600 COMMON, // 1F030..1F093
7601 UNKNOWN, // 1F094..1F09F
7602 COMMON, // 1F0A0..1F0AE
7603 UNKNOWN, // 1F0AF..1F0B0
7604 COMMON, // 1F0B1..1F0BF
7605 UNKNOWN, // 1F0C0
7606 COMMON, // 1F0C1..1F0CF
7607 UNKNOWN, // 1F0D0
7608 COMMON, // 1F0D1..1F0F5
7609 UNKNOWN, // 1F0F6..1F0FF
7610 COMMON, // 1F100..1F10C
7611 UNKNOWN, // 1F10D..1F10F
7612 COMMON, // 1F110..1F12E
7613 UNKNOWN, // 1F12F
7614 COMMON, // 1F130..1F16B
7615 UNKNOWN, // 1F16C..1F16F
7616 COMMON, // 1F170..1F1AC
7617 UNKNOWN, // 1F1AD..1F1E5
7618 COMMON, // 1F1E6..1F1FF
7619 HIRAGANA, // 1F200
7620 COMMON, // 1F201..1F202
7621 UNKNOWN, // 1F203..1F20F
7622 COMMON, // 1F210..1F23B
7623 UNKNOWN, // 1F23C..1F23F
7624 COMMON, // 1F240..1F248
7625 UNKNOWN, // 1F249..1F24F
7626 COMMON, // 1F250..1F251
7627 UNKNOWN, // 1F252..1F25F
7628 COMMON, // 1F260..1F265
7629 UNKNOWN, // 1F266..1F2FF
7630 COMMON, // 1F300..1F6D4
7631 UNKNOWN, // 1F6D5..1F6DF
7632 COMMON, // 1F6E0..1F6EC
7633 UNKNOWN, // 1F6ED..1F6EF
7634 COMMON, // 1F6F0..1F6F8
7635 UNKNOWN, // 1F6F9..1F6FF
7636 COMMON, // 1F700..1F773
7637 UNKNOWN, // 1F774..1F77F
7638 COMMON, // 1F780..1F7D4
7639 UNKNOWN, // 1F7D5..1F7FF
7640 COMMON, // 1F800..1F80B
7641 UNKNOWN, // 1F80C..1F80F
7642 COMMON, // 1F810..1F847
7643 UNKNOWN, // 1F848..1F84F
7644 COMMON, // 1F850..1F859
7645 UNKNOWN, // 1F85A..1F85F
7646 COMMON, // 1F860..1F887
7647 UNKNOWN, // 1F888..1F88F
7648 COMMON, // 1F890..1F8AD
7649 UNKNOWN, // 1F8AE..1F8FF
7650 COMMON, // 1F900..1F90B
7651 UNKNOWN, // 1F90C..1F90F
7652 COMMON, // 1F910..1F93E
7653 UNKNOWN, // 1F93F
7654 COMMON, // 1F940..1F94C
7655 UNKNOWN, // 1F94D..1F94F
7656 COMMON, // 1F950..1F96B
7657 UNKNOWN, // 1F96C..1F97F
7658 COMMON, // 1F980..1F997
7659 UNKNOWN, // 1F998..1F9BF
7660 COMMON, // 1F9C0
7661 UNKNOWN, // 1F9C1..1F9CF
7662 COMMON, // 1F9D0..1F9E6
7663 UNKNOWN, // 1F9E7..1FFFF
7664 HAN, // 20000..2A6D6
7665 UNKNOWN, // 2A6D7..2A6FF
7666 HAN, // 2A700..2B734
7667 UNKNOWN, // 2B735..2B73F
7668 HAN, // 2B740..2B81D
7669 UNKNOWN, // 2B81E..2B81F
7670 HAN, // 2B820..2CEA1
7671 UNKNOWN, // 2CEA2..2CEAF
7672 HAN, // 2CEB0..2EBE0
7673 UNKNOWN, // 2EBE1..2F7FF
7674 HAN, // 2F800..2FA1D
7675 UNKNOWN, // 2FA1E..E0000
7676 COMMON, // E0001
7677 UNKNOWN, // E0002..E001F
7678 COMMON, // E0020..E007F
7679 UNKNOWN, // E0080..E00FF
7680 INHERITED, // E0100..E01EF
7681 UNKNOWN // E01F0..10FFFF
7682 };
7683
7684 private static HashMap<String, Character.UnicodeScript> aliases;
7685 static {
7686 aliases = new HashMap<>((int)(142 / 0.75f + 1.0f));
7687 aliases.put("ADLM", ADLAM);
7688 aliases.put("AGHB", CAUCASIAN_ALBANIAN);
7689 aliases.put("AHOM", AHOM);
7690 aliases.put("ARAB", ARABIC);
7691 aliases.put("ARMI", IMPERIAL_ARAMAIC);
7692 aliases.put("ARMN", ARMENIAN);
7693 aliases.put("AVST", AVESTAN);
7694 aliases.put("BALI", BALINESE);
7695 aliases.put("BAMU", BAMUM);
7696 aliases.put("BASS", BASSA_VAH);
7697 aliases.put("BATK", BATAK);
7698 aliases.put("BENG", BENGALI);
7699 aliases.put("BHKS", BHAIKSUKI);
7700 aliases.put("BOPO", BOPOMOFO);
7701 aliases.put("BRAH", BRAHMI);
7702 aliases.put("BRAI", BRAILLE);
7703 aliases.put("BUGI", BUGINESE);
7704 aliases.put("BUHD", BUHID);
7705 aliases.put("CAKM", CHAKMA);
7706 aliases.put("CANS", CANADIAN_ABORIGINAL);
7707 aliases.put("CARI", CARIAN);
7708 aliases.put("CHAM", CHAM);
7709 aliases.put("CHER", CHEROKEE);
7710 aliases.put("COPT", COPTIC);
7711 aliases.put("CPRT", CYPRIOT);
7712 aliases.put("CYRL", CYRILLIC);
7713 aliases.put("DEVA", DEVANAGARI);
7714 aliases.put("DSRT", DESERET);
7715 aliases.put("DUPL", DUPLOYAN);
7716 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
7717 aliases.put("ELBA", ELBASAN);
7718 aliases.put("ETHI", ETHIOPIC);
7719 aliases.put("GEOR", GEORGIAN);
7720 aliases.put("GLAG", GLAGOLITIC);
7721 aliases.put("GONM", MASARAM_GONDI);
7722 aliases.put("GOTH", GOTHIC);
7723 aliases.put("GRAN", GRANTHA);
7724 aliases.put("GREK", GREEK);
7725 aliases.put("GUJR", GUJARATI);
7726 aliases.put("GURU", GURMUKHI);
7727 aliases.put("HANG", HANGUL);
7728 aliases.put("HANI", HAN);
7729 aliases.put("HANO", HANUNOO);
7730 aliases.put("HATR", HATRAN);
7731 aliases.put("HEBR", HEBREW);
7732 aliases.put("HIRA", HIRAGANA);
7733 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
7734 aliases.put("HMNG", PAHAWH_HMONG);
7735 // it appears we don't have the KATAKANA_OR_HIRAGANA
7736 //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
7737 aliases.put("HUNG", OLD_HUNGARIAN);
7738 aliases.put("ITAL", OLD_ITALIC);
7739 aliases.put("JAVA", JAVANESE);
7740 aliases.put("KALI", KAYAH_LI);
7741 aliases.put("KANA", KATAKANA);
7742 aliases.put("KHAR", KHAROSHTHI);
7743 aliases.put("KHMR", KHMER);
7744 aliases.put("KHOJ", KHOJKI);
7745 aliases.put("KNDA", KANNADA);
7746 aliases.put("KTHI", KAITHI);
7747 aliases.put("LANA", TAI_THAM);
7748 aliases.put("LAOO", LAO);
7749 aliases.put("LATN", LATIN);
7750 aliases.put("LEPC", LEPCHA);
7751 aliases.put("LIMB", LIMBU);
7752 aliases.put("LINA", LINEAR_A);
7753 aliases.put("LINB", LINEAR_B);
7754 aliases.put("LISU", LISU);
7755 aliases.put("LYCI", LYCIAN);
7756 aliases.put("LYDI", LYDIAN);
7757 aliases.put("MAHJ", MAHAJANI);
7758 aliases.put("MARC", MARCHEN);
7759 aliases.put("MAND", MANDAIC);
7760 aliases.put("MANI", MANICHAEAN);
7761 aliases.put("MEND", MENDE_KIKAKUI);
7762 aliases.put("MERC", MEROITIC_CURSIVE);
7763 aliases.put("MERO", MEROITIC_HIEROGLYPHS);
7764 aliases.put("MLYM", MALAYALAM);
7765 aliases.put("MODI", MODI);
7766 aliases.put("MONG", MONGOLIAN);
7767 aliases.put("MROO", MRO);
7768 aliases.put("MTEI", MEETEI_MAYEK);
7769 aliases.put("MULT", MULTANI);
7770 aliases.put("MYMR", MYANMAR);
7771 aliases.put("NARB", OLD_NORTH_ARABIAN);
7772 aliases.put("NBAT", NABATAEAN);
7773 aliases.put("NEWA", NEWA);
7774 aliases.put("NKOO", NKO);
7775 aliases.put("NSHU", NUSHU);
7776 aliases.put("OGAM", OGHAM);
7777 aliases.put("OLCK", OL_CHIKI);
7778 aliases.put("ORKH", OLD_TURKIC);
7779 aliases.put("ORYA", ORIYA);
7780 aliases.put("OSGE", OSAGE);
7781 aliases.put("OSMA", OSMANYA);
7782 aliases.put("PALM", PALMYRENE);
7783 aliases.put("PAUC", PAU_CIN_HAU);
7784 aliases.put("PERM", OLD_PERMIC);
7785 aliases.put("PHAG", PHAGS_PA);
7786 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
7787 aliases.put("PHLP", PSALTER_PAHLAVI);
7788 aliases.put("PHNX", PHOENICIAN);
7789 aliases.put("PLRD", MIAO);
7790 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
7791 aliases.put("RJNG", REJANG);
7792 aliases.put("RUNR", RUNIC);
7793 aliases.put("SAMR", SAMARITAN);
7794 aliases.put("SARB", OLD_SOUTH_ARABIAN);
7795 aliases.put("SAUR", SAURASHTRA);
7796 aliases.put("SGNW", SIGNWRITING);
7797 aliases.put("SHAW", SHAVIAN);
7798 aliases.put("SHRD", SHARADA);
7799 aliases.put("SIDD", SIDDHAM);
7800 aliases.put("SIND", KHUDAWADI);
7801 aliases.put("SINH", SINHALA);
7802 aliases.put("SORA", SORA_SOMPENG);
7803 aliases.put("SOYO", SOYOMBO);
7804 aliases.put("SUND", SUNDANESE);
7805 aliases.put("SYLO", SYLOTI_NAGRI);
7806 aliases.put("SYRC", SYRIAC);
7807 aliases.put("TAGB", TAGBANWA);
7808 aliases.put("TAKR", TAKRI);
7809 aliases.put("TALE", TAI_LE);
7810 aliases.put("TALU", NEW_TAI_LUE);
7811 aliases.put("TAML", TAMIL);
7812 aliases.put("TANG", TANGUT);
7813 aliases.put("TAVT", TAI_VIET);
7814 aliases.put("TELU", TELUGU);
7815 aliases.put("TFNG", TIFINAGH);
7816 aliases.put("TGLG", TAGALOG);
7817 aliases.put("THAA", THAANA);
7818 aliases.put("THAI", THAI);
7819 aliases.put("TIBT", TIBETAN);
7820 aliases.put("TIRH", TIRHUTA);
7821 aliases.put("UGAR", UGARITIC);
7822 aliases.put("VAII", VAI);
7823 aliases.put("WARA", WARANG_CITI);
7824 aliases.put("XPEO", OLD_PERSIAN);
7825 aliases.put("XSUX", CUNEIFORM);
7826 aliases.put("YIII", YI);
7827 aliases.put("ZANB", ZANABAZAR_SQUARE);
7828 aliases.put("ZINH", INHERITED);
7829 aliases.put("ZYYY", COMMON);
7830 aliases.put("ZZZZ", UNKNOWN);
7831 }
7832
7833 /**
7834 * Returns the enum constant representing the Unicode script of which
7835 * the given character (Unicode code point) is assigned to.
7836 *
7837 * @param codePoint the character (Unicode code point) in question.
7838 * @return The {@code UnicodeScript} constant representing the
7839 * Unicode script of which this character is assigned to.
7840 *
7841 * @throws IllegalArgumentException if the specified
7842 * {@code codePoint} is an invalid Unicode code point.
7843 * @see Character#isValidCodePoint(int)
7844 *
7845 */
7846 public static UnicodeScript of(int codePoint) {
7847 if (!isValidCodePoint(codePoint))
7848 throw new IllegalArgumentException(
7849 String.format("Not a valid Unicode code point: 0x%X", codePoint));
7850 int type = getType(codePoint);
7851 // leave SURROGATE and PRIVATE_USE for table lookup
7852 if (type == UNASSIGNED)
7853 return UNKNOWN;
7854 int index = Arrays.binarySearch(scriptStarts, codePoint);
7855 if (index < 0)
7856 index = -index - 2;
7857 return scripts[index];
7858 }
7859
7860 /**
7861 * Returns the UnicodeScript constant with the given Unicode script
7862 * name or the script name alias. Script names and their aliases are
7863 * determined by The Unicode Standard. The files {@code Scripts<version>.txt}
7864 * and {@code PropertyValueAliases<version>.txt} define script names
7865 * and the script name aliases for a particular version of the
7866 * standard. The {@link Character} class specifies the version of
7867 * the standard that it supports.
7868 * <p>
7869 * Character case is ignored for all of the valid script names.
7870 * The en_US locale's case mapping rules are used to provide
7871 * case-insensitive string comparisons for script name validation.
7872 *
7873 * @param scriptName A {@code UnicodeScript} name.
7874 * @return The {@code UnicodeScript} constant identified
7875 * by {@code scriptName}
7876 * @throws IllegalArgumentException if {@code scriptName} is an
7877 * invalid name
7878 * @throws NullPointerException if {@code scriptName} is null
7879 */
7880 public static final UnicodeScript forName(String scriptName) {
7881 scriptName = scriptName.toUpperCase(Locale.ENGLISH);
7882 //.replace(' ', '_'));
7883 UnicodeScript sc = aliases.get(scriptName);
7884 if (sc != null)
7885 return sc;
7886 return valueOf(scriptName);
7887 }
7888 }
7889
/**
 * The value of the {@code Character}.
 *
 * @serial the primitive {@code char} wrapped by this object
 */
private final char value;

/** use serialVersionUID from JDK 1.0.2 for interoperability */
private static final long serialVersionUID = 3786198910865385080L;
7899
/**
 * Creates a new {@code Character} object wrapping the given
 * {@code char} value.
 *
 * @param  value   the {@code char} to be wrapped by the new
 *                 {@code Character} object.
 *
 * @deprecated
 * It is rarely appropriate to use this constructor. The static factory
 * {@link #valueOf(char)} is generally a better choice, as it is
 * likely to yield significantly better space and time performance.
 */
@Deprecated(since="9")
public Character(char value) {
    this.value = value;
}
7916
7917 private static class CharacterCache {
7918 private CharacterCache(){}
7919
7920 static final Character cache[] = new Character[127 + 1];
7921
7922 static {
7923 for (int i = 0; i < cache.length; i++)
7924 cache[i] = new Character((char)i);
7925 }
7926 }
7927
7928 /**
7929 * Returns a {@code Character} instance representing the specified
7930 * {@code char} value.
7931 * If a new {@code Character} instance is not required, this method
7932 * should generally be used in preference to the constructor
7933 * {@link #Character(char)}, as this method is likely to yield
7934 * significantly better space and time performance by caching
7935 * frequently requested values.
7936 *
7937 * This method will always cache values in the range {@code
7938 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
7939 * cache other values outside of this range.
7940 *
7941 * @param c a char value.
7942 * @return a {@code Character} instance representing {@code c}.
7943 * @since 1.5
7944 */
7945 @HotSpotIntrinsicCandidate
7946 public static Character valueOf(char c) {
7947 if (c <= 127) { // must cache
7948 return CharacterCache.cache[(int)c];
7949 }
7950 return new Character(c);
7951 }
7952
7953 /**
7954 * Returns the value of this {@code Character} object.
7955 * @return the primitive {@code char} value represented by
7956 * this object.
7957 */
7958 @HotSpotIntrinsicCandidate
7959 public char charValue() {
7960 return value;
7961 }
7962
7963 /**
7964 * Returns a hash code for this {@code Character}; equal to the result
7965 * of invoking {@code charValue()}.
7966 *
7967 * @return a hash code value for this {@code Character}
7968 */
7969 @Override
7970 public int hashCode() {
7971 return Character.hashCode(value);
7972 }
7973
7974 /**
7975 * Returns a hash code for a {@code char} value; compatible with
7976 * {@code Character.hashCode()}.
7977 *
7978 * @since 1.8
7979 *
7980 * @param value The {@code char} for which to return a hash code.
7981 * @return a hash code value for a {@code char} value.
7982 */
7983 public static int hashCode(char value) {
7984 return (int)value;
7985 }
7986
7987 /**
7988 * Compares this object against the specified object.
7989 * The result is {@code true} if and only if the argument is not
7990 * {@code null} and is a {@code Character} object that
7991 * represents the same {@code char} value as this object.
7992 *
7993 * @param obj the object to compare with.
7994 * @return {@code true} if the objects are the same;
7995 * {@code false} otherwise.
7996 */
7997 public boolean equals(Object obj) {
7998 if (obj instanceof Character) {
7999 return value == ((Character)obj).charValue();
8000 }
8001 return false;
8002 }
8003
8004 /**
8005 * Returns a {@code String} object representing this
8006 * {@code Character}'s value. The result is a string of
8007 * length 1 whose sole component is the primitive
8008 * {@code char} value represented by this
8009 * {@code Character} object.
8010 *
8011 * @return a string representation of this object.
8012 */
8013 public String toString() {
8014 char buf[] = {value};
8015 return String.valueOf(buf);
8016 }
8017
8018 /**
8019 * Returns a {@code String} object representing the
8020 * specified {@code char}. The result is a string of length
8021 * 1 consisting solely of the specified {@code char}.
8022 *
8023 * @apiNote This method cannot handle <a
8024 * href="#supplementary"> supplementary characters</a>. To support
8025 * all Unicode characters, including supplementary characters, use
8026 * the {@link #toString(int)} method.
8027 *
8028 * @param c the {@code char} to be converted
8029 * @return the string representation of the specified {@code char}
8030 * @since 1.4
8031 */
8032 public static String toString(char c) {
8033 return String.valueOf(c);
8034 }
8035
8036 /**
8037 * Returns a {@code String} object representing the
8038 * specified character (Unicode code point). The result is a string of
8039 * length 1 or 2, consisting solely of the specified {@code codePoint}.
8040 *
8041 * @param codePoint the {@code codePoint} to be converted
8042 * @return the string representation of the specified {@code codePoint}
8043 * @throws IllegalArgumentException if the specified
8044 * {@code codePoint} is not a {@linkplain #isValidCodePoint
8045 * valid Unicode code point}.
8046 * @since 11
8047 */
8048 public static String toString(int codePoint) {
8049 return String.valueOfCodePoint(codePoint);
8050 }
8051
8052 /**
8053 * Determines whether the specified code point is a valid
8054 * <a href="http://www.unicode.org/glossary/#code_point">
8055 * Unicode code point value</a>.
8056 *
8057 * @param codePoint the Unicode code point to be tested
8058 * @return {@code true} if the specified code point value is between
8059 * {@link #MIN_CODE_POINT} and
8060 * {@link #MAX_CODE_POINT} inclusive;
8061 * {@code false} otherwise.
8062 * @since 1.5
8063 */
8064 public static boolean isValidCodePoint(int codePoint) {
8065 // Optimized form of:
8066 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
8067 int plane = codePoint >>> 16;
8068 return plane < ((MAX_CODE_POINT + 1) >>> 16);
8069 }
8070
8071 /**
8072 * Determines whether the specified character (Unicode code point)
8073 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
8074 * Such code points can be represented using a single {@code char}.
8075 *
8076 * @param codePoint the character (Unicode code point) to be tested
8077 * @return {@code true} if the specified code point is between
8078 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
8079 * {@code false} otherwise.
8080 * @since 1.7
8081 */
8082 public static boolean isBmpCodePoint(int codePoint) {
8083 return codePoint >>> 16 == 0;
8084 // Optimized form of:
8085 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
8086 // We consistently use logical shift (>>>) to facilitate
8087 // additional runtime optimizations.
8088 }
8089
8090 /**
8091 * Determines whether the specified character (Unicode code point)
8092 * is in the <a href="#supplementary">supplementary character</a> range.
8093 *
8094 * @param codePoint the character (Unicode code point) to be tested
8095 * @return {@code true} if the specified code point is between
8096 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
8097 * {@link #MAX_CODE_POINT} inclusive;
8098 * {@code false} otherwise.
8099 * @since 1.5
8100 */
8101 public static boolean isSupplementaryCodePoint(int codePoint) {
8102 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
8103 && codePoint < MAX_CODE_POINT + 1;
8104 }
8105
8106 /**
8107 * Determines if the given {@code char} value is a
8108 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8109 * Unicode high-surrogate code unit</a>
8110 * (also known as <i>leading-surrogate code unit</i>).
8111 *
8112 * <p>Such values do not represent characters by themselves,
8113 * but are used in the representation of
8114 * <a href="#supplementary">supplementary characters</a>
8115 * in the UTF-16 encoding.
8116 *
8117 * @param ch the {@code char} value to be tested.
8118 * @return {@code true} if the {@code char} value is between
8119 * {@link #MIN_HIGH_SURROGATE} and
8120 * {@link #MAX_HIGH_SURROGATE} inclusive;
8121 * {@code false} otherwise.
8122 * @see Character#isLowSurrogate(char)
8123 * @see Character.UnicodeBlock#of(int)
8124 * @since 1.5
8125 */
8126 public static boolean isHighSurrogate(char ch) {
8127 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
8128 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
8129 }
8130
8131 /**
8132 * Determines if the given {@code char} value is a
8133 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8134 * Unicode low-surrogate code unit</a>
8135 * (also known as <i>trailing-surrogate code unit</i>).
8136 *
8137 * <p>Such values do not represent characters by themselves,
8138 * but are used in the representation of
8139 * <a href="#supplementary">supplementary characters</a>
8140 * in the UTF-16 encoding.
8141 *
8142 * @param ch the {@code char} value to be tested.
8143 * @return {@code true} if the {@code char} value is between
8144 * {@link #MIN_LOW_SURROGATE} and
8145 * {@link #MAX_LOW_SURROGATE} inclusive;
8146 * {@code false} otherwise.
8147 * @see Character#isHighSurrogate(char)
8148 * @since 1.5
8149 */
8150 public static boolean isLowSurrogate(char ch) {
8151 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
8152 }
8153
8154 /**
8155 * Determines if the given {@code char} value is a Unicode
8156 * <i>surrogate code unit</i>.
8157 *
8158 * <p>Such values do not represent characters by themselves,
8159 * but are used in the representation of
8160 * <a href="#supplementary">supplementary characters</a>
8161 * in the UTF-16 encoding.
8162 *
8163 * <p>A char value is a surrogate code unit if and only if it is either
8164 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
8165 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
8166 *
8167 * @param ch the {@code char} value to be tested.
8168 * @return {@code true} if the {@code char} value is between
8169 * {@link #MIN_SURROGATE} and
8170 * {@link #MAX_SURROGATE} inclusive;
8171 * {@code false} otherwise.
8172 * @since 1.7
8173 */
8174 public static boolean isSurrogate(char ch) {
8175 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
8176 }
8177
8178 /**
8179 * Determines whether the specified pair of {@code char}
8180 * values is a valid
8181 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8182 * Unicode surrogate pair</a>.
8183
8184 * <p>This method is equivalent to the expression:
8185 * <blockquote><pre>{@code
8186 * isHighSurrogate(high) && isLowSurrogate(low)
8187 * }</pre></blockquote>
8188 *
8189 * @param high the high-surrogate code value to be tested
8190 * @param low the low-surrogate code value to be tested
8191 * @return {@code true} if the specified high and
8192 * low-surrogate code values represent a valid surrogate pair;
8193 * {@code false} otherwise.
8194 * @since 1.5
8195 */
8196 public static boolean isSurrogatePair(char high, char low) {
8197 return isHighSurrogate(high) && isLowSurrogate(low);
8198 }
8199
8200 /**
8201 * Determines the number of {@code char} values needed to
8202 * represent the specified character (Unicode code point). If the
8203 * specified character is equal to or greater than 0x10000, then
8204 * the method returns 2. Otherwise, the method returns 1.
8205 *
8206 * <p>This method doesn't validate the specified character to be a
8207 * valid Unicode code point. The caller must validate the
8208 * character value using {@link #isValidCodePoint(int) isValidCodePoint}
8209 * if necessary.
8210 *
8211 * @param codePoint the character (Unicode code point) to be tested.
8212 * @return 2 if the character is a valid supplementary character; 1 otherwise.
8213 * @see Character#isSupplementaryCodePoint(int)
8214 * @since 1.5
8215 */
8216 public static int charCount(int codePoint) {
8217 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
8218 }
8219
8220 /**
8221 * Converts the specified surrogate pair to its supplementary code
8222 * point value. This method does not validate the specified
8223 * surrogate pair. The caller must validate it using {@link
8224 * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
8225 *
8226 * @param high the high-surrogate code unit
8227 * @param low the low-surrogate code unit
8228 * @return the supplementary code point composed from the
8229 * specified surrogate pair.
8230 * @since 1.5
8231 */
8232 public static int toCodePoint(char high, char low) {
8233 // Optimized form of:
8234 // return ((high - MIN_HIGH_SURROGATE) << 10)
8235 // + (low - MIN_LOW_SURROGATE)
8236 // + MIN_SUPPLEMENTARY_CODE_POINT;
8237 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
8238 - (MIN_HIGH_SURROGATE << 10)
8239 - MIN_LOW_SURROGATE);
8240 }
8241
8242 /**
8243 * Returns the code point at the given index of the
8244 * {@code CharSequence}. If the {@code char} value at
8245 * the given index in the {@code CharSequence} is in the
8246 * high-surrogate range, the following index is less than the
8247 * length of the {@code CharSequence}, and the
8248 * {@code char} value at the following index is in the
8249 * low-surrogate range, then the supplementary code point
8250 * corresponding to this surrogate pair is returned. Otherwise,
8251 * the {@code char} value at the given index is returned.
8252 *
8253 * @param seq a sequence of {@code char} values (Unicode code
8254 * units)
8255 * @param index the index to the {@code char} values (Unicode
8256 * code units) in {@code seq} to be converted
8257 * @return the Unicode code point at the given index
8258 * @throws NullPointerException if {@code seq} is null.
8259 * @throws IndexOutOfBoundsException if the value
8260 * {@code index} is negative or not less than
8261 * {@link CharSequence#length() seq.length()}.
8262 * @since 1.5
8263 */
8264 public static int codePointAt(CharSequence seq, int index) {
8265 char c1 = seq.charAt(index);
8266 if (isHighSurrogate(c1) && ++index < seq.length()) {
8267 char c2 = seq.charAt(index);
8268 if (isLowSurrogate(c2)) {
8269 return toCodePoint(c1, c2);
8270 }
8271 }
8272 return c1;
8273 }
8274
8275 /**
8276 * Returns the code point at the given index of the
8277 * {@code char} array. If the {@code char} value at
8278 * the given index in the {@code char} array is in the
8279 * high-surrogate range, the following index is less than the
8280 * length of the {@code char} array, and the
8281 * {@code char} value at the following index is in the
8282 * low-surrogate range, then the supplementary code point
8283 * corresponding to this surrogate pair is returned. Otherwise,
8284 * the {@code char} value at the given index is returned.
8285 *
8286 * @param a the {@code char} array
8287 * @param index the index to the {@code char} values (Unicode
8288 * code units) in the {@code char} array to be converted
8289 * @return the Unicode code point at the given index
8290 * @throws NullPointerException if {@code a} is null.
8291 * @throws IndexOutOfBoundsException if the value
8292 * {@code index} is negative or not less than
8293 * the length of the {@code char} array.
8294 * @since 1.5
8295 */
8296 public static int codePointAt(char[] a, int index) {
8297 return codePointAtImpl(a, index, a.length);
8298 }
8299
8300 /**
8301 * Returns the code point at the given index of the
8302 * {@code char} array, where only array elements with
8303 * {@code index} less than {@code limit} can be used. If
8304 * the {@code char} value at the given index in the
8305 * {@code char} array is in the high-surrogate range, the
8306 * following index is less than the {@code limit}, and the
8307 * {@code char} value at the following index is in the
8308 * low-surrogate range, then the supplementary code point
8309 * corresponding to this surrogate pair is returned. Otherwise,
8310 * the {@code char} value at the given index is returned.
8311 *
8312 * @param a the {@code char} array
8313 * @param index the index to the {@code char} values (Unicode
8314 * code units) in the {@code char} array to be converted
8315 * @param limit the index after the last array element that
8316 * can be used in the {@code char} array
8317 * @return the Unicode code point at the given index
8318 * @throws NullPointerException if {@code a} is null.
8319 * @throws IndexOutOfBoundsException if the {@code index}
8320 * argument is negative or not less than the {@code limit}
8321 * argument, or if the {@code limit} argument is negative or
8322 * greater than the length of the {@code char} array.
8323 * @since 1.5
8324 */
8325 public static int codePointAt(char[] a, int index, int limit) {
8326 if (index >= limit || limit < 0 || limit > a.length) {
8327 throw new IndexOutOfBoundsException();
8328 }
8329 return codePointAtImpl(a, index, limit);
8330 }
8331
8332 // throws ArrayIndexOutOfBoundsException if index out of bounds
8333 static int codePointAtImpl(char[] a, int index, int limit) {
8334 char c1 = a[index];
8335 if (isHighSurrogate(c1) && ++index < limit) {
8336 char c2 = a[index];
8337 if (isLowSurrogate(c2)) {
8338 return toCodePoint(c1, c2);
8339 }
8340 }
8341 return c1;
8342 }
8343
8344 /**
8345 * Returns the code point preceding the given index of the
8346 * {@code CharSequence}. If the {@code char} value at
8347 * {@code (index - 1)} in the {@code CharSequence} is in
8348 * the low-surrogate range, {@code (index - 2)} is not
8349 * negative, and the {@code char} value at {@code (index - 2)}
8350 * in the {@code CharSequence} is in the
8351 * high-surrogate range, then the supplementary code point
8352 * corresponding to this surrogate pair is returned. Otherwise,
8353 * the {@code char} value at {@code (index - 1)} is
8354 * returned.
8355 *
8356 * @param seq the {@code CharSequence} instance
8357 * @param index the index following the code point that should be returned
8358 * @return the Unicode code point value before the given index.
8359 * @throws NullPointerException if {@code seq} is null.
8360 * @throws IndexOutOfBoundsException if the {@code index}
8361 * argument is less than 1 or greater than {@link
8362 * CharSequence#length() seq.length()}.
8363 * @since 1.5
8364 */
8365 public static int codePointBefore(CharSequence seq, int index) {
8366 char c2 = seq.charAt(--index);
8367 if (isLowSurrogate(c2) && index > 0) {
8368 char c1 = seq.charAt(--index);
8369 if (isHighSurrogate(c1)) {
8370 return toCodePoint(c1, c2);
8371 }
8372 }
8373 return c2;
8374 }
8375
8376 /**
8377 * Returns the code point preceding the given index of the
8378 * {@code char} array. If the {@code char} value at
8379 * {@code (index - 1)} in the {@code char} array is in
8380 * the low-surrogate range, {@code (index - 2)} is not
8381 * negative, and the {@code char} value at {@code (index - 2)}
8382 * in the {@code char} array is in the
8383 * high-surrogate range, then the supplementary code point
8384 * corresponding to this surrogate pair is returned. Otherwise,
8385 * the {@code char} value at {@code (index - 1)} is
8386 * returned.
8387 *
8388 * @param a the {@code char} array
8389 * @param index the index following the code point that should be returned
8390 * @return the Unicode code point value before the given index.
8391 * @throws NullPointerException if {@code a} is null.
8392 * @throws IndexOutOfBoundsException if the {@code index}
8393 * argument is less than 1 or greater than the length of the
8394 * {@code char} array
8395 * @since 1.5
8396 */
8397 public static int codePointBefore(char[] a, int index) {
8398 return codePointBeforeImpl(a, index, 0);
8399 }
8400
8401 /**
8402 * Returns the code point preceding the given index of the
8403 * {@code char} array, where only array elements with
8404 * {@code index} greater than or equal to {@code start}
8405 * can be used. If the {@code char} value at {@code (index - 1)}
8406 * in the {@code char} array is in the
8407 * low-surrogate range, {@code (index - 2)} is not less than
8408 * {@code start}, and the {@code char} value at
8409 * {@code (index - 2)} in the {@code char} array is in
8410 * the high-surrogate range, then the supplementary code point
8411 * corresponding to this surrogate pair is returned. Otherwise,
8412 * the {@code char} value at {@code (index - 1)} is
8413 * returned.
8414 *
8415 * @param a the {@code char} array
8416 * @param index the index following the code point that should be returned
8417 * @param start the index of the first array element in the
8418 * {@code char} array
8419 * @return the Unicode code point value before the given index.
8420 * @throws NullPointerException if {@code a} is null.
8421 * @throws IndexOutOfBoundsException if the {@code index}
8422 * argument is not greater than the {@code start} argument or
8423 * is greater than the length of the {@code char} array, or
8424 * if the {@code start} argument is negative or not less than
8425 * the length of the {@code char} array.
8426 * @since 1.5
8427 */
8428 public static int codePointBefore(char[] a, int index, int start) {
8429 if (index <= start || start < 0 || start >= a.length) {
8430 throw new IndexOutOfBoundsException();
8431 }
8432 return codePointBeforeImpl(a, index, start);
8433 }
8434
8435 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
8436 static int codePointBeforeImpl(char[] a, int index, int start) {
8437 char c2 = a[--index];
8438 if (isLowSurrogate(c2) && index > start) {
8439 char c1 = a[--index];
8440 if (isHighSurrogate(c1)) {
8441 return toCodePoint(c1, c2);
8442 }
8443 }
8444 return c2;
8445 }
8446
8447 /**
8448 * Returns the leading surrogate (a
8449 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8450 * high surrogate code unit</a>) of the
8451 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8452 * surrogate pair</a>
8453 * representing the specified supplementary character (Unicode
8454 * code point) in the UTF-16 encoding. If the specified character
8455 * is not a
8456 * <a href="Character.html#supplementary">supplementary character</a>,
8457 * an unspecified {@code char} is returned.
8458 *
8459 * <p>If
8460 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8461 * is {@code true}, then
8462 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
8463 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
8464 * are also always {@code true}.
8465 *
8466 * @param codePoint a supplementary character (Unicode code point)
8467 * @return the leading surrogate code unit used to represent the
8468 * character in the UTF-16 encoding
8469 * @since 1.7
8470 */
8471 public static char highSurrogate(int codePoint) {
8472 return (char) ((codePoint >>> 10)
8473 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
8474 }
8475
8476 /**
8477 * Returns the trailing surrogate (a
8478 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8479 * low surrogate code unit</a>) of the
8480 * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8481 * surrogate pair</a>
8482 * representing the specified supplementary character (Unicode
8483 * code point) in the UTF-16 encoding. If the specified character
8484 * is not a
8485 * <a href="Character.html#supplementary">supplementary character</a>,
8486 * an unspecified {@code char} is returned.
8487 *
8488 * <p>If
8489 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
8490 * is {@code true}, then
8491 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
8492 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
8493 * are also always {@code true}.
8494 *
8495 * @param codePoint a supplementary character (Unicode code point)
8496 * @return the trailing surrogate code unit used to represent the
8497 * character in the UTF-16 encoding
8498 * @since 1.7
8499 */
8500 public static char lowSurrogate(int codePoint) {
8501 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
8502 }
8503
8504 /**
8505 * Converts the specified character (Unicode code point) to its
8506 * UTF-16 representation. If the specified code point is a BMP
8507 * (Basic Multilingual Plane or Plane 0) value, the same value is
8508 * stored in {@code dst[dstIndex]}, and 1 is returned. If the
8509 * specified code point is a supplementary character, its
8510 * surrogate values are stored in {@code dst[dstIndex]}
8511 * (high-surrogate) and {@code dst[dstIndex+1]}
8512 * (low-surrogate), and 2 is returned.
8513 *
8514 * @param codePoint the character (Unicode code point) to be converted.
8515 * @param dst an array of {@code char} in which the
8516 * {@code codePoint}'s UTF-16 value is stored.
8517 * @param dstIndex the start index into the {@code dst}
8518 * array where the converted value is stored.
8519 * @return 1 if the code point is a BMP code point, 2 if the
8520 * code point is a supplementary code point.
8521 * @throws IllegalArgumentException if the specified
8522 * {@code codePoint} is not a valid Unicode code point.
8523 * @throws NullPointerException if the specified {@code dst} is null.
8524 * @throws IndexOutOfBoundsException if {@code dstIndex}
8525 * is negative or not less than {@code dst.length}, or if
8526 * {@code dst} at {@code dstIndex} doesn't have enough
8527 * array element(s) to store the resulting {@code char}
8528 * value(s). (If {@code dstIndex} is equal to
8529 * {@code dst.length-1} and the specified
8530 * {@code codePoint} is a supplementary character, the
8531 * high-surrogate value is not stored in
8532 * {@code dst[dstIndex]}.)
8533 * @since 1.5
8534 */
8535 public static int toChars(int codePoint, char[] dst, int dstIndex) {
8536 if (isBmpCodePoint(codePoint)) {
8537 dst[dstIndex] = (char) codePoint;
8538 return 1;
8539 } else if (isValidCodePoint(codePoint)) {
8540 toSurrogates(codePoint, dst, dstIndex);
8541 return 2;
8542 } else {
8543 throw new IllegalArgumentException(
8544 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8545 }
8546 }
8547
8548 /**
8549 * Converts the specified character (Unicode code point) to its
8550 * UTF-16 representation stored in a {@code char} array. If
8551 * the specified code point is a BMP (Basic Multilingual Plane or
8552 * Plane 0) value, the resulting {@code char} array has
8553 * the same value as {@code codePoint}. If the specified code
8554 * point is a supplementary code point, the resulting
8555 * {@code char} array has the corresponding surrogate pair.
8556 *
8557 * @param codePoint a Unicode code point
8558 * @return a {@code char} array having
8559 * {@code codePoint}'s UTF-16 representation.
8560 * @throws IllegalArgumentException if the specified
8561 * {@code codePoint} is not a valid Unicode code point.
8562 * @since 1.5
8563 */
8564 public static char[] toChars(int codePoint) {
8565 if (isBmpCodePoint(codePoint)) {
8566 return new char[] { (char) codePoint };
8567 } else if (isValidCodePoint(codePoint)) {
8568 char[] result = new char[2];
8569 toSurrogates(codePoint, result, 0);
8570 return result;
8571 } else {
8572 throw new IllegalArgumentException(
8573 String.format("Not a valid Unicode code point: 0x%X", codePoint));
8574 }
8575 }
8576
8577 static void toSurrogates(int codePoint, char[] dst, int index) {
8578 // We write elements "backwards" to guarantee all-or-nothing
8579 dst[index+1] = lowSurrogate(codePoint);
8580 dst[index] = highSurrogate(codePoint);
8581 }
8582
8583 /**
8584 * Returns the number of Unicode code points in the text range of
8585 * the specified char sequence. The text range begins at the
8586 * specified {@code beginIndex} and extends to the
8587 * {@code char} at index {@code endIndex - 1}. Thus the
8588 * length (in {@code char}s) of the text range is
8589 * {@code endIndex-beginIndex}. Unpaired surrogates within
8590 * the text range count as one code point each.
8591 *
8592 * @param seq the char sequence
8593 * @param beginIndex the index to the first {@code char} of
8594 * the text range.
8595 * @param endIndex the index after the last {@code char} of
8596 * the text range.
8597 * @return the number of Unicode code points in the specified text
8598 * range
8599 * @throws NullPointerException if {@code seq} is null.
8600 * @throws IndexOutOfBoundsException if the
8601 * {@code beginIndex} is negative, or {@code endIndex}
8602 * is larger than the length of the given sequence, or
8603 * {@code beginIndex} is larger than {@code endIndex}.
8604 * @since 1.5
8605 */
8606 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
8607 int length = seq.length();
8608 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
8609 throw new IndexOutOfBoundsException();
8610 }
8611 int n = endIndex - beginIndex;
8612 for (int i = beginIndex; i < endIndex; ) {
8613 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
8614 isLowSurrogate(seq.charAt(i))) {
8615 n--;
8616 i++;
8617 }
8618 }
8619 return n;
8620 }
8621
8622 /**
8623 * Returns the number of Unicode code points in a subarray of the
8624 * {@code char} array argument. The {@code offset}
8625 * argument is the index of the first {@code char} of the
8626 * subarray and the {@code count} argument specifies the
8627 * length of the subarray in {@code char}s. Unpaired
8628 * surrogates within the subarray count as one code point each.
8629 *
8630 * @param a the {@code char} array
8631 * @param offset the index of the first {@code char} in the
8632 * given {@code char} array
8633 * @param count the length of the subarray in {@code char}s
8634 * @return the number of Unicode code points in the specified subarray
8635 * @throws NullPointerException if {@code a} is null.
8636 * @throws IndexOutOfBoundsException if {@code offset} or
8637 * {@code count} is negative, or if {@code offset +
8638 * count} is larger than the length of the given array.
8639 * @since 1.5
8640 */
8641 public static int codePointCount(char[] a, int offset, int count) {
8642 if (count > a.length - offset || offset < 0 || count < 0) {
8643 throw new IndexOutOfBoundsException();
8644 }
8645 return codePointCountImpl(a, offset, count);
8646 }
8647
8648 static int codePointCountImpl(char[] a, int offset, int count) {
8649 int endIndex = offset + count;
8650 int n = count;
8651 for (int i = offset; i < endIndex; ) {
8652 if (isHighSurrogate(a[i++]) && i < endIndex &&
8653 isLowSurrogate(a[i])) {
8654 n--;
8655 i++;
8656 }
8657 }
8658 return n;
8659 }
8660
8661 /**
8662 * Returns the index within the given char sequence that is offset
8663 * from the given {@code index} by {@code codePointOffset}
8664 * code points. Unpaired surrogates within the text range given by
8665 * {@code index} and {@code codePointOffset} count as
8666 * one code point each.
8667 *
8668 * @param seq the char sequence
8669 * @param index the index to be offset
8670 * @param codePointOffset the offset in code points
8671 * @return the index within the char sequence
8672 * @throws NullPointerException if {@code seq} is null.
8673 * @throws IndexOutOfBoundsException if {@code index}
8674 * is negative or larger than the length of the char sequence,
8675 * or if {@code codePointOffset} is positive and the
8676 * subsequence starting with {@code index} has fewer than
8677 * {@code codePointOffset} code points, or if
8678 * {@code codePointOffset} is negative and the subsequence
8679 * before {@code index} has fewer than the absolute value
8680 * of {@code codePointOffset} code points.
8681 * @since 1.5
8682 */
8683 public static int offsetByCodePoints(CharSequence seq, int index,
8684 int codePointOffset) {
8685 int length = seq.length();
8686 if (index < 0 || index > length) {
8687 throw new IndexOutOfBoundsException();
8688 }
8689
8690 int x = index;
8691 if (codePointOffset >= 0) {
8692 int i;
8693 for (i = 0; x < length && i < codePointOffset; i++) {
8694 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
8695 isLowSurrogate(seq.charAt(x))) {
8696 x++;
8697 }
8698 }
8699 if (i < codePointOffset) {
8700 throw new IndexOutOfBoundsException();
8701 }
8702 } else {
8703 int i;
8704 for (i = codePointOffset; x > 0 && i < 0; i++) {
8705 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
8706 isHighSurrogate(seq.charAt(x-1))) {
8707 x--;
8708 }
8709 }
8710 if (i < 0) {
8711 throw new IndexOutOfBoundsException();
8712 }
8713 }
8714 return x;
8715 }
8716
8717 /**
8718 * Returns the index within the given {@code char} subarray
8719 * that is offset from the given {@code index} by
8720 * {@code codePointOffset} code points. The
8721 * {@code start} and {@code count} arguments specify a
8722 * subarray of the {@code char} array. Unpaired surrogates
8723 * within the text range given by {@code index} and
8724 * {@code codePointOffset} count as one code point each.
8725 *
8726 * @param a the {@code char} array
8727 * @param start the index of the first {@code char} of the
8728 * subarray
8729 * @param count the length of the subarray in {@code char}s
8730 * @param index the index to be offset
8731 * @param codePointOffset the offset in code points
8732 * @return the index within the subarray
8733 * @throws NullPointerException if {@code a} is null.
8734 * @throws IndexOutOfBoundsException
8735 * if {@code start} or {@code count} is negative,
8736 * or if {@code start + count} is larger than the length of
8737 * the given array,
8738 * or if {@code index} is less than {@code start} or
8739 * larger than {@code start + count},
8740 * or if {@code codePointOffset} is positive and the text range
8741 * starting with {@code index} and ending with {@code start + count - 1}
8742 * has fewer than {@code codePointOffset} code
8743 * points,
8744 * or if {@code codePointOffset} is negative and the text range
8745 * starting with {@code start} and ending with {@code index - 1}
8746 * has fewer than the absolute value of
8747 * {@code codePointOffset} code points.
8748 * @since 1.5
8749 */
8750 public static int offsetByCodePoints(char[] a, int start, int count,
8751 int index, int codePointOffset) {
8752 if (count > a.length-start || start < 0 || count < 0
8753 || index < start || index > start+count) {
8754 throw new IndexOutOfBoundsException();
8755 }
8756 return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
8757 }
8758
8759 static int offsetByCodePointsImpl(char[]a, int start, int count,
8760 int index, int codePointOffset) {
8761 int x = index;
8762 if (codePointOffset >= 0) {
8763 int limit = start + count;
8764 int i;
8765 for (i = 0; x < limit && i < codePointOffset; i++) {
8766 if (isHighSurrogate(a[x++]) && x < limit &&
8767 isLowSurrogate(a[x])) {
8768 x++;
8769 }
8770 }
8771 if (i < codePointOffset) {
8772 throw new IndexOutOfBoundsException();
8773 }
8774 } else {
8775 int i;
8776 for (i = codePointOffset; x > start && i < 0; i++) {
8777 if (isLowSurrogate(a[--x]) && x > start &&
8778 isHighSurrogate(a[x-1])) {
8779 x--;
8780 }
8781 }
8782 if (i < 0) {
8783 throw new IndexOutOfBoundsException();
8784 }
8785 }
8786 return x;
8787 }
8788
8789 /**
8790 * Determines if the specified character is a lowercase character.
8791 * <p>
8792 * A character is lowercase if its general category type, provided
8793 * by {@code Character.getType(ch)}, is
8794 * {@code LOWERCASE_LETTER}, or it has contributory property
8795 * Other_Lowercase as defined by the Unicode Standard.
8796 * <p>
8797 * The following are examples of lowercase characters:
8798 * <blockquote><pre>
8799 * a b c d e f g h i j k l m n o p q r s t u v w x y z
8800 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8801 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8802 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8803 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8804 * </pre></blockquote>
8805 * <p> Many other Unicode characters are lowercase too.
8806 *
8807 * <p><b>Note:</b> This method cannot handle <a
8808 * href="#supplementary"> supplementary characters</a>. To support
8809 * all Unicode characters, including supplementary characters, use
8810 * the {@link #isLowerCase(int)} method.
8811 *
8812 * @param ch the character to be tested.
8813 * @return {@code true} if the character is lowercase;
8814 * {@code false} otherwise.
8815 * @see Character#isUpperCase(char)
8816 * @see Character#isTitleCase(char)
8817 * @see Character#toLowerCase(char)
8818 * @see Character#getType(char)
8819 */
8820 public static boolean isLowerCase(char ch) {
8821 return isLowerCase((int)ch);
8822 }
8823
8824 /**
8825 * Determines if the specified character (Unicode code point) is a
8826 * lowercase character.
8827 * <p>
8828 * A character is lowercase if its general category type, provided
8829 * by {@link Character#getType getType(codePoint)}, is
8830 * {@code LOWERCASE_LETTER}, or it has contributory property
8831 * Other_Lowercase as defined by the Unicode Standard.
8832 * <p>
8833 * The following are examples of lowercase characters:
8834 * <blockquote><pre>
8835 * a b c d e f g h i j k l m n o p q r s t u v w x y z
8836 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6'
8837 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
8838 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
8839 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
8840 * </pre></blockquote>
8841 * <p> Many other Unicode characters are lowercase too.
8842 *
8843 * @param codePoint the character (Unicode code point) to be tested.
8844 * @return {@code true} if the character is lowercase;
8845 * {@code false} otherwise.
8846 * @see Character#isUpperCase(int)
8847 * @see Character#isTitleCase(int)
8848 * @see Character#toLowerCase(int)
8849 * @see Character#getType(int)
8850 * @since 1.5
8851 */
8852 public static boolean isLowerCase(int codePoint) {
8853 return CharacterData.of(codePoint).isLowerCase(codePoint) ||
8854 CharacterData.of(codePoint).isOtherLowercase(codePoint);
8855 }
8856
8857 /**
8858 * Determines if the specified character is an uppercase character.
8859 * <p>
8860 * A character is uppercase if its general category type, provided by
8861 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
8862 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8863 * <p>
8864 * The following are examples of uppercase characters:
8865 * <blockquote><pre>
8866 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8867 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8868 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8869 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8870 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8871 * </pre></blockquote>
8872 * <p> Many other Unicode characters are uppercase too.
8873 *
8874 * <p><b>Note:</b> This method cannot handle <a
8875 * href="#supplementary"> supplementary characters</a>. To support
8876 * all Unicode characters, including supplementary characters, use
8877 * the {@link #isUpperCase(int)} method.
8878 *
8879 * @param ch the character to be tested.
8880 * @return {@code true} if the character is uppercase;
8881 * {@code false} otherwise.
8882 * @see Character#isLowerCase(char)
8883 * @see Character#isTitleCase(char)
8884 * @see Character#toUpperCase(char)
8885 * @see Character#getType(char)
8886 * @since 1.0
8887 */
8888 public static boolean isUpperCase(char ch) {
8889 return isUpperCase((int)ch);
8890 }
8891
8892 /**
8893 * Determines if the specified character (Unicode code point) is an uppercase character.
8894 * <p>
8895 * A character is uppercase if its general category type, provided by
8896 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
8897 * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
8898 * <p>
8899 * The following are examples of uppercase characters:
8900 * <blockquote><pre>
8901 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
8902 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
8903 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
8904 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
8905 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
8906 * </pre></blockquote>
8907 * <p> Many other Unicode characters are uppercase too.
8908 *
8909 * @param codePoint the character (Unicode code point) to be tested.
8910 * @return {@code true} if the character is uppercase;
8911 * {@code false} otherwise.
8912 * @see Character#isLowerCase(int)
8913 * @see Character#isTitleCase(int)
8914 * @see Character#toUpperCase(int)
8915 * @see Character#getType(int)
8916 * @since 1.5
8917 */
8918 public static boolean isUpperCase(int codePoint) {
8919 return CharacterData.of(codePoint).isUpperCase(codePoint) ||
8920 CharacterData.of(codePoint).isOtherUppercase(codePoint);
8921 }
8922
8923 /**
8924 * Determines if the specified character is a titlecase character.
8925 * <p>
8926 * A character is a titlecase character if its general
8927 * category type, provided by {@code Character.getType(ch)},
8928 * is {@code TITLECASE_LETTER}.
8929 * <p>
8930 * Some characters look like pairs of Latin letters. For example, there
8931 * is an uppercase letter that looks like "LJ" and has a corresponding
8932 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8933 * is the appropriate form to use when rendering a word in lowercase
8934 * with initial capitals, as for a book title.
8935 * <p>
8936 * These are some of the Unicode characters for which this method returns
8937 * {@code true}:
8938 * <ul>
8939 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8940 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8941 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8942 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8943 * </ul>
8944 * <p> Many other Unicode characters are titlecase too.
8945 *
8946 * <p><b>Note:</b> This method cannot handle <a
8947 * href="#supplementary"> supplementary characters</a>. To support
8948 * all Unicode characters, including supplementary characters, use
8949 * the {@link #isTitleCase(int)} method.
8950 *
8951 * @param ch the character to be tested.
8952 * @return {@code true} if the character is titlecase;
8953 * {@code false} otherwise.
8954 * @see Character#isLowerCase(char)
8955 * @see Character#isUpperCase(char)
8956 * @see Character#toTitleCase(char)
8957 * @see Character#getType(char)
8958 * @since 1.0.2
8959 */
8960 public static boolean isTitleCase(char ch) {
8961 return isTitleCase((int)ch);
8962 }
8963
8964 /**
8965 * Determines if the specified character (Unicode code point) is a titlecase character.
8966 * <p>
8967 * A character is a titlecase character if its general
8968 * category type, provided by {@link Character#getType(int) getType(codePoint)},
8969 * is {@code TITLECASE_LETTER}.
8970 * <p>
8971 * Some characters look like pairs of Latin letters. For example, there
8972 * is an uppercase letter that looks like "LJ" and has a corresponding
8973 * lowercase letter that looks like "lj". A third form, which looks like "Lj",
8974 * is the appropriate form to use when rendering a word in lowercase
8975 * with initial capitals, as for a book title.
8976 * <p>
8977 * These are some of the Unicode characters for which this method returns
8978 * {@code true}:
8979 * <ul>
8980 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
8981 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
8982 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
8983 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
8984 * </ul>
8985 * <p> Many other Unicode characters are titlecase too.
8986 *
8987 * @param codePoint the character (Unicode code point) to be tested.
8988 * @return {@code true} if the character is titlecase;
8989 * {@code false} otherwise.
8990 * @see Character#isLowerCase(int)
8991 * @see Character#isUpperCase(int)
8992 * @see Character#toTitleCase(int)
8993 * @see Character#getType(int)
8994 * @since 1.5
8995 */
8996 public static boolean isTitleCase(int codePoint) {
8997 return getType(codePoint) == Character.TITLECASE_LETTER;
8998 }
8999
9000 /**
9001 * Determines if the specified character is a digit.
9002 * <p>
9003 * A character is a digit if its general category type, provided
9004 * by {@code Character.getType(ch)}, is
9005 * {@code DECIMAL_DIGIT_NUMBER}.
9006 * <p>
9007 * Some Unicode character ranges that contain digits:
9008 * <ul>
9009 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9010 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9011 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9012 * Arabic-Indic digits
9013 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9014 * Extended Arabic-Indic digits
9015 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9016 * Devanagari digits
9017 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9018 * Fullwidth digits
9019 * </ul>
9020 *
9021 * Many other character ranges contain digits as well.
9022 *
9023 * <p><b>Note:</b> This method cannot handle <a
9024 * href="#supplementary"> supplementary characters</a>. To support
9025 * all Unicode characters, including supplementary characters, use
9026 * the {@link #isDigit(int)} method.
9027 *
9028 * @param ch the character to be tested.
9029 * @return {@code true} if the character is a digit;
9030 * {@code false} otherwise.
9031 * @see Character#digit(char, int)
9032 * @see Character#forDigit(int, int)
9033 * @see Character#getType(char)
9034 */
9035 public static boolean isDigit(char ch) {
9036 return isDigit((int)ch);
9037 }
9038
9039 /**
9040 * Determines if the specified character (Unicode code point) is a digit.
9041 * <p>
9042 * A character is a digit if its general category type, provided
9043 * by {@link Character#getType(int) getType(codePoint)}, is
9044 * {@code DECIMAL_DIGIT_NUMBER}.
9045 * <p>
9046 * Some Unicode character ranges that contain digits:
9047 * <ul>
9048 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9049 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9050 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9051 * Arabic-Indic digits
9052 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9053 * Extended Arabic-Indic digits
9054 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9055 * Devanagari digits
9056 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9057 * Fullwidth digits
9058 * </ul>
9059 *
9060 * Many other character ranges contain digits as well.
9061 *
9062 * @param codePoint the character (Unicode code point) to be tested.
9063 * @return {@code true} if the character is a digit;
9064 * {@code false} otherwise.
9065 * @see Character#forDigit(int, int)
9066 * @see Character#getType(int)
9067 * @since 1.5
9068 */
9069 public static boolean isDigit(int codePoint) {
9070 return CharacterData.of(codePoint).isDigit(codePoint);
9071 }
9072
9073 /**
9074 * Determines if a character is defined in Unicode.
9075 * <p>
9076 * A character is defined if at least one of the following is true:
9077 * <ul>
9078 * <li>It has an entry in the UnicodeData file.
9079 * <li>It has a value in a range defined by the UnicodeData file.
9080 * </ul>
9081 *
9082 * <p><b>Note:</b> This method cannot handle <a
9083 * href="#supplementary"> supplementary characters</a>. To support
9084 * all Unicode characters, including supplementary characters, use
9085 * the {@link #isDefined(int)} method.
9086 *
9087 * @param ch the character to be tested
9088 * @return {@code true} if the character has a defined meaning
9089 * in Unicode; {@code false} otherwise.
9090 * @see Character#isDigit(char)
9091 * @see Character#isLetter(char)
9092 * @see Character#isLetterOrDigit(char)
9093 * @see Character#isLowerCase(char)
9094 * @see Character#isTitleCase(char)
9095 * @see Character#isUpperCase(char)
9096 * @since 1.0.2
9097 */
9098 public static boolean isDefined(char ch) {
9099 return isDefined((int)ch);
9100 }
9101
9102 /**
9103 * Determines if a character (Unicode code point) is defined in Unicode.
9104 * <p>
9105 * A character is defined if at least one of the following is true:
9106 * <ul>
9107 * <li>It has an entry in the UnicodeData file.
9108 * <li>It has a value in a range defined by the UnicodeData file.
9109 * </ul>
9110 *
9111 * @param codePoint the character (Unicode code point) to be tested.
9112 * @return {@code true} if the character has a defined meaning
9113 * in Unicode; {@code false} otherwise.
9114 * @see Character#isDigit(int)
9115 * @see Character#isLetter(int)
9116 * @see Character#isLetterOrDigit(int)
9117 * @see Character#isLowerCase(int)
9118 * @see Character#isTitleCase(int)
9119 * @see Character#isUpperCase(int)
9120 * @since 1.5
9121 */
9122 public static boolean isDefined(int codePoint) {
9123 return getType(codePoint) != Character.UNASSIGNED;
9124 }
9125
9126 /**
9127 * Determines if the specified character is a letter.
9128 * <p>
9129 * A character is considered to be a letter if its general
9130 * category type, provided by {@code Character.getType(ch)},
9131 * is any of the following:
9132 * <ul>
9133 * <li> {@code UPPERCASE_LETTER}
9134 * <li> {@code LOWERCASE_LETTER}
9135 * <li> {@code TITLECASE_LETTER}
9136 * <li> {@code MODIFIER_LETTER}
9137 * <li> {@code OTHER_LETTER}
9138 * </ul>
9139 *
9140 * Not all letters have case. Many characters are
9141 * letters but are neither uppercase nor lowercase nor titlecase.
9142 *
9143 * <p><b>Note:</b> This method cannot handle <a
9144 * href="#supplementary"> supplementary characters</a>. To support
9145 * all Unicode characters, including supplementary characters, use
9146 * the {@link #isLetter(int)} method.
9147 *
9148 * @param ch the character to be tested.
9149 * @return {@code true} if the character is a letter;
9150 * {@code false} otherwise.
9151 * @see Character#isDigit(char)
9152 * @see Character#isJavaIdentifierStart(char)
9153 * @see Character#isJavaLetter(char)
9154 * @see Character#isJavaLetterOrDigit(char)
9155 * @see Character#isLetterOrDigit(char)
9156 * @see Character#isLowerCase(char)
9157 * @see Character#isTitleCase(char)
9158 * @see Character#isUnicodeIdentifierStart(char)
9159 * @see Character#isUpperCase(char)
9160 */
9161 public static boolean isLetter(char ch) {
9162 return isLetter((int)ch);
9163 }
9164
9165 /**
9166 * Determines if the specified character (Unicode code point) is a letter.
9167 * <p>
9168 * A character is considered to be a letter if its general
9169 * category type, provided by {@link Character#getType(int) getType(codePoint)},
9170 * is any of the following:
9171 * <ul>
9172 * <li> {@code UPPERCASE_LETTER}
9173 * <li> {@code LOWERCASE_LETTER}
9174 * <li> {@code TITLECASE_LETTER}
9175 * <li> {@code MODIFIER_LETTER}
9176 * <li> {@code OTHER_LETTER}
9177 * </ul>
9178 *
9179 * Not all letters have case. Many characters are
9180 * letters but are neither uppercase nor lowercase nor titlecase.
9181 *
9182 * @param codePoint the character (Unicode code point) to be tested.
9183 * @return {@code true} if the character is a letter;
9184 * {@code false} otherwise.
9185 * @see Character#isDigit(int)
9186 * @see Character#isJavaIdentifierStart(int)
9187 * @see Character#isLetterOrDigit(int)
9188 * @see Character#isLowerCase(int)
9189 * @see Character#isTitleCase(int)
9190 * @see Character#isUnicodeIdentifierStart(int)
9191 * @see Character#isUpperCase(int)
9192 * @since 1.5
9193 */
9194 public static boolean isLetter(int codePoint) {
9195 return ((((1 << Character.UPPERCASE_LETTER) |
9196 (1 << Character.LOWERCASE_LETTER) |
9197 (1 << Character.TITLECASE_LETTER) |
9198 (1 << Character.MODIFIER_LETTER) |
9199 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
9200 != 0;
9201 }
9202
9203 /**
9204 * Determines if the specified character is a letter or digit.
9205 * <p>
9206 * A character is considered to be a letter or digit if either
9207 * {@code Character.isLetter(char ch)} or
9208 * {@code Character.isDigit(char ch)} returns
9209 * {@code true} for the character.
9210 *
9211 * <p><b>Note:</b> This method cannot handle <a
9212 * href="#supplementary"> supplementary characters</a>. To support
9213 * all Unicode characters, including supplementary characters, use
9214 * the {@link #isLetterOrDigit(int)} method.
9215 *
9216 * @param ch the character to be tested.
9217 * @return {@code true} if the character is a letter or digit;
9218 * {@code false} otherwise.
9219 * @see Character#isDigit(char)
9220 * @see Character#isJavaIdentifierPart(char)
9221 * @see Character#isJavaLetter(char)
9222 * @see Character#isJavaLetterOrDigit(char)
9223 * @see Character#isLetter(char)
9224 * @see Character#isUnicodeIdentifierPart(char)
9225 * @since 1.0.2
9226 */
9227 public static boolean isLetterOrDigit(char ch) {
9228 return isLetterOrDigit((int)ch);
9229 }
9230
9231 /**
9232 * Determines if the specified character (Unicode code point) is a letter or digit.
9233 * <p>
9234 * A character is considered to be a letter or digit if either
9235 * {@link #isLetter(int) isLetter(codePoint)} or
9236 * {@link #isDigit(int) isDigit(codePoint)} returns
9237 * {@code true} for the character.
9238 *
9239 * @param codePoint the character (Unicode code point) to be tested.
9240 * @return {@code true} if the character is a letter or digit;
9241 * {@code false} otherwise.
9242 * @see Character#isDigit(int)
9243 * @see Character#isJavaIdentifierPart(int)
9244 * @see Character#isLetter(int)
9245 * @see Character#isUnicodeIdentifierPart(int)
9246 * @since 1.5
9247 */
9248 public static boolean isLetterOrDigit(int codePoint) {
9249 return ((((1 << Character.UPPERCASE_LETTER) |
9250 (1 << Character.LOWERCASE_LETTER) |
9251 (1 << Character.TITLECASE_LETTER) |
9252 (1 << Character.MODIFIER_LETTER) |
9253 (1 << Character.OTHER_LETTER) |
9254 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
9255 != 0;
9256 }
9257
9258 /**
9259 * Determines if the specified character is permissible as the first
9260 * character in a Java identifier.
9261 * <p>
9262 * A character may start a Java identifier if and only if
9263 * one of the following conditions is true:
9264 * <ul>
9265 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9266 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9267 * <li> {@code ch} is a currency symbol (such as {@code '$'})
9268 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9269 * </ul>
9270 *
9271 * These conditions are tested against the character information from version
9272 * 10.0 of the Unicode Standard.
9273 *
9274 * @param ch the character to be tested.
9275 * @return {@code true} if the character may start a Java
9276 * identifier; {@code false} otherwise.
9277 * @see Character#isJavaLetterOrDigit(char)
9278 * @see Character#isJavaIdentifierStart(char)
9279 * @see Character#isJavaIdentifierPart(char)
9280 * @see Character#isLetter(char)
9281 * @see Character#isLetterOrDigit(char)
9282 * @see Character#isUnicodeIdentifierStart(char)
9283 * @since 1.0.2
9284 * @deprecated Replaced by isJavaIdentifierStart(char).
9285 */
9286 @Deprecated(since="1.1")
9287 public static boolean isJavaLetter(char ch) {
9288 return isJavaIdentifierStart(ch);
9289 }
9290
9291 /**
9292 * Determines if the specified character may be part of a Java
9293 * identifier as other than the first character.
9294 * <p>
9295 * A character may be part of a Java identifier if and only if one
9296 * of the following conditions is true:
9297 * <ul>
9298 * <li> it is a letter
9299 * <li> it is a currency symbol (such as {@code '$'})
9300 * <li> it is a connecting punctuation character (such as {@code '_'})
9301 * <li> it is a digit
9302 * <li> it is a numeric letter (such as a Roman numeral character)
9303 * <li> it is a combining mark
9304 * <li> it is a non-spacing mark
9305 * <li> {@code isIdentifierIgnorable} returns
9306 * {@code true} for the character.
9307 * </ul>
9308 *
9309 * These conditions are tested against the character information from version
9310 * 10.0 of the Unicode Standard.
9311 *
9312 * @param ch the character to be tested.
9313 * @return {@code true} if the character may be part of a
9314 * Java identifier; {@code false} otherwise.
9315 * @see Character#isJavaLetter(char)
9316 * @see Character#isJavaIdentifierStart(char)
9317 * @see Character#isJavaIdentifierPart(char)
9318 * @see Character#isLetter(char)
9319 * @see Character#isLetterOrDigit(char)
9320 * @see Character#isUnicodeIdentifierPart(char)
9321 * @see Character#isIdentifierIgnorable(char)
9322 * @since 1.0.2
9323 * @deprecated Replaced by isJavaIdentifierPart(char).
9324 */
9325 @Deprecated(since="1.1")
9326 public static boolean isJavaLetterOrDigit(char ch) {
9327 return isJavaIdentifierPart(ch);
9328 }
9329
9330 /**
 * Determines if the specified character (Unicode code point) is alphabetic.
9332 * <p>
9333 * A character is considered to be alphabetic if its general category type,
9334 * provided by {@link Character#getType(int) getType(codePoint)}, is any of
9335 * the following:
9336 * <ul>
9337 * <li> {@code UPPERCASE_LETTER}
9338 * <li> {@code LOWERCASE_LETTER}
9339 * <li> {@code TITLECASE_LETTER}
9340 * <li> {@code MODIFIER_LETTER}
9341 * <li> {@code OTHER_LETTER}
9342 * <li> {@code LETTER_NUMBER}
9343 * </ul>
9344 * or it has contributory property Other_Alphabetic as defined by the
9345 * Unicode Standard.
9346 *
9347 * @param codePoint the character (Unicode code point) to be tested.
9348 * @return {@code true} if the character is a Unicode alphabet
9349 * character, {@code false} otherwise.
9350 * @since 1.7
9351 */
9352 public static boolean isAlphabetic(int codePoint) {
9353 return (((((1 << Character.UPPERCASE_LETTER) |
9354 (1 << Character.LOWERCASE_LETTER) |
9355 (1 << Character.TITLECASE_LETTER) |
9356 (1 << Character.MODIFIER_LETTER) |
9357 (1 << Character.OTHER_LETTER) |
9358 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
9359 CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
9360 }
9361
9362 /**
9363 * Determines if the specified character (Unicode code point) is a CJKV
9364 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
9365 * the Unicode Standard.
9366 *
9367 * @param codePoint the character (Unicode code point) to be tested.
9368 * @return {@code true} if the character is a Unicode ideograph
9369 * character, {@code false} otherwise.
9370 * @since 1.7
9371 */
9372 public static boolean isIdeographic(int codePoint) {
9373 return CharacterData.of(codePoint).isIdeographic(codePoint);
9374 }
9375
9376 /**
9377 * Determines if the specified character is
9378 * permissible as the first character in a Java identifier.
9379 * <p>
9380 * A character may start a Java identifier if and only if
9381 * one of the following conditions is true:
9382 * <ul>
9383 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9384 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
9385 * <li> {@code ch} is a currency symbol (such as {@code '$'})
9386 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
9387 * </ul>
9388 *
9389 * These conditions are tested against the character information from version
9390 * 10.0 of the Unicode Standard.
9391 *
9392 * <p><b>Note:</b> This method cannot handle <a
9393 * href="#supplementary"> supplementary characters</a>. To support
9394 * all Unicode characters, including supplementary characters, use
9395 * the {@link #isJavaIdentifierStart(int)} method.
9396 *
9397 * @param ch the character to be tested.
9398 * @return {@code true} if the character may start a Java identifier;
9399 * {@code false} otherwise.
9400 * @see Character#isJavaIdentifierPart(char)
9401 * @see Character#isLetter(char)
9402 * @see Character#isUnicodeIdentifierStart(char)
9403 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9404 * @since 1.1
9405 */
9406 public static boolean isJavaIdentifierStart(char ch) {
9407 return isJavaIdentifierStart((int)ch);
9408 }
9409
9410 /**
9411 * Determines if the character (Unicode code point) is
9412 * permissible as the first character in a Java identifier.
9413 * <p>
9414 * A character may start a Java identifier if and only if
9415 * one of the following conditions is true:
9416 * <ul>
9417 * <li> {@link #isLetter(int) isLetter(codePoint)}
9418 * returns {@code true}
9419 * <li> {@link #getType(int) getType(codePoint)}
9420 * returns {@code LETTER_NUMBER}
9421 * <li> the referenced character is a currency symbol (such as {@code '$'})
9422 * <li> the referenced character is a connecting punctuation character
9423 * (such as {@code '_'}).
9424 * </ul>
9425 *
9426 * These conditions are tested against the character information from version
9427 * 10.0 of the Unicode Standard.
9428 *
9429 * @param codePoint the character (Unicode code point) to be tested.
9430 * @return {@code true} if the character may start a Java identifier;
9431 * {@code false} otherwise.
9432 * @see Character#isJavaIdentifierPart(int)
9433 * @see Character#isLetter(int)
9434 * @see Character#isUnicodeIdentifierStart(int)
9435 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9436 * @since 1.5
9437 */
9438 public static boolean isJavaIdentifierStart(int codePoint) {
9439 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
9440 }
9441
9442 /**
9443 * Determines if the specified character may be part of a Java
9444 * identifier as other than the first character.
9445 * <p>
9446 * A character may be part of a Java identifier if any of the following
9447 * conditions are true:
9448 * <ul>
9449 * <li> it is a letter
9450 * <li> it is a currency symbol (such as {@code '$'})
9451 * <li> it is a connecting punctuation character (such as {@code '_'})
9452 * <li> it is a digit
9453 * <li> it is a numeric letter (such as a Roman numeral character)
9454 * <li> it is a combining mark
9455 * <li> it is a non-spacing mark
9456 * <li> {@code isIdentifierIgnorable} returns
9457 * {@code true} for the character
9458 * </ul>
9459 *
9460 * These conditions are tested against the character information from version
9461 * 10.0 of the Unicode Standard.
9462 *
9463 * <p><b>Note:</b> This method cannot handle <a
9464 * href="#supplementary"> supplementary characters</a>. To support
9465 * all Unicode characters, including supplementary characters, use
9466 * the {@link #isJavaIdentifierPart(int)} method.
9467 *
9468 * @param ch the character to be tested.
9469 * @return {@code true} if the character may be part of a
9470 * Java identifier; {@code false} otherwise.
9471 * @see Character#isIdentifierIgnorable(char)
9472 * @see Character#isJavaIdentifierStart(char)
9473 * @see Character#isLetterOrDigit(char)
9474 * @see Character#isUnicodeIdentifierPart(char)
9475 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9476 * @since 1.1
9477 */
9478 public static boolean isJavaIdentifierPart(char ch) {
9479 return isJavaIdentifierPart((int)ch);
9480 }
9481
9482 /**
9483 * Determines if the character (Unicode code point) may be part of a Java
9484 * identifier as other than the first character.
9485 * <p>
9486 * A character may be part of a Java identifier if any of the following
9487 * conditions are true:
9488 * <ul>
9489 * <li> it is a letter
9490 * <li> it is a currency symbol (such as {@code '$'})
9491 * <li> it is a connecting punctuation character (such as {@code '_'})
9492 * <li> it is a digit
9493 * <li> it is a numeric letter (such as a Roman numeral character)
9494 * <li> it is a combining mark
9495 * <li> it is a non-spacing mark
9496 * <li> {@link #isIdentifierIgnorable(int)
9497 * isIdentifierIgnorable(codePoint)} returns {@code true} for
9498 * the code point
9499 * </ul>
9500 *
9501 * These conditions are tested against the character information from version
9502 * 10.0 of the Unicode Standard.
9503 *
9504 * @param codePoint the character (Unicode code point) to be tested.
9505 * @return {@code true} if the character may be part of a
9506 * Java identifier; {@code false} otherwise.
9507 * @see Character#isIdentifierIgnorable(int)
9508 * @see Character#isJavaIdentifierStart(int)
9509 * @see Character#isLetterOrDigit(int)
9510 * @see Character#isUnicodeIdentifierPart(int)
9511 * @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
9512 * @since 1.5
9513 */
9514 public static boolean isJavaIdentifierPart(int codePoint) {
9515 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
9516 }
9517
9518 /**
9519 * Determines if the specified character is permissible as the
9520 * first character in a Unicode identifier.
9521 * <p>
9522 * A character may start a Unicode identifier if and only if
9523 * one of the following conditions is true:
9524 * <ul>
9525 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
9526 * <li> {@link #getType(char) getType(ch)} returns
9527 * {@code LETTER_NUMBER}.
9528 * </ul>
9529 *
9530 * <p><b>Note:</b> This method cannot handle <a
9531 * href="#supplementary"> supplementary characters</a>. To support
9532 * all Unicode characters, including supplementary characters, use
9533 * the {@link #isUnicodeIdentifierStart(int)} method.
9534 *
9535 * @param ch the character to be tested.
9536 * @return {@code true} if the character may start a Unicode
9537 * identifier; {@code false} otherwise.
9538 * @see Character#isJavaIdentifierStart(char)
9539 * @see Character#isLetter(char)
9540 * @see Character#isUnicodeIdentifierPart(char)
9541 * @since 1.1
9542 */
9543 public static boolean isUnicodeIdentifierStart(char ch) {
9544 return isUnicodeIdentifierStart((int)ch);
9545 }
9546
9547 /**
9548 * Determines if the specified character (Unicode code point) is permissible as the
9549 * first character in a Unicode identifier.
9550 * <p>
9551 * A character may start a Unicode identifier if and only if
9552 * one of the following conditions is true:
9553 * <ul>
9554 * <li> {@link #isLetter(int) isLetter(codePoint)}
9555 * returns {@code true}
9556 * <li> {@link #getType(int) getType(codePoint)}
9557 * returns {@code LETTER_NUMBER}.
9558 * </ul>
9559 * @param codePoint the character (Unicode code point) to be tested.
9560 * @return {@code true} if the character may start a Unicode
9561 * identifier; {@code false} otherwise.
9562 * @see Character#isJavaIdentifierStart(int)
9563 * @see Character#isLetter(int)
9564 * @see Character#isUnicodeIdentifierPart(int)
9565 * @since 1.5
9566 */
9567 public static boolean isUnicodeIdentifierStart(int codePoint) {
9568 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
9569 }
9570
9571 /**
9572 * Determines if the specified character may be part of a Unicode
9573 * identifier as other than the first character.
9574 * <p>
9575 * A character may be part of a Unicode identifier if and only if
9576 * one of the following statements is true:
9577 * <ul>
9578 * <li> it is a letter
9579 * <li> it is a connecting punctuation character (such as {@code '_'})
9580 * <li> it is a digit
9581 * <li> it is a numeric letter (such as a Roman numeral character)
9582 * <li> it is a combining mark
9583 * <li> it is a non-spacing mark
9584 * <li> {@code isIdentifierIgnorable} returns
9585 * {@code true} for this character.
9586 * </ul>
9587 *
9588 * <p><b>Note:</b> This method cannot handle <a
9589 * href="#supplementary"> supplementary characters</a>. To support
9590 * all Unicode characters, including supplementary characters, use
9591 * the {@link #isUnicodeIdentifierPart(int)} method.
9592 *
9593 * @param ch the character to be tested.
9594 * @return {@code true} if the character may be part of a
9595 * Unicode identifier; {@code false} otherwise.
9596 * @see Character#isIdentifierIgnorable(char)
9597 * @see Character#isJavaIdentifierPart(char)
9598 * @see Character#isLetterOrDigit(char)
9599 * @see Character#isUnicodeIdentifierStart(char)
9600 * @since 1.1
9601 */
9602 public static boolean isUnicodeIdentifierPart(char ch) {
9603 return isUnicodeIdentifierPart((int)ch);
9604 }
9605
9606 /**
9607 * Determines if the specified character (Unicode code point) may be part of a Unicode
9608 * identifier as other than the first character.
9609 * <p>
9610 * A character may be part of a Unicode identifier if and only if
9611 * one of the following statements is true:
9612 * <ul>
9613 * <li> it is a letter
9614 * <li> it is a connecting punctuation character (such as {@code '_'})
9615 * <li> it is a digit
9616 * <li> it is a numeric letter (such as a Roman numeral character)
9617 * <li> it is a combining mark
9618 * <li> it is a non-spacing mark
9619 * <li> {@code isIdentifierIgnorable} returns
9620 * {@code true} for this character.
9621 * </ul>
9622 * @param codePoint the character (Unicode code point) to be tested.
9623 * @return {@code true} if the character may be part of a
9624 * Unicode identifier; {@code false} otherwise.
9625 * @see Character#isIdentifierIgnorable(int)
9626 * @see Character#isJavaIdentifierPart(int)
9627 * @see Character#isLetterOrDigit(int)
9628 * @see Character#isUnicodeIdentifierStart(int)
9629 * @since 1.5
9630 */
9631 public static boolean isUnicodeIdentifierPart(int codePoint) {
9632 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
9633 }
9634
9635 /**
9636 * Determines if the specified character should be regarded as
9637 * an ignorable character in a Java identifier or a Unicode identifier.
9638 * <p>
9639 * The following Unicode characters are ignorable in a Java identifier
9640 * or a Unicode identifier:
9641 * <ul>
9642 * <li>ISO control characters that are not whitespace
9643 * <ul>
9644 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9645 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9646 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9647 * </ul>
9648 *
9649 * <li>all characters that have the {@code FORMAT} general
9650 * category value
9651 * </ul>
9652 *
9653 * <p><b>Note:</b> This method cannot handle <a
9654 * href="#supplementary"> supplementary characters</a>. To support
9655 * all Unicode characters, including supplementary characters, use
9656 * the {@link #isIdentifierIgnorable(int)} method.
9657 *
9658 * @param ch the character to be tested.
9659 * @return {@code true} if the character is an ignorable control
9660 * character that may be part of a Java or Unicode identifier;
9661 * {@code false} otherwise.
9662 * @see Character#isJavaIdentifierPart(char)
9663 * @see Character#isUnicodeIdentifierPart(char)
9664 * @since 1.1
9665 */
9666 public static boolean isIdentifierIgnorable(char ch) {
9667 return isIdentifierIgnorable((int)ch);
9668 }
9669
9670 /**
9671 * Determines if the specified character (Unicode code point) should be regarded as
9672 * an ignorable character in a Java identifier or a Unicode identifier.
9673 * <p>
9674 * The following Unicode characters are ignorable in a Java identifier
9675 * or a Unicode identifier:
9676 * <ul>
9677 * <li>ISO control characters that are not whitespace
9678 * <ul>
9679 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
9680 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
9681 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
9682 * </ul>
9683 *
9684 * <li>all characters that have the {@code FORMAT} general
9685 * category value
9686 * </ul>
9687 *
9688 * @param codePoint the character (Unicode code point) to be tested.
9689 * @return {@code true} if the character is an ignorable control
9690 * character that may be part of a Java or Unicode identifier;
9691 * {@code false} otherwise.
9692 * @see Character#isJavaIdentifierPart(int)
9693 * @see Character#isUnicodeIdentifierPart(int)
9694 * @since 1.5
9695 */
9696 public static boolean isIdentifierIgnorable(int codePoint) {
9697 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
9698 }
9699
9700 /**
9701 * Converts the character argument to lowercase using case
9702 * mapping information from the UnicodeData file.
9703 * <p>
9704 * Note that
9705 * {@code Character.isLowerCase(Character.toLowerCase(ch))}
9706 * does not always return {@code true} for some ranges of
9707 * characters, particularly those that are symbols or ideographs.
9708 *
9709 * <p>In general, {@link String#toLowerCase()} should be used to map
9710 * characters to lowercase. {@code String} case mapping methods
9711 * have several benefits over {@code Character} case mapping methods.
9712 * {@code String} case mapping methods can perform locale-sensitive
9713 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9714 * the {@code Character} case mapping methods cannot.
9715 *
9716 * <p><b>Note:</b> This method cannot handle <a
9717 * href="#supplementary"> supplementary characters</a>. To support
9718 * all Unicode characters, including supplementary characters, use
9719 * the {@link #toLowerCase(int)} method.
9720 *
9721 * @param ch the character to be converted.
9722 * @return the lowercase equivalent of the character, if any;
9723 * otherwise, the character itself.
9724 * @see Character#isLowerCase(char)
9725 * @see String#toLowerCase()
9726 */
9727 public static char toLowerCase(char ch) {
9728 return (char)toLowerCase((int)ch);
9729 }
9730
9731 /**
9732 * Converts the character (Unicode code point) argument to
9733 * lowercase using case mapping information from the UnicodeData
9734 * file.
9735 *
9736 * <p> Note that
9737 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
9738 * does not always return {@code true} for some ranges of
9739 * characters, particularly those that are symbols or ideographs.
9740 *
9741 * <p>In general, {@link String#toLowerCase()} should be used to map
9742 * characters to lowercase. {@code String} case mapping methods
9743 * have several benefits over {@code Character} case mapping methods.
9744 * {@code String} case mapping methods can perform locale-sensitive
9745 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9746 * the {@code Character} case mapping methods cannot.
9747 *
9748 * @param codePoint the character (Unicode code point) to be converted.
9749 * @return the lowercase equivalent of the character (Unicode code
9750 * point), if any; otherwise, the character itself.
9751 * @see Character#isLowerCase(int)
9752 * @see String#toLowerCase()
9753 *
9754 * @since 1.5
9755 */
9756 public static int toLowerCase(int codePoint) {
9757 return CharacterData.of(codePoint).toLowerCase(codePoint);
9758 }
9759
9760 /**
9761 * Converts the character argument to uppercase using case mapping
9762 * information from the UnicodeData file.
9763 * <p>
9764 * Note that
9765 * {@code Character.isUpperCase(Character.toUpperCase(ch))}
9766 * does not always return {@code true} for some ranges of
9767 * characters, particularly those that are symbols or ideographs.
9768 *
9769 * <p>In general, {@link String#toUpperCase()} should be used to map
9770 * characters to uppercase. {@code String} case mapping methods
9771 * have several benefits over {@code Character} case mapping methods.
9772 * {@code String} case mapping methods can perform locale-sensitive
9773 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9774 * the {@code Character} case mapping methods cannot.
9775 *
9776 * <p><b>Note:</b> This method cannot handle <a
9777 * href="#supplementary"> supplementary characters</a>. To support
9778 * all Unicode characters, including supplementary characters, use
9779 * the {@link #toUpperCase(int)} method.
9780 *
9781 * @param ch the character to be converted.
9782 * @return the uppercase equivalent of the character, if any;
9783 * otherwise, the character itself.
9784 * @see Character#isUpperCase(char)
9785 * @see String#toUpperCase()
9786 */
9787 public static char toUpperCase(char ch) {
9788 return (char)toUpperCase((int)ch);
9789 }
9790
9791 /**
9792 * Converts the character (Unicode code point) argument to
9793 * uppercase using case mapping information from the UnicodeData
9794 * file.
9795 *
9796 * <p>Note that
9797 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
9798 * does not always return {@code true} for some ranges of
9799 * characters, particularly those that are symbols or ideographs.
9800 *
9801 * <p>In general, {@link String#toUpperCase()} should be used to map
9802 * characters to uppercase. {@code String} case mapping methods
9803 * have several benefits over {@code Character} case mapping methods.
9804 * {@code String} case mapping methods can perform locale-sensitive
9805 * mappings, context-sensitive mappings, and 1:M character mappings, whereas
9806 * the {@code Character} case mapping methods cannot.
9807 *
9808 * @param codePoint the character (Unicode code point) to be converted.
9809 * @return the uppercase equivalent of the character, if any;
9810 * otherwise, the character itself.
9811 * @see Character#isUpperCase(int)
9812 * @see String#toUpperCase()
9813 *
9814 * @since 1.5
9815 */
9816 public static int toUpperCase(int codePoint) {
9817 return CharacterData.of(codePoint).toUpperCase(codePoint);
9818 }
9819
9820 /**
9821 * Converts the character argument to titlecase using case mapping
9822 * information from the UnicodeData file. If a character has no
9823 * explicit titlecase mapping and is not itself a titlecase char
9824 * according to UnicodeData, then the uppercase mapping is
9825 * returned as an equivalent titlecase mapping. If the
9826 * {@code char} argument is already a titlecase
9827 * {@code char}, the same {@code char} value will be
9828 * returned.
9829 * <p>
9830 * Note that
9831 * {@code Character.isTitleCase(Character.toTitleCase(ch))}
9832 * does not always return {@code true} for some ranges of
9833 * characters.
9834 *
9835 * <p><b>Note:</b> This method cannot handle <a
9836 * href="#supplementary"> supplementary characters</a>. To support
9837 * all Unicode characters, including supplementary characters, use
9838 * the {@link #toTitleCase(int)} method.
9839 *
9840 * @param ch the character to be converted.
9841 * @return the titlecase equivalent of the character, if any;
9842 * otherwise, the character itself.
9843 * @see Character#isTitleCase(char)
9844 * @see Character#toLowerCase(char)
9845 * @see Character#toUpperCase(char)
9846 * @since 1.0.2
9847 */
9848 public static char toTitleCase(char ch) {
9849 return (char)toTitleCase((int)ch);
9850 }
9851
9852 /**
9853 * Converts the character (Unicode code point) argument to titlecase using case mapping
9854 * information from the UnicodeData file. If a character has no
9855 * explicit titlecase mapping and is not itself a titlecase char
9856 * according to UnicodeData, then the uppercase mapping is
9857 * returned as an equivalent titlecase mapping. If the
9858 * character argument is already a titlecase
9859 * character, the same character value will be
9860 * returned.
9861 *
9862 * <p>Note that
9863 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
9864 * does not always return {@code true} for some ranges of
9865 * characters.
9866 *
9867 * @param codePoint the character (Unicode code point) to be converted.
9868 * @return the titlecase equivalent of the character, if any;
9869 * otherwise, the character itself.
9870 * @see Character#isTitleCase(int)
9871 * @see Character#toLowerCase(int)
9872 * @see Character#toUpperCase(int)
9873 * @since 1.5
9874 */
9875 public static int toTitleCase(int codePoint) {
9876 return CharacterData.of(codePoint).toTitleCase(codePoint);
9877 }
9878
9879 /**
9880 * Returns the numeric value of the character {@code ch} in the
9881 * specified radix.
9882 * <p>
9883 * If the radix is not in the range {@code MIN_RADIX} ≤
9884 * {@code radix} ≤ {@code MAX_RADIX} or if the
9885 * value of {@code ch} is not a valid digit in the specified
9886 * radix, {@code -1} is returned. A character is a valid digit
9887 * if at least one of the following is true:
9888 * <ul>
9889 * <li>The method {@code isDigit} is {@code true} of the character
9890 * and the Unicode decimal digit value of the character (or its
9891 * single-character decomposition) is less than the specified radix.
9892 * In this case the decimal digit value is returned.
9893 * <li>The character is one of the uppercase Latin letters
9894 * {@code 'A'} through {@code 'Z'} and its code is less than
9895 * {@code radix + 'A' - 10}.
9896 * In this case, {@code ch - 'A' + 10}
9897 * is returned.
9898 * <li>The character is one of the lowercase Latin letters
9899 * {@code 'a'} through {@code 'z'} and its code is less than
9900 * {@code radix + 'a' - 10}.
9901 * In this case, {@code ch - 'a' + 10}
9902 * is returned.
9903 * <li>The character is one of the fullwidth uppercase Latin letters A
9904 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9905 * and its code is less than
9906 * {@code radix + '\u005CuFF21' - 10}.
9907 * In this case, {@code ch - '\u005CuFF21' + 10}
9908 * is returned.
9909 * <li>The character is one of the fullwidth lowercase Latin letters a
9910 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9911 * and its code is less than
9912 * {@code radix + '\u005CuFF41' - 10}.
9913 * In this case, {@code ch - '\u005CuFF41' + 10}
9914 * is returned.
9915 * </ul>
9916 *
9917 * <p><b>Note:</b> This method cannot handle <a
9918 * href="#supplementary"> supplementary characters</a>. To support
9919 * all Unicode characters, including supplementary characters, use
9920 * the {@link #digit(int, int)} method.
9921 *
9922 * @param ch the character to be converted.
9923 * @param radix the radix.
9924 * @return the numeric value represented by the character in the
9925 * specified radix.
9926 * @see Character#forDigit(int, int)
9927 * @see Character#isDigit(char)
9928 */
9929 public static int digit(char ch, int radix) {
9930 return digit((int)ch, radix);
9931 }
9932
9933 /**
9934 * Returns the numeric value of the specified character (Unicode
9935 * code point) in the specified radix.
9936 *
9937 * <p>If the radix is not in the range {@code MIN_RADIX} ≤
9938 * {@code radix} ≤ {@code MAX_RADIX} or if the
9939 * character is not a valid digit in the specified
9940 * radix, {@code -1} is returned. A character is a valid digit
9941 * if at least one of the following is true:
9942 * <ul>
9943 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
9944 * and the Unicode decimal digit value of the character (or its
9945 * single-character decomposition) is less than the specified radix.
9946 * In this case the decimal digit value is returned.
9947 * <li>The character is one of the uppercase Latin letters
9948 * {@code 'A'} through {@code 'Z'} and its code is less than
9949 * {@code radix + 'A' - 10}.
9950 * In this case, {@code codePoint - 'A' + 10}
9951 * is returned.
9952 * <li>The character is one of the lowercase Latin letters
9953 * {@code 'a'} through {@code 'z'} and its code is less than
9954 * {@code radix + 'a' - 10}.
9955 * In this case, {@code codePoint - 'a' + 10}
9956 * is returned.
9957 * <li>The character is one of the fullwidth uppercase Latin letters A
9958 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
9959 * and its code is less than
9960 * {@code radix + '\u005CuFF21' - 10}.
9961 * In this case,
9962 * {@code codePoint - '\u005CuFF21' + 10}
9963 * is returned.
9964 * <li>The character is one of the fullwidth lowercase Latin letters a
9965 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
9966 * and its code is less than
 * {@code radix + '\u005CuFF41' - 10}.
9968 * In this case,
9969 * {@code codePoint - '\u005CuFF41' + 10}
9970 * is returned.
9971 * </ul>
9972 *
9973 * @param codePoint the character (Unicode code point) to be converted.
9974 * @param radix the radix.
9975 * @return the numeric value represented by the character in the
9976 * specified radix.
9977 * @see Character#forDigit(int, int)
9978 * @see Character#isDigit(int)
9979 * @since 1.5
9980 */
9981 public static int digit(int codePoint, int radix) {
9982 return CharacterData.of(codePoint).digit(codePoint, radix);
9983 }
9984
9985 /**
9986 * Returns the {@code int} value that the specified Unicode
9987 * character represents. For example, the character
 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
9989 * an int with a value of 50.
9990 * <p>
9991 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
9992 * {@code '\u005Cu005A'}), lowercase
9993 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
9994 * full width variant ({@code '\u005CuFF21'} through
9995 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
9996 * {@code '\u005CuFF5A'}) forms have numeric values from 10
9997 * through 35. This is independent of the Unicode specification,
9998 * which does not assign numeric values to these {@code char}
9999 * values.
10000 * <p>
10001 * If the character does not have a numeric value, then -1 is returned.
10002 * If the character has a numeric value that cannot be represented as a
10003 * nonnegative integer (for example, a fractional value), then -2
10004 * is returned.
10005 *
10006 * <p><b>Note:</b> This method cannot handle <a
10007 * href="#supplementary"> supplementary characters</a>. To support
10008 * all Unicode characters, including supplementary characters, use
10009 * the {@link #getNumericValue(int)} method.
10010 *
10011 * @param ch the character to be converted.
10012 * @return the numeric value of the character, as a nonnegative {@code int}
10013 * value; -2 if the character has a numeric value but the value
 * cannot be represented as a nonnegative {@code int} value;
10015 * -1 if the character has no numeric value.
10016 * @see Character#forDigit(int, int)
10017 * @see Character#isDigit(char)
10018 * @since 1.1
10019 */
10020 public static int getNumericValue(char ch) {
10021 return getNumericValue((int)ch);
10022 }
10023
10024 /**
10025 * Returns the {@code int} value that the specified
10026 * character (Unicode code point) represents. For example, the character
10027 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
10028 * an {@code int} with a value of 50.
10029 * <p>
10030 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10031 * {@code '\u005Cu005A'}), lowercase
10032 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10033 * full width variant ({@code '\u005CuFF21'} through
10034 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10035 * {@code '\u005CuFF5A'}) forms have numeric values from 10
10036 * through 35. This is independent of the Unicode specification,
10037 * which does not assign numeric values to these {@code char}
10038 * values.
10039 * <p>
10040 * If the character does not have a numeric value, then -1 is returned.
10041 * If the character has a numeric value that cannot be represented as a
10042 * nonnegative integer (for example, a fractional value), then -2
10043 * is returned.
10044 *
10045 * @param codePoint the character (Unicode code point) to be converted.
10046 * @return the numeric value of the character, as a nonnegative {@code int}
10047 * value; -2 if the character has a numeric value but the value
10048 * can not be represented as a nonnegative {@code int} value;
10049 * -1 if the character has no numeric value.
10050 * @see Character#forDigit(int, int)
10051 * @see Character#isDigit(int)
10052 * @since 1.5
10053 */
10054 public static int getNumericValue(int codePoint) {
10055 return CharacterData.of(codePoint).getNumericValue(codePoint);
10056 }
10057
10058 /**
10059 * Determines if the specified character is ISO-LATIN-1 white space.
10060 * This method returns {@code true} for the following five
10061 * characters only:
10062 * <table class="striped">
10063 * <caption style="display:none">truechars</caption>
10064 * <thead>
10065 * <tr><th scope="col">Character
10066 * <th scope="col">Code
10067 * <th scope="col">Name
10068 * </thead>
10069 * <tbody>
10070 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td>
10071 * <td>{@code HORIZONTAL TABULATION}</td></tr>
10072 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td>
10073 * <td>{@code NEW LINE}</td></tr>
10074 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td>
10075 * <td>{@code FORM FEED}</td></tr>
10076 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td>
10077 * <td>{@code CARRIAGE RETURN}</td></tr>
10078 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td>
10079 * <td>{@code SPACE}</td></tr>
10080 * </tbody>
10081 * </table>
10082 *
10083 * @param ch the character to be tested.
10084 * @return {@code true} if the character is ISO-LATIN-1 white
10085 * space; {@code false} otherwise.
10086 * @see Character#isSpaceChar(char)
10087 * @see Character#isWhitespace(char)
 * @deprecated Replaced by {@link #isWhitespace(char)}.
10089 */
10090 @Deprecated(since="1.1")
10091 public static boolean isSpace(char ch) {
10092 return (ch <= 0x0020) &&
10093 (((((1L << 0x0009) |
10094 (1L << 0x000A) |
10095 (1L << 0x000C) |
10096 (1L << 0x000D) |
10097 (1L << 0x0020)) >> ch) & 1L) != 0);
10098 }
10099
10100
10101 /**
10102 * Determines if the specified character is a Unicode space character.
10103 * A character is considered to be a space character if and only if
10104 * it is specified to be a space character by the Unicode Standard. This
10105 * method returns true if the character's general category type is any of
10106 * the following:
10107 * <ul>
10108 * <li> {@code SPACE_SEPARATOR}
10109 * <li> {@code LINE_SEPARATOR}
10110 * <li> {@code PARAGRAPH_SEPARATOR}
10111 * </ul>
10112 *
10113 * <p><b>Note:</b> This method cannot handle <a
10114 * href="#supplementary"> supplementary characters</a>. To support
10115 * all Unicode characters, including supplementary characters, use
10116 * the {@link #isSpaceChar(int)} method.
10117 *
10118 * @param ch the character to be tested.
10119 * @return {@code true} if the character is a space character;
10120 * {@code false} otherwise.
10121 * @see Character#isWhitespace(char)
10122 * @since 1.1
10123 */
10124 public static boolean isSpaceChar(char ch) {
10125 return isSpaceChar((int)ch);
10126 }
10127
10128 /**
10129 * Determines if the specified character (Unicode code point) is a
10130 * Unicode space character. A character is considered to be a
10131 * space character if and only if it is specified to be a space
10132 * character by the Unicode Standard. This method returns true if
10133 * the character's general category type is any of the following:
10134 *
10135 * <ul>
10136 * <li> {@link #SPACE_SEPARATOR}
10137 * <li> {@link #LINE_SEPARATOR}
10138 * <li> {@link #PARAGRAPH_SEPARATOR}
10139 * </ul>
10140 *
10141 * @param codePoint the character (Unicode code point) to be tested.
10142 * @return {@code true} if the character is a space character;
10143 * {@code false} otherwise.
10144 * @see Character#isWhitespace(int)
10145 * @since 1.5
10146 */
10147 public static boolean isSpaceChar(int codePoint) {
10148 return ((((1 << Character.SPACE_SEPARATOR) |
10149 (1 << Character.LINE_SEPARATOR) |
10150 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
10151 != 0;
10152 }
10153
10154 /**
10155 * Determines if the specified character is white space according to Java.
10156 * A character is a Java whitespace character if and only if it satisfies
10157 * one of the following criteria:
10158 * <ul>
10159 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
10160 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
10161 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
10162 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10163 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10164 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10165 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10166 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10167 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10168 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10169 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10170 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10171 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10172 * </ul>
10173 *
10174 * <p><b>Note:</b> This method cannot handle <a
10175 * href="#supplementary"> supplementary characters</a>. To support
10176 * all Unicode characters, including supplementary characters, use
10177 * the {@link #isWhitespace(int)} method.
10178 *
10179 * @param ch the character to be tested.
10180 * @return {@code true} if the character is a Java whitespace
10181 * character; {@code false} otherwise.
10182 * @see Character#isSpaceChar(char)
10183 * @since 1.1
10184 */
10185 public static boolean isWhitespace(char ch) {
10186 return isWhitespace((int)ch);
10187 }
10188
10189 /**
10190 * Determines if the specified character (Unicode code point) is
10191 * white space according to Java. A character is a Java
10192 * whitespace character if and only if it satisfies one of the
10193 * following criteria:
10194 * <ul>
10195 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
10196 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
10197 * but is not also a non-breaking space ({@code '\u005Cu00A0'},
10198 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
10199 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
10200 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
10201 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
10202 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
10203 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
10204 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
10205 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
10206 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
10207 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
10208 * </ul>
10209 *
10210 * @param codePoint the character (Unicode code point) to be tested.
10211 * @return {@code true} if the character is a Java whitespace
10212 * character; {@code false} otherwise.
10213 * @see Character#isSpaceChar(int)
10214 * @since 1.5
10215 */
10216 public static boolean isWhitespace(int codePoint) {
10217 return CharacterData.of(codePoint).isWhitespace(codePoint);
10218 }
10219
10220 /**
10221 * Determines if the specified character is an ISO control
10222 * character. A character is considered to be an ISO control
10223 * character if its code is in the range {@code '\u005Cu0000'}
10224 * through {@code '\u005Cu001F'} or in the range
10225 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10226 *
10227 * <p><b>Note:</b> This method cannot handle <a
10228 * href="#supplementary"> supplementary characters</a>. To support
10229 * all Unicode characters, including supplementary characters, use
10230 * the {@link #isISOControl(int)} method.
10231 *
10232 * @param ch the character to be tested.
10233 * @return {@code true} if the character is an ISO control character;
10234 * {@code false} otherwise.
10235 *
10236 * @see Character#isSpaceChar(char)
10237 * @see Character#isWhitespace(char)
10238 * @since 1.1
10239 */
10240 public static boolean isISOControl(char ch) {
10241 return isISOControl((int)ch);
10242 }
10243
10244 /**
10245 * Determines if the referenced character (Unicode code point) is an ISO control
10246 * character. A character is considered to be an ISO control
10247 * character if its code is in the range {@code '\u005Cu0000'}
10248 * through {@code '\u005Cu001F'} or in the range
10249 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
10250 *
10251 * @param codePoint the character (Unicode code point) to be tested.
10252 * @return {@code true} if the character is an ISO control character;
10253 * {@code false} otherwise.
10254 * @see Character#isSpaceChar(int)
10255 * @see Character#isWhitespace(int)
10256 * @since 1.5
10257 */
10258 public static boolean isISOControl(int codePoint) {
10259 // Optimized form of:
10260 // (codePoint >= 0x00 && codePoint <= 0x1F) ||
10261 // (codePoint >= 0x7F && codePoint <= 0x9F);
10262 return codePoint <= 0x9F &&
10263 (codePoint >= 0x7F || (codePoint >>> 5 == 0));
10264 }
10265
10266 /**
10267 * Returns a value indicating a character's general category.
10268 *
10269 * <p><b>Note:</b> This method cannot handle <a
10270 * href="#supplementary"> supplementary characters</a>. To support
10271 * all Unicode characters, including supplementary characters, use
10272 * the {@link #getType(int)} method.
10273 *
10274 * @param ch the character to be tested.
10275 * @return a value of type {@code int} representing the
10276 * character's general category.
10277 * @see Character#COMBINING_SPACING_MARK
10278 * @see Character#CONNECTOR_PUNCTUATION
10279 * @see Character#CONTROL
10280 * @see Character#CURRENCY_SYMBOL
10281 * @see Character#DASH_PUNCTUATION
10282 * @see Character#DECIMAL_DIGIT_NUMBER
10283 * @see Character#ENCLOSING_MARK
10284 * @see Character#END_PUNCTUATION
10285 * @see Character#FINAL_QUOTE_PUNCTUATION
10286 * @see Character#FORMAT
10287 * @see Character#INITIAL_QUOTE_PUNCTUATION
10288 * @see Character#LETTER_NUMBER
10289 * @see Character#LINE_SEPARATOR
10290 * @see Character#LOWERCASE_LETTER
10291 * @see Character#MATH_SYMBOL
10292 * @see Character#MODIFIER_LETTER
10293 * @see Character#MODIFIER_SYMBOL
10294 * @see Character#NON_SPACING_MARK
10295 * @see Character#OTHER_LETTER
10296 * @see Character#OTHER_NUMBER
10297 * @see Character#OTHER_PUNCTUATION
10298 * @see Character#OTHER_SYMBOL
10299 * @see Character#PARAGRAPH_SEPARATOR
10300 * @see Character#PRIVATE_USE
10301 * @see Character#SPACE_SEPARATOR
10302 * @see Character#START_PUNCTUATION
10303 * @see Character#SURROGATE
10304 * @see Character#TITLECASE_LETTER
10305 * @see Character#UNASSIGNED
10306 * @see Character#UPPERCASE_LETTER
10307 * @since 1.1
10308 */
10309 public static int getType(char ch) {
10310 return getType((int)ch);
10311 }
10312
10313 /**
10314 * Returns a value indicating a character's general category.
10315 *
10316 * @param codePoint the character (Unicode code point) to be tested.
10317 * @return a value of type {@code int} representing the
10318 * character's general category.
10319 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
10320 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
10321 * @see Character#CONTROL CONTROL
10322 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
10323 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION
10324 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
10325 * @see Character#ENCLOSING_MARK ENCLOSING_MARK
10326 * @see Character#END_PUNCTUATION END_PUNCTUATION
10327 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
10328 * @see Character#FORMAT FORMAT
10329 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
10330 * @see Character#LETTER_NUMBER LETTER_NUMBER
10331 * @see Character#LINE_SEPARATOR LINE_SEPARATOR
10332 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER
10333 * @see Character#MATH_SYMBOL MATH_SYMBOL
10334 * @see Character#MODIFIER_LETTER MODIFIER_LETTER
10335 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
10336 * @see Character#NON_SPACING_MARK NON_SPACING_MARK
10337 * @see Character#OTHER_LETTER OTHER_LETTER
10338 * @see Character#OTHER_NUMBER OTHER_NUMBER
10339 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
10340 * @see Character#OTHER_SYMBOL OTHER_SYMBOL
10341 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
10342 * @see Character#PRIVATE_USE PRIVATE_USE
10343 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR
10344 * @see Character#START_PUNCTUATION START_PUNCTUATION
10345 * @see Character#SURROGATE SURROGATE
10346 * @see Character#TITLECASE_LETTER TITLECASE_LETTER
10347 * @see Character#UNASSIGNED UNASSIGNED
10348 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER
10349 * @since 1.5
10350 */
10351 public static int getType(int codePoint) {
10352 return CharacterData.of(codePoint).getType(codePoint);
10353 }
10354
10355 /**
10356 * Determines the character representation for a specific digit in
10357 * the specified radix. If the value of {@code radix} is not a
10358 * valid radix, or the value of {@code digit} is not a valid
10359 * digit in the specified radix, the null character
10360 * ({@code '\u005Cu0000'}) is returned.
10361 * <p>
10362 * The {@code radix} argument is valid if it is greater than or
10363 * equal to {@code MIN_RADIX} and less than or equal to
10364 * {@code MAX_RADIX}. The {@code digit} argument is valid if
10365 * {@code 0 <= digit < radix}.
10366 * <p>
10367 * If the digit is less than 10, then
10368 * {@code '0' + digit} is returned. Otherwise, the value
10369 * {@code 'a' + digit - 10} is returned.
10370 *
10371 * @param digit the number to convert to a character.
10372 * @param radix the radix.
10373 * @return the {@code char} representation of the specified digit
10374 * in the specified radix.
10375 * @see Character#MIN_RADIX
10376 * @see Character#MAX_RADIX
10377 * @see Character#digit(char, int)
10378 */
10379 public static char forDigit(int digit, int radix) {
10380 if ((digit >= radix) || (digit < 0)) {
10381 return '\0';
10382 }
10383 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
10384 return '\0';
10385 }
10386 if (digit < 10) {
10387 return (char)('0' + digit);
10388 }
10389 return (char)('a' - 10 + digit);
10390 }
10391
10392 /**
10393 * Returns the Unicode directionality property for the given
10394 * character. Character directionality is used to calculate the
10395 * visual ordering of text. The directionality value of undefined
10396 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
10397 *
10398 * <p><b>Note:</b> This method cannot handle <a
10399 * href="#supplementary"> supplementary characters</a>. To support
10400 * all Unicode characters, including supplementary characters, use
10401 * the {@link #getDirectionality(int)} method.
10402 *
10403 * @param ch {@code char} for which the directionality property
10404 * is requested.
10405 * @return the directionality property of the {@code char} value.
10406 *
10407 * @see Character#DIRECTIONALITY_UNDEFINED
10408 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
10409 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
10410 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10411 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
10412 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10413 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10414 * @see Character#DIRECTIONALITY_ARABIC_NUMBER
10415 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10416 * @see Character#DIRECTIONALITY_NONSPACING_MARK
10417 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
10418 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
10419 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
10420 * @see Character#DIRECTIONALITY_WHITESPACE
10421 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
10422 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10423 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10424 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10425 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10426 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10427 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10428 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10429 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
10430 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10431 * @since 1.4
10432 */
10433 public static byte getDirectionality(char ch) {
10434 return getDirectionality((int)ch);
10435 }
10436
10437 /**
10438 * Returns the Unicode directionality property for the given
10439 * character (Unicode code point). Character directionality is
10440 * used to calculate the visual ordering of text. The
 * directionality value of an undefined character is {@link
10442 * #DIRECTIONALITY_UNDEFINED}.
10443 *
10444 * @param codePoint the character (Unicode code point) for which
10445 * the directionality property is requested.
10446 * @return the directionality property of the character.
10447 *
10448 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
10449 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
10450 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
10451 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
10452 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
10453 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
10454 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
10455 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
10456 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
10457 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
10458 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
10459 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
10460 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
10461 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
10462 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
10463 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
10464 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
10465 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
10466 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
10467 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
10468 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
10469 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
10470 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
10471 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
10472 * @since 1.5
10473 */
10474 public static byte getDirectionality(int codePoint) {
10475 return CharacterData.of(codePoint).getDirectionality(codePoint);
10476 }
10477
10478 /**
10479 * Determines whether the character is mirrored according to the
10480 * Unicode specification. Mirrored characters should have their
10481 * glyphs horizontally mirrored when displayed in text that is
10482 * right-to-left. For example, {@code '\u005Cu0028'} LEFT
10483 * PARENTHESIS is semantically defined to be an <i>opening
10484 * parenthesis</i>. This will appear as a "(" in text that is
10485 * left-to-right but as a ")" in text that is right-to-left.
10486 *
10487 * <p><b>Note:</b> This method cannot handle <a
10488 * href="#supplementary"> supplementary characters</a>. To support
10489 * all Unicode characters, including supplementary characters, use
10490 * the {@link #isMirrored(int)} method.
10491 *
10492 * @param ch {@code char} for which the mirrored property is requested
10493 * @return {@code true} if the char is mirrored, {@code false}
10494 * if the {@code char} is not mirrored or is not defined.
10495 * @since 1.4
10496 */
10497 public static boolean isMirrored(char ch) {
10498 return isMirrored((int)ch);
10499 }
10500
10501 /**
10502 * Determines whether the specified character (Unicode code point)
10503 * is mirrored according to the Unicode specification. Mirrored
10504 * characters should have their glyphs horizontally mirrored when
10505 * displayed in text that is right-to-left. For example,
10506 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
10507 * defined to be an <i>opening parenthesis</i>. This will appear
10508 * as a "(" in text that is left-to-right but as a ")" in text
10509 * that is right-to-left.
10510 *
10511 * @param codePoint the character (Unicode code point) to be tested.
10512 * @return {@code true} if the character is mirrored, {@code false}
10513 * if the character is not mirrored or is not defined.
10514 * @since 1.5
10515 */
10516 public static boolean isMirrored(int codePoint) {
10517 return CharacterData.of(codePoint).isMirrored(codePoint);
10518 }
10519
10520 /**
10521 * Compares two {@code Character} objects numerically.
10522 *
10523 * @param anotherCharacter the {@code Character} to be compared.
 *
10525 * @return the value {@code 0} if the argument {@code Character}
10526 * is equal to this {@code Character}; a value less than
10527 * {@code 0} if this {@code Character} is numerically less
10528 * than the {@code Character} argument; and a value greater than
10529 * {@code 0} if this {@code Character} is numerically greater
10530 * than the {@code Character} argument (unsigned comparison).
10531 * Note that this is strictly a numerical comparison; it is not
10532 * locale-dependent.
10533 * @since 1.2
10534 */
10535 public int compareTo(Character anotherCharacter) {
10536 return compare(this.value, anotherCharacter.value);
10537 }
10538
10539 /**
10540 * Compares two {@code char} values numerically.
10541 * The value returned is identical to what would be returned by:
10542 * <pre>
10543 * Character.valueOf(x).compareTo(Character.valueOf(y))
10544 * </pre>
10545 *
10546 * @param x the first {@code char} to compare
10547 * @param y the second {@code char} to compare
10548 * @return the value {@code 0} if {@code x == y};
10549 * a value less than {@code 0} if {@code x < y}; and
10550 * a value greater than {@code 0} if {@code x > y}
10551 * @since 1.7
10552 */
10553 public static int compare(char x, char y) {
10554 return x - y;
10555 }
10556
10557 /**
10558 * Converts the character (Unicode code point) argument to uppercase using
10559 * information from the UnicodeData file.
10560 *
10561 * @param codePoint the character (Unicode code point) to be converted.
10562 * @return either the uppercase equivalent of the character, if
10563 * any, or an error flag ({@code Character.ERROR})
10564 * that indicates that a 1:M {@code char} mapping exists.
10565 * @see Character#isLowerCase(char)
10566 * @see Character#isUpperCase(char)
10567 * @see Character#toLowerCase(char)
10568 * @see Character#toTitleCase(char)
10569 * @since 1.4
10570 */
10571 static int toUpperCaseEx(int codePoint) {
10572 assert isValidCodePoint(codePoint);
10573 return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
10574 }
10575
10576 /**
10577 * Converts the character (Unicode code point) argument to uppercase using case
10578 * mapping information from the SpecialCasing file in the Unicode
10579 * specification. If a character has no explicit uppercase
10580 * mapping, then the {@code char} itself is returned in the
10581 * {@code char[]}.
10582 *
10583 * @param codePoint the character (Unicode code point) to be converted.
10584 * @return a {@code char[]} with the uppercased character.
10585 * @since 1.4
10586 */
10587 static char[] toUpperCaseCharArray(int codePoint) {
10588 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
10589 assert isBmpCodePoint(codePoint);
10590 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
10591 }
10592
/**
 * The number of bits used to represent a {@code char} value in unsigned
 * binary form, constant {@code 16}.
 *
 * <p>{@link #BYTES} is derived from this value.
 *
 * @since 1.5
 */
public static final int SIZE = 16;
10600
/**
 * The number of bytes used to represent a {@code char} value in unsigned
 * binary form. The value is computed as {@code SIZE / Byte.SIZE}.
 *
 * @since 1.8
 */
public static final int BYTES = SIZE / Byte.SIZE;
10608
10609 /**
10610 * Returns the value obtained by reversing the order of the bytes in the
10611 * specified {@code char} value.
10612 *
10613 * @param ch The {@code char} of which to reverse the byte order.
10614 * @return the value obtained by reversing (or, equivalently, swapping)
10615 * the bytes in the specified {@code char} value.
10616 * @since 1.5
10617 */
10618 @HotSpotIntrinsicCandidate
10619 public static char reverseBytes(char ch) {
10620 return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
10621 }
10622
10623 /**
10624 * Returns the Unicode name of the specified character
10625 * {@code codePoint}, or null if the code point is
10626 * {@link #UNASSIGNED unassigned}.
10627 * <p>
10628 * Note: if the specified character is not assigned a name by
10629 * the <i>UnicodeData</i> file (part of the Unicode Character
10630 * Database maintained by the Unicode Consortium), the returned
 * name is the same as the result of the expression:
10632 *
10633 * <blockquote>{@code
10634 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10635 * + " "
10636 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10637 *
10638 * }</blockquote>
10639 *
10640 * @param codePoint the character (Unicode code point)
10641 *
10642 * @return the Unicode name of the specified character, or null if
10643 * the code point is unassigned.
10644 *
10645 * @throws IllegalArgumentException if the specified
10646 * {@code codePoint} is not a valid Unicode
10647 * code point.
10648 *
10649 * @since 1.7
10650 */
10651 public static String getName(int codePoint) {
10652 if (!isValidCodePoint(codePoint)) {
10653 throw new IllegalArgumentException(
10654 String.format("Not a valid Unicode code point: 0x%X", codePoint));
10655 }
10656 String name = CharacterName.getInstance().getName(codePoint);
10657 if (name != null)
10658 return name;
10659 if (getType(codePoint) == UNASSIGNED)
10660 return null;
10661 UnicodeBlock block = UnicodeBlock.of(codePoint);
10662 if (block != null)
10663 return block.toString().replace('_', ' ') + " "
10664 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10665 // should never come here
10666 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10667 }
10668
10669 /**
10670 * Returns the code point value of the Unicode character specified by
10671 * the given Unicode character name.
10672 * <p>
10673 * Note: if a character is not assigned a name by the <i>UnicodeData</i>
10674 * file (part of the Unicode Character Database maintained by the Unicode
 * Consortium), its name is defined as the result of the expression:
10676 *
10677 * <blockquote>{@code
10678 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
10679 * + " "
10680 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
10681 *
10682 * }</blockquote>
10683 * <p>
10684 * The {@code name} matching is case insensitive, with any leading and
 * trailing whitespace characters removed.
10686 *
10687 * @param name the Unicode character name
10688 *
10689 * @return the code point value of the character specified by its name.
10690 *
10691 * @throws IllegalArgumentException if the specified {@code name}
10692 * is not a valid Unicode character name.
10693 * @throws NullPointerException if {@code name} is {@code null}
10694 *
10695 * @since 9
10696 */
10697 public static int codePointOf(String name) {
10698 name = name.trim().toUpperCase(Locale.ROOT);
10699 int cp = CharacterName.getInstance().getCodePoint(name);
10700 if (cp != -1)
10701 return cp;
10702 try {
10703 int off = name.lastIndexOf(' ');
10704 if (off != -1) {
10705 cp = Integer.parseInt(name, off + 1, name.length(), 16);
10706 if (isValidCodePoint(cp) && name.equals(getName(cp)))
10707 return cp;
10708 }
10709 } catch (Exception x) {}
10710 throw new IllegalArgumentException("Unrecognized character name :" + name);
10711 }
10712 }
10713