Monitoring JavaMelody sur /demo

1 /*

2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.

3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

4  *

5  * This code is free software; you can redistribute it and/or modify it

6  * under the terms of the GNU General Public License version 2 only, as

7  * published by the Free Software Foundation.  Oracle designates this

8  * particular file as subject to the "Classpath" exception as provided

9  * by Oracle in the LICENSE file that accompanied this code.

10  *

11  * This code is distributed in the hope that it will be useful, but WITHOUT

12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

14  * version 2 for more details (a copy is included in the LICENSE file that

15  * accompanied this code).

16  *

17  * You should have received a copy of the GNU General Public License version

18  * 2 along with this work; if not, write to the Free Software Foundation,

19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

20  *

21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

22  * or visit www.oracle.com if you need additional information or have any

23  * questions.

24  */

25 

26 package java.lang;

27 

28 import java.text.BreakIterator;

29 import java.util.HashSet;

30 import java.util.Hashtable;

31 import java.util.Iterator;

32 import java.util.Locale;

33 import sun.text.Normalizer;

34 

35 

36 /**

37  * This is a utility class for <code>String.toLowerCase()</code> and

38  * <code>String.toUpperCase()</code>, that handles special casing with

39  * conditions.  In other words, it handles the mappings with conditions

40  * that are defined in

41  * <a href="http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt">Special

42  * Casing Properties</a> file.

43  * <p>

44  * Note that the unconditional case mappings (including 1:M mappings)

45  * are handled in <code>Character.toLower/UpperCase()</code>.

46  */

47 final class ConditionalSpecialCasing {

48 

49     // context conditions.

50     static final int FINAL_CASED =              1;

51     static final int AFTER_SOFT_DOTTED =        2;

52     static final int MORE_ABOVE =               3;

53     static final int AFTER_I =                  4;

54     static final int NOT_BEFORE_DOT =           5;

55 

56     // combining class definitions

57     static final int COMBINING_CLASS_ABOVE = 230;

58 

59     // Special case mapping entries

60     static Entry[] entry = {

61         //# ================================================================================

62         //# Conditional mappings

63         //# ================================================================================

64         new Entry(0x03A3, new char[]{0x03C2}, new char[]{0x03A3}, null, FINAL_CASED), // # GREEK CAPITAL LETTER SIGMA

65         new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, null, 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE

66 

67         //# ================================================================================

68         //# Locale-sensitive mappings

69         //# ================================================================================

70         //# Lithuanian

71         new Entry(0x0307, new char[]{0x0307}, new char[]{}, "lt",  AFTER_SOFT_DOTTED), // # COMBINING DOT ABOVE

72         new Entry(0x0049, new char[]{0x0069, 0x0307}, new char[]{0x0049}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I

73         new Entry(0x004A, new char[]{0x006A, 0x0307}, new char[]{0x004A}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER J

74         new Entry(0x012E, new char[]{0x012F, 0x0307}, new char[]{0x012E}, "lt", MORE_ABOVE), // # LATIN CAPITAL LETTER I WITH OGONEK

75         new Entry(0x00CC, new char[]{0x0069, 0x0307, 0x0300}, new char[]{0x00CC}, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE

76         new Entry(0x00CD, new char[]{0x0069, 0x0307, 0x0301}, new char[]{0x00CD}, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE

77         new Entry(0x0128, new char[]{0x0069, 0x0307, 0x0303}, new char[]{0x0128}, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE

78 

79         //# ================================================================================

80         //# Turkish and Azeri

81         new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE

82         new Entry(0x0130, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE

83         new Entry(0x0307, new char[]{}, new char[]{0x0307}, "tr", AFTER_I), // # COMBINING DOT ABOVE

84         new Entry(0x0307, new char[]{}, new char[]{0x0307}, "az", AFTER_I), // # COMBINING DOT ABOVE

85         new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I

86         new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I

87         new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN SMALL LETTER I

88         new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0)  // # LATIN SMALL LETTER I

89     };

90 

91     // A hash table that contains the above entries

92     static Hashtable<Integer, HashSet<Entry>> entryTable = new Hashtable<>();

93     static {

94         // create hashtable from the entry

95         for (Entry cur : entry) {

96             Integer cp = cur.getCodePoint();

97             HashSet<Entry> set = entryTable.get(cp);

98             if (set == null) {

99                 set = new HashSet<>();

100                 entryTable.put(cp, set);

101             }

102             set.add(cur);

103         }

104     }

105 

106     static int toLowerCaseEx(String src, int index, Locale locale) {

107         char[] result = lookUpTable(src, index, locale, true);

108 

109         if (result != null) {

110             if (result.length == 1) {

111                 return result[0];

112             } else {

113                 return Character.ERROR;

114             }

115         } else {

116             // default to Character class' one

117             return Character.toLowerCase(src.codePointAt(index));

118         }

119     }

120 

121     static int toUpperCaseEx(String src, int index, Locale locale) {

122         char[] result = lookUpTable(src, index, locale, false);

123 

124         if (result != null) {

125             if (result.length == 1) {

126                 return result[0];

127             } else {

128                 return Character.ERROR;

129             }

130         } else {

131             // default to Character class' one

132             return Character.toUpperCaseEx(src.codePointAt(index));

133         }

134     }

135 

136     static char[] toLowerCaseCharArray(String src, int index, Locale locale) {

137         return lookUpTable(src, index, locale, true);

138     }

139 

140     static char[] toUpperCaseCharArray(String src, int index, Locale locale) {

141         char[] result = lookUpTable(src, index, locale, false);

142         if (result != null) {

143             return result;

144         } else {

145             return Character.toUpperCaseCharArray(src.codePointAt(index));

146         }

147     }

148 

149     private static char[] lookUpTable(String src, int index, Locale locale, boolean bLowerCasing) {

150         HashSet<Entry> set = entryTable.get(src.codePointAt(index));

151         char[] ret = null;

152 

153         if (set != null) {

154             Iterator<Entry> iter = set.iterator();

155             String currentLang = locale.getLanguage();

156             while (iter.hasNext()) {

157                 Entry entry = iter.next();

158                 String conditionLang = entry.getLanguage();

159                 if (((conditionLang == null) || (conditionLang.equals(currentLang))) &&

160                         isConditionMet(src, index, locale, entry.getCondition())) {

161                     ret = bLowerCasing ? entry.getLowerCase() : entry.getUpperCase();

162                     if (conditionLang != null) {

163                         break;

164                     }

165                 }

166             }

167         }

168 

169         return ret;

170     }

171 

172     private static boolean isConditionMet(String src, int index, Locale locale, int condition) {

173         switch (condition) {

174         case FINAL_CASED:

175             return isFinalCased(src, index, locale);

176 

177         case AFTER_SOFT_DOTTED:

178             return isAfterSoftDotted(src, index);

179 

180         case MORE_ABOVE:

181             return isMoreAbove(src, index);

182 

183         case AFTER_I:

184             return isAfterI(src, index);

185 

186         case NOT_BEFORE_DOT:

187             return !isBeforeDot(src, index);

188 

189         default:

190             return true;

191         }

192     }

193 

194     /**

195      * Implements the "Final_Cased" condition

196      *

197      * Specification: Within the closest word boundaries containing C, there is a cased

198      * letter before C, and there is no cased letter after C.

199      *

200      * Regular Expression:

201      *   Before C: [{cased==true}][{wordBoundary!=true}]*

202      *   After C: !([{wordBoundary!=true}]*[{cased}])

203      */

204     private static boolean isFinalCased(String src, int index, Locale locale) {

205         BreakIterator wordBoundary = BreakIterator.getWordInstance(locale);

206         wordBoundary.setText(src);

207         int ch;

208 

209         // Look for a preceding 'cased' letter

210         for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i);

211                 i -= Character.charCount(ch)) {

212 

213             ch = src.codePointBefore(i);

214             if (isCased(ch)) {

215 

216                 int len = src.length();

217                 // Check that there is no 'cased' letter after the index

218                 for (i = index + Character.charCount(src.codePointAt(index));

219                         (i < len) && !wordBoundary.isBoundary(i);

220                         i += Character.charCount(ch)) {

221 

222                     ch = src.codePointAt(i);

223                     if (isCased(ch)) {

224                         return false;

225                     }

226                 }

227 

228                 return true;

229             }

230         }

231 

232         return false;

233     }

234 

235     /**

236      * Implements the "After_I" condition

237      *

238      * Specification: The last preceding base character was an uppercase I,

239      * and there is no intervening combining character class 230 (ABOVE).

240      *

241      * Regular Expression:

242      *   Before C: [I]([{cc!=230}&{cc!=0}])*

243      */

244     private static boolean isAfterI(String src, int index) {

245         int ch;

246         int cc;

247 

248         // Look for the last preceding base character

249         for (int i = index; i > 0; i -= Character.charCount(ch)) {

250 

251             ch = src.codePointBefore(i);

252 

253             if (ch == 'I') {

254                 return true;

255             } else {

256                 cc = Normalizer.getCombiningClass(ch);

257                 if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {

258                     return false;

259                 }

260             }

261         }

262 

263         return false;

264     }

265 

266     /**

267      * Implements the "After_Soft_Dotted" condition

268      *

269      * Specification: The last preceding character with combining class

270      * of zero before C was Soft_Dotted, and there is no intervening

271      * combining character class 230 (ABOVE).

272      *

273      * Regular Expression:

274      *   Before C: [{Soft_Dotted==true}]([{cc!=230}&{cc!=0}])*

275      */

276     private static boolean isAfterSoftDotted(String src, int index) {

277         int ch;

278         int cc;

279 

280         // Look for the last preceding character

281         for (int i = index; i > 0; i -= Character.charCount(ch)) {

282 

283             ch = src.codePointBefore(i);

284 

285             if (isSoftDotted(ch)) {

286                 return true;

287             } else {

288                 cc = Normalizer.getCombiningClass(ch);

289                 if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {

290                     return false;

291                 }

292             }

293         }

294 

295         return false;

296     }

297 

298     /**

299      * Implements the "More_Above" condition

300      *

301      * Specification: C is followed by one or more characters of combining

302      * class 230 (ABOVE) in the combining character sequence.

303      *

304      * Regular Expression:

305      *   After C: [{cc!=0}]*[{cc==230}]

306      */

307     private static boolean isMoreAbove(String src, int index) {

308         int ch;

309         int cc;

310         int len = src.length();

311 

312         // Look for a following ABOVE combining class character

313         for (int i = index + Character.charCount(src.codePointAt(index));

314                 i < len; i += Character.charCount(ch)) {

315 

316             ch = src.codePointAt(i);

317             cc = Normalizer.getCombiningClass(ch);

318 

319             if (cc == COMBINING_CLASS_ABOVE) {

320                 return true;

321             } else if (cc == 0) {

322                 return false;

323             }

324         }

325 

326         return false;

327     }

328 

329     /**

330      * Implements the "Before_Dot" condition

331      *

332      * Specification: C is followed by <code>U+0307 COMBINING DOT ABOVE</code>.

333      * Any sequence of characters with a combining class that is

334      * neither 0 nor 230 may intervene between the current character

335      * and the combining dot above.

336      *

337      * Regular Expression:

338      *   After C: ([{cc!=230}&{cc!=0}])*[\u0307]

339      */

340     private static boolean isBeforeDot(String src, int index) {

341         int ch;

342         int cc;

343         int len = src.length();

344 

345         // Look for a following COMBINING DOT ABOVE

346         for (int i = index + Character.charCount(src.codePointAt(index));

347                 i < len; i += Character.charCount(ch)) {

348 

349             ch = src.codePointAt(i);

350 

351             if (ch == '\u0307') {

352                 return true;

353             } else {

354                 cc = Normalizer.getCombiningClass(ch);

355                 if ((cc == 0) || (cc == COMBINING_CLASS_ABOVE)) {

356                     return false;

357                 }

358             }

359         }

360 

361         return false;

362     }

363 

364     /**

365      * Examines whether a character is 'cased'.

366      *

367      * A character C is defined to be 'cased' if and only if at least one of

368      * following are true for C: uppercase==true, or lowercase==true, or

369      * general_category==titlecase_letter.

370      *

371      * The uppercase and lowercase property values are specified in the data

372      * file DerivedCoreProperties.txt in the Unicode Character Database.

373      */

374     private static boolean isCased(int ch) {

375         int type = Character.getType(ch);

376         if (type == Character.LOWERCASE_LETTER ||

377                 type == Character.UPPERCASE_LETTER ||

378                 type == Character.TITLECASE_LETTER) {

379             return true;

380         } else {

381             // Check for Other_Lowercase and Other_Uppercase

382             //

383             if ((ch >= 0x02B0) && (ch <= 0x02B8)) {

384                 // MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y

385                 return true;

386             } else if ((ch >= 0x02C0) && (ch <= 0x02C1)) {

387                 // MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP

388                 return true;

389             } else if ((ch >= 0x02E0) && (ch <= 0x02E4)) {

390                 // MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP

391                 return true;

392             } else if (ch == 0x0345) {

393                 // COMBINING GREEK YPOGEGRAMMENI

394                 return true;

395             } else if (ch == 0x037A) {

396                 // GREEK YPOGEGRAMMENI

397                 return true;

398             } else if ((ch >= 0x1D2C) && (ch <= 0x1D61)) {

399                 // MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI

400                 return true;

401             } else if ((ch >= 0x2160) && (ch <= 0x217F)) {

402                 // ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND

403                 // SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND

404                 return true;

405             } else if ((ch >= 0x24B6) && (ch <= 0x24E9)) {

406                 // CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z

407                 // CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z

408                 return true;

409             } else {

410                 return false;

411             }

412         }

413     }

414 

415     private static boolean isSoftDotted(int ch) {

416         switch (ch) {

417         case 0x0069: // Soft_Dotted # L&       LATIN SMALL LETTER I

418         case 0x006A: // Soft_Dotted # L&       LATIN SMALL LETTER J

419         case 0x012F: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH OGONEK

420         case 0x0268: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH STROKE

421         case 0x0456: // Soft_Dotted # L&       CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I

422         case 0x0458: // Soft_Dotted # L&       CYRILLIC SMALL LETTER JE

423         case 0x1D62: // Soft_Dotted # L&       LATIN SUBSCRIPT SMALL LETTER I

424         case 0x1E2D: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH TILDE BELOW

425         case 0x1ECB: // Soft_Dotted # L&       LATIN SMALL LETTER I WITH DOT BELOW

426         case 0x2071: // Soft_Dotted # L&       SUPERSCRIPT LATIN SMALL LETTER I

427             return true;

428         default:

429             return false;

430         }

431     }

432 

433     /**

434      * An internal class that represents an entry in the Special Casing Properties.

435      */

436     static class Entry {

437         int ch;

438         char [] lower;

439         char [] upper;

440         String lang;

441         int condition;

442 

443         Entry(int ch, char[] lower, char[] upper, String lang, int condition) {

444             this.ch = ch;

445             this.lower = lower;

446             this.upper = upper;

447             this.lang = lang;

448             this.condition = condition;

449         }

450 

451         int getCodePoint() {

452             return ch;

453         }

454 

455         char[] getLowerCase() {

456             return lower;

457         }

458 

459         char[] getUpperCase() {

460             return upper;

461         }

462 

463         String getLanguage() {

464             return lang;

465         }

466 

467         int getCondition() {

468             return condition;

469         }

470     }

471 }

472