1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */

17 package org.apache.tomcat.util.buf;
18
19 import java.nio.charset.Charset;
20 import java.nio.charset.CharsetDecoder;
21 import java.nio.charset.CharsetEncoder;
22 import java.util.Locale;
23 import java.util.concurrent.ConcurrentHashMap;
24 import java.util.concurrent.ConcurrentMap;
25
/**
 * Cache of {@link Charset} instances keyed by lower case charset name or
 * alias.
 * <p>
 * The charsets named in {@link #INITIAL_CHARSETS} are resolved when the cache
 * is constructed. The names in {@link #LAZY_CHARSETS} are registered with a
 * placeholder and are only resolved the first time they are requested. Names
 * that are in neither list are never resolved by this cache.
 */
public class CharsetCache {

    /* Note: Package private to enable testing without reflection */
    static final String[] INITIAL_CHARSETS = new String[] { "iso-8859-1", "utf-8" };

    /*
     *  Note: Package private to enable testing without reflection
     */
    static final String[] LAZY_CHARSETS = new String[] {
            // Initial set from Oracle JDK 8 u192
            "037", "1006", "1025", "1026", "1046", "1047", "1089", "1097", "1098", "1112", "1122", "1123", "1124",
            "1140", "1141", "1142", "1143", "1144", "1145", "1146", "1147", "1148", "1149", "1166", "1364", "1381",
            "1383", "273", "277", "278", "280", "284", "285", "290", "297", "300", "33722", "420", "424", "437", "500",
            "5601", "646", "737", "775", "813", "834", "838", "850", "852", "855", "856", "857", "858", "860", "861",
            "862", "863", "864", "865", "866", "868", "869", "870", "871", "874", "875", "8859_13", "8859_15", "8859_2",
            "8859_3", "8859_4", "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "912", "913", "914", "915", "916",
            "918", "920", "921", "922", "923", "930", "933", "935", "937", "939", "942", "942c", "943", "943c", "948",
            "949", "949c", "950", "964", "970", "ansi-1251", "ansi_x3.4-1968", "ansi_x3.4-1986", "arabic", "ascii",
            "ascii7", "asmo-708", "big5", "big5-hkscs", "big5-hkscs", "big5-hkscs-2001", "big5-hkscs:unicode3.0",
            "big5_hkscs", "big5_hkscs_2001", "big5_solaris", "big5hk", "big5hk-2001", "big5hkscs", "big5hkscs-2001",
            "ccsid00858", "ccsid01140", "ccsid01141", "ccsid01142", "ccsid01143", "ccsid01144", "ccsid01145",
            "ccsid01146", "ccsid01147", "ccsid01148", "ccsid01149", "cesu-8", "cesu8", "cns11643", "compound_text",
            "cp-ar", "cp-gr", "cp-is", "cp00858", "cp01140", "cp01141", "cp01142", "cp01143", "cp01144", "cp01145",
            "cp01146", "cp01147", "cp01148", "cp01149", "cp037", "cp1006", "cp1025", "cp1026", "cp1046", "cp1047",
            "cp1089", "cp1097", "cp1098", "cp1112", "cp1122", "cp1123", "cp1124", "cp1140", "cp1141", "cp1142",
            "cp1143", "cp1144", "cp1145", "cp1146", "cp1147", "cp1148", "cp1149", "cp1166", "cp1250", "cp1251",
            "cp1252", "cp1253", "cp1254", "cp1255", "cp1256", "cp1257", "cp1258", "cp1364", "cp1381", "cp1383", "cp273",
            "cp277", "cp278", "cp280", "cp284", "cp285", "cp290", "cp297", "cp300", "cp33722", "cp367", "cp420",
            "cp424", "cp437", "cp500", "cp50220", "cp50221", "cp5346", "cp5347", "cp5348", "cp5349", "cp5350", "cp5353",
            "cp737", "cp775", "cp813", "cp833", "cp834", "cp838", "cp850", "cp852", "cp855", "cp856", "cp857", "cp858",
            "cp860", "cp861", "cp862", "cp863", "cp864", "cp865", "cp866", "cp868", "cp869", "cp870", "cp871", "cp874",
            "cp875", "cp912", "cp913", "cp914", "cp915", "cp916", "cp918", "cp920", "cp921", "cp922", "cp923", "cp930",
            "cp933", "cp935", "cp936", "cp937", "cp939", "cp942", "cp942c", "cp943", "cp943c", "cp948", "cp949",
            "cp949c", "cp950", "cp964", "cp970", "cpibm284", "cpibm285", "cpibm297", "cpibm37", "cs-ebcdic-cp-ca",
            "cs-ebcdic-cp-nl", "cs-ebcdic-cp-us", "cs-ebcdic-cp-wt", "csascii", "csbig5", "cscesu-8", "cseuckr",
            "cseucpkdfmtjapanese", "cshalfwidthkatakana", "csibm037", "csibm278", "csibm284", "csibm285", "csibm290",
            "csibm297", "csibm420", "csibm424", "csibm500", "csibm857", "csibm860", "csibm861", "csibm862", "csibm863",
            "csibm864", "csibm865", "csibm866", "csibm868", "csibm869", "csibm870", "csibm871", "csiso153gost1976874",
            "csiso159jisx02121990", "csiso2022cn", "csiso2022jp", "csiso2022jp2", "csiso2022kr", "csiso87jisx0208",
            "csisolatin0", "csisolatin2", "csisolatin3", "csisolatin4", "csisolatin5", "csisolatin9",
            "csisolatinarabic", "csisolatincyrillic", "csisolatingreek", "csisolatinhebrew", "csjisencoding", "cskoi8r",
            "cspc850multilingual", "cspc862latinhebrew", "cspc8codepage437", "cspcp852", "cspcp855", "csshiftjis",
            "cswindows31j", "cyrillic", "default", "ebcdic-cp-ar1", "ebcdic-cp-ar2", "ebcdic-cp-bh", "ebcdic-cp-ca",
            "ebcdic-cp-ch", "ebcdic-cp-fr", "ebcdic-cp-gb", "ebcdic-cp-he", "ebcdic-cp-is", "ebcdic-cp-nl",
            "ebcdic-cp-roece", "ebcdic-cp-se", "ebcdic-cp-us", "ebcdic-cp-wt", "ebcdic-cp-yu", "ebcdic-de-273+euro",
            "ebcdic-dk-277+euro", "ebcdic-es-284+euro", "ebcdic-fi-278+euro", "ebcdic-fr-277+euro", "ebcdic-gb",
            "ebcdic-gb-285+euro", "ebcdic-international-500+euro", "ebcdic-it-280+euro", "ebcdic-jp-kana",
            "ebcdic-no-277+euro", "ebcdic-s-871+euro", "ebcdic-se-278+euro", "ebcdic-sv", "ebcdic-us-037+euro",
            "ecma-114", "ecma-118", "elot_928", "euc-cn", "euc-jp", "euc-jp-linux", "euc-kr", "euc-tw", "euc_cn",
            "euc_jp", "euc_jp_linux", "euc_jp_solaris", "euc_kr", "euc_tw", "euccn", "eucjis", "eucjp", "eucjp-open",
            "euckr", "euctw", "extended_unix_code_packed_format_for_japanese", "gb18030", "gb18030-2000", "gb2312",
            "gb2312", "gb2312-1980", "gb2312-80", "gbk", "greek", "greek8", "hebrew", "ibm-037", "ibm-1006", "ibm-1025",
            "ibm-1026", "ibm-1046", "ibm-1047", "ibm-1089", "ibm-1097", "ibm-1098", "ibm-1112", "ibm-1122", "ibm-1123",
            "ibm-1124", "ibm-1166", "ibm-1364", "ibm-1381", "ibm-1383", "ibm-273", "ibm-277", "ibm-278", "ibm-280",
            "ibm-284", "ibm-285", "ibm-290", "ibm-297", "ibm-300", "ibm-33722", "ibm-33722_vascii_vpua", "ibm-37",
            "ibm-420", "ibm-424", "ibm-437", "ibm-500", "ibm-5050", "ibm-737", "ibm-775", "ibm-813", "ibm-833",
            "ibm-834", "ibm-838", "ibm-850", "ibm-852", "ibm-855", "ibm-856", "ibm-857", "ibm-860", "ibm-861",
            "ibm-862", "ibm-863", "ibm-864", "ibm-865", "ibm-866", "ibm-868", "ibm-869", "ibm-870", "ibm-871",
            "ibm-874", "ibm-875", "ibm-912", "ibm-913", "ibm-914", "ibm-915", "ibm-916", "ibm-918", "ibm-920",
            "ibm-921", "ibm-922", "ibm-923", "ibm-930", "ibm-933", "ibm-935", "ibm-937", "ibm-939", "ibm-942",
            "ibm-942c", "ibm-943", "ibm-943c", "ibm-948", "ibm-949", "ibm-949c", "ibm-950", "ibm-964", "ibm-970",
            "ibm-euckr", "ibm-thai", "ibm00858", "ibm01140", "ibm01141", "ibm01142", "ibm01143", "ibm01144", "ibm01145",
            "ibm01146", "ibm01147", "ibm01148", "ibm01149", "ibm037", "ibm037", "ibm1006", "ibm1025", "ibm1026",
            "ibm1026", "ibm1046", "ibm1047", "ibm1089", "ibm1097", "ibm1098", "ibm1112", "ibm1122", "ibm1123",
            "ibm1124", "ibm1166", "ibm1364", "ibm1381", "ibm1383", "ibm273", "ibm273", "ibm277", "ibm277", "ibm278",
            "ibm278", "ibm280", "ibm280", "ibm284", "ibm284", "ibm285", "ibm285", "ibm290", "ibm290", "ibm297",
            "ibm297", "ibm300", "ibm33722", "ibm367", "ibm420", "ibm420", "ibm424", "ibm424", "ibm437", "ibm437",
            "ibm500", "ibm500", "ibm737", "ibm775", "ibm775", "ibm813", "ibm833", "ibm834", "ibm838", "ibm850",
            "ibm850", "ibm852", "ibm852", "ibm855", "ibm855", "ibm856", "ibm857", "ibm857", "ibm860", "ibm860",
            "ibm861", "ibm861", "ibm862", "ibm862", "ibm863", "ibm863", "ibm864", "ibm864", "ibm865", "ibm865",
            "ibm866", "ibm866", "ibm868", "ibm868", "ibm869", "ibm869", "ibm870", "ibm870", "ibm871", "ibm871",
            "ibm874", "ibm875", "ibm912", "ibm913", "ibm914", "ibm915", "ibm916", "ibm918", "ibm920", "ibm921",
            "ibm922", "ibm923", "ibm930", "ibm933", "ibm935", "ibm937", "ibm939", "ibm942", "ibm942c", "ibm943",
            "ibm943c", "ibm948", "ibm949", "ibm949c", "ibm950", "ibm964", "ibm970", "iscii", "iscii91",
            "iso-10646-ucs-2", "iso-2022-cn", "iso-2022-cn-cns", "iso-2022-cn-gb", "iso-2022-jp", "iso-2022-jp-2",
            "iso-2022-kr", "iso-8859-11", "iso-8859-13", "iso-8859-15", "iso-8859-15", "iso-8859-2", "iso-8859-3",
            "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-9", "iso-ir-101",
            "iso-ir-109", "iso-ir-110", "iso-ir-126", "iso-ir-127", "iso-ir-138", "iso-ir-144", "iso-ir-148",
            "iso-ir-153", "iso-ir-159", "iso-ir-6", "iso-ir-87", "iso2022cn", "iso2022cn_cns", "iso2022cn_gb",
            "iso2022jp", "iso2022jp2", "iso2022kr", "iso646-us", "iso8859-13", "iso8859-15", "iso8859-2", "iso8859-3",
            "iso8859-4", "iso8859-5", "iso8859-6", "iso8859-7", "iso8859-8", "iso8859-9", "iso8859_11", "iso8859_13",
            "iso8859_15", "iso8859_15_fdis", "iso8859_2", "iso8859_3", "iso8859_4", "iso8859_5", "iso8859_6",
            "iso8859_7", "iso8859_8", "iso8859_9", "iso_646.irv:1983", "iso_646.irv:1991", "iso_8859-13", "iso_8859-15",
            "iso_8859-2", "iso_8859-2:1987", "iso_8859-3", "iso_8859-3:1988", "iso_8859-4", "iso_8859-4:1988",
            "iso_8859-5", "iso_8859-5:1988", "iso_8859-6", "iso_8859-6:1987", "iso_8859-7", "iso_8859-7:1987",
            "iso_8859-8", "iso_8859-8:1988", "iso_8859-9", "iso_8859-9:1989", "jis", "jis0201", "jis0208", "jis0212",
            "jis_c6226-1983", "jis_encoding", "jis_x0201", "jis_x0201", "jis_x0208-1983", "jis_x0212-1990",
            "jis_x0212-1990", "jisautodetect", "johab", "koi8", "koi8-r", "koi8-u", "koi8_r", "koi8_u",
            "ks_c_5601-1987", "ksc5601", "ksc5601-1987", "ksc5601-1992", "ksc5601_1987", "ksc5601_1992", "ksc_5601",
            "l2", "l3", "l4", "l5", "l9", "latin0", "latin2", "latin3", "latin4", "latin5", "latin9", "macarabic",
            "maccentraleurope", "maccroatian", "maccyrillic", "macdingbat", "macgreek", "machebrew", "maciceland",
            "macroman", "macromania", "macsymbol", "macthai", "macturkish", "macukraine", "ms-874", "ms1361", "ms50220",
            "ms50221", "ms874", "ms932", "ms936", "ms949", "ms950", "ms950_hkscs", "ms950_hkscs_xp", "ms_936", "ms_949",
            "ms_kanji", "pc-multilingual-850+euro", "pck", "shift-jis", "shift_jis", "shift_jis", "sjis",
            "st_sev_358-88", "sun_eu_greek", "tis-620", "tis620", "tis620.2533", "unicode", "unicodebig",
            "unicodebigunmarked", "unicodelittle", "unicodelittleunmarked", "us", "us-ascii", "utf-16", "utf-16be",
            "utf-16le", "utf-32", "utf-32be", "utf-32be-bom", "utf-32le", "utf-32le-bom", "utf16", "utf32", "utf_16",
            "utf_16be", "utf_16le", "utf_32", "utf_32be", "utf_32be_bom", "utf_32le", "utf_32le_bom", "windows-1250",
            "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256",
            "windows-1257", "windows-1258", "windows-31j", "windows-437", "windows-874", "windows-932", "windows-936",
            "windows-949", "windows-950", "windows-iso2022jp", "windows949", "x-big5-hkscs-2001", "x-big5-solaris",
            "x-compound-text", "x-compound_text", "x-euc-cn", "x-euc-jp", "x-euc-jp-linux", "x-euc-tw", "x-eucjp",
            "x-eucjp-open", "x-ibm1006", "x-ibm1025", "x-ibm1046", "x-ibm1097", "x-ibm1098", "x-ibm1112", "x-ibm1122",
            "x-ibm1123", "x-ibm1124", "x-ibm1166", "x-ibm1364", "x-ibm1381", "x-ibm1383", "x-ibm300", "x-ibm33722",
            "x-ibm737", "x-ibm833", "x-ibm834", "x-ibm856", "x-ibm874", "x-ibm875", "x-ibm921", "x-ibm922", "x-ibm930",
            "x-ibm933", "x-ibm935", "x-ibm937", "x-ibm939", "x-ibm942", "x-ibm942c", "x-ibm943", "x-ibm943c",
            "x-ibm948", "x-ibm949", "x-ibm949c", "x-ibm950", "x-ibm964", "x-ibm970", "x-iscii91", "x-iso-2022-cn-cns",
            "x-iso-2022-cn-gb", "x-iso-8859-11", "x-jis0208", "x-jisautodetect", "x-johab", "x-macarabic",
            "x-maccentraleurope", "x-maccroatian", "x-maccyrillic", "x-macdingbat", "x-macgreek", "x-machebrew",
            "x-maciceland", "x-macroman", "x-macromania", "x-macsymbol", "x-macthai", "x-macturkish", "x-macukraine",
            "x-ms932_0213", "x-ms950-hkscs", "x-ms950-hkscs-xp", "x-mswin-936", "x-pck", "x-sjis", "x-sjis_0213",
            "x-utf-16be", "x-utf-16le", "x-utf-16le-bom", "x-utf-32be", "x-utf-32be-bom", "x-utf-32le",
            "x-utf-32le-bom", "x-windows-50220", "x-windows-50221", "x-windows-874", "x-windows-949", "x-windows-950",
            "x-windows-iso2022jp", "x0201", "x0208", "x0212", "x11-compound_text",
            // Added from Oracle JDK 10.0.2
            "csiso885915", "csiso885916", "iso-8859-16", "iso-ir-226", "iso_8859-16", "iso_8859-16:2001", "l10",
            "latin-9", "latin10", "ms932-0213", "ms932:2004", "ms932_0213", "shift_jis:2004", "shift_jis_0213:2004",
            "sjis-0213", "sjis:2004", "sjis_0213", "sjis_0213:2004", "windows-932-0213", "windows-932:2004",
            // Added from OpenJDK 11.0.1
            "932", "cp932", "cpeuccn", "ibm-1252", "ibm-932", "ibm-euccn", "ibm1252", "ibm932", "ibmeuccn", "x-ibm932",
            // Added from OpenJDK 12 ea28
            "1129", "cp1129", "ibm-1129", "ibm-euctw", "ibm1129", "x-ibm1129",
            // Added from OpenJDK 13 ea15
            "29626c", "833", "cp29626c", "ibm-1140", "ibm-1141", "ibm-1142", "ibm-1143", "ibm-1144", "ibm-1145",
            "ibm-1146", "ibm-1147", "ibm-1148", "ibm-1149", "ibm-29626c", "ibm-858", "ibm-eucjp", "ibm1140", "ibm1141",
            "ibm1142", "ibm1143", "ibm1144", "ibm1145", "ibm1146", "ibm1147", "ibm1148", "ibm1149", "ibm29626c",
            "ibm858", "x-ibm29626c",
            // Added from HPE JVM 1.8.0.17-hp-ux
            "cp1051", "cp1386", "cshproman8", "hp-roman8", "ibm-1051", "r8", "roman8", "roman9"
            };

    /*
     * Sentinel stored against every lazy name. It is compared by identity in
     * getCharset() to detect entries that have not been resolved yet.
     */
    private static final Charset DUMMY_CHARSET = new DummyCharset("Dummy",  null);

    /* Maps a lower case charset name or alias to its Charset (or to the sentinel). */
    private final ConcurrentMap<String,Charset> cache = new ConcurrentHashMap<>();

    /**
     * Creates a cache that holds the resolved {@link #INITIAL_CHARSETS} (and
     * their aliases) plus an unresolved placeholder for each of the
     * {@link #LAZY_CHARSETS}.
     */
    public CharsetCache() {
        // Pre-populate the cache
        for (String charsetName : INITIAL_CHARSETS) {
            Charset charset = Charset.forName(charsetName);
            addToCache(charsetName, charset);
        }

        for (String charsetName : LAZY_CHARSETS) {
            addToCache(charsetName, DUMMY_CHARSET);
        }
    }


    /*
     * Registers the charset under the given name and under the lower case
     * form of each alias the charset reports.
     */
    private void addToCache(String name, Charset charset) {
        cache.put(name, charset);
        for (String alias : charset.aliases()) {
            cache.put(alias.toLowerCase(Locale.ENGLISH), charset);
        }
    }


    /**
     * Looks up a charset by name. The name is converted to lower case before
     * the lookup.
     *
     * @param charsetName The name or alias of the required charset
     *
     * @return The matching charset, or {@code null} if the name is not known
     *         to this cache or the charset is not available in this JVM
     */
    public Charset getCharset(String charsetName) {
        String lcCharsetName = charsetName.toLowerCase(Locale.ENGLISH);

        Charset result = cache.get(lcCharsetName);

        if (result == DUMMY_CHARSET) {
            // Name is known but the Charset is not in the cache
            try {
                // Charset.forName() never returns null. It signals a charset
                // that is missing from this JVM with an exception.
                Charset charset = Charset.forName(lcCharsetName);
                // Charset is available - populate cache entry
                addToCache(lcCharsetName, charset);
                result = charset;
            } catch (java.nio.charset.UnsupportedCharsetException ignored) {
                // Charset not available in this JVM - remove cache entry so
                // later requests for this name return null without retrying
                cache.remove(lcCharsetName);
                result = null;
            }
        }

        return result;
    }


    /*
     * Placeholder Charset implementation for entries that will be loaded lazily
     * into the cache. It contains no other charset and provides neither a
     * decoder nor an encoder.
     */
    private static class DummyCharset extends Charset {

        protected DummyCharset(String canonicalName, String[] aliases) {
            super(canonicalName, aliases);
        }

        @Override
        public boolean contains(Charset cs) {
            return false;
        }

        @Override
        public CharsetDecoder newDecoder() {
            return null;
        }

        @Override
        public CharsetEncoder newEncoder() {
            return null;
        }
    }
}
231