1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.tomcat.util.buf;
18
19 import java.io.IOException;
20 import java.io.UnsupportedEncodingException;
21 import java.nio.ByteBuffer;
22 import java.nio.CharBuffer;
23 import java.nio.charset.Charset;
24 import java.nio.charset.CharsetDecoder;
25 import java.nio.charset.CoderResult;
26 import java.nio.charset.CodingErrorAction;
27 import java.nio.charset.StandardCharsets;
28 import java.util.Locale;
29
30 import org.apache.tomcat.util.res.StringManager;
31
32 /**
33 * NIO based character decoder.
34 */
35 public class B2CConverter {
36
37 private static final StringManager sm = StringManager.getManager(B2CConverter.class);
38
39 private static final CharsetCache charsetCache = new CharsetCache();
40
41
42 // Protected so unit tests can use it
43 protected static final int LEFTOVER_SIZE = 9;
44
45 /**
46 * Obtain the Charset for the given encoding
47 *
48 * @param enc The name of the encoding for the required charset
49 *
50 * @return The Charset corresponding to the requested encoding
51 *
52 * @throws UnsupportedEncodingException If the requested Charset is not
53 * available
54 */
55 public static Charset getCharset(String enc) throws UnsupportedEncodingException {
56
57 // Encoding names should all be ASCII
58 String lowerCaseEnc = enc.toLowerCase(Locale.ENGLISH);
59
60 Charset charset = charsetCache.getCharset(lowerCaseEnc);
61
62 if (charset == null) {
63 // Pre-population of the cache means this must be invalid
64 throw new UnsupportedEncodingException(
65 sm.getString("b2cConverter.unknownEncoding", lowerCaseEnc));
66 }
67 return charset;
68 }
69
70
71 private final CharsetDecoder decoder;
72 private ByteBuffer bb = null;
73 private CharBuffer cb = null;
74
75 /**
76 * Leftover buffer used for incomplete characters.
77 */
78 private final ByteBuffer leftovers;
79
/**
 * Creates a converter that reports malformed or unmappable input as an
 * error rather than replacing it.
 *
 * @param charset The charset the input bytes are encoded in
 */
public B2CConverter(Charset charset) {
    // false: do not replace bad input, report it
    this(charset, false);
}
83
84 public B2CConverter(Charset charset, boolean replaceOnError) {
85 byte[] left = new byte[LEFTOVER_SIZE];
86 leftovers = ByteBuffer.wrap(left);
87 CodingErrorAction action;
88 if (replaceOnError) {
89 action = CodingErrorAction.REPLACE;
90 } else {
91 action = CodingErrorAction.REPORT;
92 }
93 // Special case. Use the Apache Harmony based UTF-8 decoder because it
94 // - a) rejects invalid sequences that the JVM decoder does not
95 // - b) fails faster for some invalid sequences
96 if (charset.equals(StandardCharsets.UTF_8)) {
97 decoder = new Utf8Decoder();
98 } else {
99 decoder = charset.newDecoder();
100 }
101 decoder.onMalformedInput(action);
102 decoder.onUnmappableCharacter(action);
103 }
104
105 /**
106 * Reset the decoder state.
107 */
108 public void recycle() {
109 decoder.reset();
110 leftovers.position(0);
111 }
112
113 /**
114 * Convert the given bytes to characters.
115 *
116 * @param bc byte input
117 * @param cc char output
118 * @param endOfInput Is this all of the available data
119 *
120 * @throws IOException If the conversion can not be completed
121 */
122 public void convert(ByteChunk bc, CharChunk cc, boolean endOfInput)
123 throws IOException {
124 if ((bb == null) || (bb.array() != bc.getBuffer())) {
125 // Create a new byte buffer if anything changed
126 bb = ByteBuffer.wrap(bc.getBuffer(), bc.getStart(), bc.getLength());
127 } else {
128 // Initialize the byte buffer
129 bb.limit(bc.getEnd());
130 bb.position(bc.getStart());
131 }
132 if ((cb == null) || (cb.array() != cc.getBuffer())) {
133 // Create a new char buffer if anything changed
134 cb = CharBuffer.wrap(cc.getBuffer(), cc.getEnd(),
135 cc.getBuffer().length - cc.getEnd());
136 } else {
137 // Initialize the char buffer
138 cb.limit(cc.getBuffer().length);
139 cb.position(cc.getEnd());
140 }
141 CoderResult result = null;
142 // Parse leftover if any are present
143 if (leftovers.position() > 0) {
144 int pos = cb.position();
145 // Loop until one char is decoded or there is a decoder error
146 do {
147 leftovers.put(bc.subtractB());
148 leftovers.flip();
149 result = decoder.decode(leftovers, cb, endOfInput);
150 leftovers.position(leftovers.limit());
151 leftovers.limit(leftovers.array().length);
152 } while (result.isUnderflow() && (cb.position() == pos));
153 if (result.isError() || result.isMalformed()) {
154 result.throwException();
155 }
156 bb.position(bc.getStart());
157 leftovers.position(0);
158 }
159 // Do the decoding and get the results into the byte chunk and the char
160 // chunk
161 result = decoder.decode(bb, cb, endOfInput);
162 if (result.isError() || result.isMalformed()) {
163 result.throwException();
164 } else if (result.isOverflow()) {
165 // Propagate current positions to the byte chunk and char chunk, if
166 // this continues the char buffer will get resized
167 bc.setOffset(bb.position());
168 cc.setEnd(cb.position());
169 } else if (result.isUnderflow()) {
170 // Propagate current positions to the byte chunk and char chunk
171 bc.setOffset(bb.position());
172 cc.setEnd(cb.position());
173 // Put leftovers in the leftovers byte buffer
174 if (bc.getLength() > 0) {
175 leftovers.limit(leftovers.array().length);
176 leftovers.position(bc.getLength());
177 bc.subtract(leftovers.array(), 0, bc.getLength());
178 }
179 }
180 }
181
182 /**
183 * Convert the given bytes to characters.
184 *
185 * @param bc byte input
186 * @param cc char output
187 * @param ic byte input channel
188 * @param endOfInput Is this all of the available data
189 *
190 * @throws IOException If the conversion can not be completed
191 */
192 public void convert(ByteBuffer bc, CharBuffer cc, ByteChunk.ByteInputChannel ic, boolean endOfInput)
193 throws IOException {
194 if ((bb == null) || (bb.array() != bc.array())) {
195 // Create a new byte buffer if anything changed
196 bb = ByteBuffer.wrap(bc.array(), bc.arrayOffset() + bc.position(), bc.remaining());
197 } else {
198 // Initialize the byte buffer
199 bb.limit(bc.limit());
200 bb.position(bc.position());
201 }
202 if ((cb == null) || (cb.array() != cc.array())) {
203 // Create a new char buffer if anything changed
204 cb = CharBuffer.wrap(cc.array(), cc.limit(), cc.capacity() - cc.limit());
205 } else {
206 // Initialize the char buffer
207 cb.limit(cc.capacity());
208 cb.position(cc.limit());
209 }
210 CoderResult result = null;
211 // Parse leftover if any are present
212 if (leftovers.position() > 0) {
213 int pos = cb.position();
214 // Loop until one char is decoded or there is a decoder error
215 do {
216 byte chr;
217 if (bc.remaining() == 0) {
218 int n = ic.realReadBytes();
219 chr = n < 0 ? -1 : bc.get();
220 } else {
221 chr = bc.get();
222 }
223 leftovers.put(chr);
224 leftovers.flip();
225 result = decoder.decode(leftovers, cb, endOfInput);
226 leftovers.position(leftovers.limit());
227 leftovers.limit(leftovers.array().length);
228 } while (result.isUnderflow() && (cb.position() == pos));
229 if (result.isError() || result.isMalformed()) {
230 result.throwException();
231 }
232 bb.position(bc.position());
233 leftovers.position(0);
234 }
235 // Do the decoding and get the results into the byte chunk and the char
236 // chunk
237 result = decoder.decode(bb, cb, endOfInput);
238 if (result.isError() || result.isMalformed()) {
239 result.throwException();
240 } else if (result.isOverflow()) {
241 // Propagate current positions to the byte chunk and char chunk, if
242 // this continues the char buffer will get resized
243 bc.position(bb.position());
244 cc.limit(cb.position());
245 } else if (result.isUnderflow()) {
246 // Propagate current positions to the byte chunk and char chunk
247 bc.position(bb.position());
248 cc.limit(cb.position());
249 // Put leftovers in the leftovers byte buffer
250 if (bc.remaining() > 0) {
251 leftovers.limit(leftovers.array().length);
252 leftovers.position(bc.remaining());
253 bc.get(leftovers.array(), 0, bc.remaining());
254 }
255 }
256 }
257
258
259 public Charset getCharset() {
260 return decoder.charset();
261 }
262 }
263