1 /*
2 * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 // -- This file was mechanically generated: Do not edit! -- //
27
28 package java.nio.charset;
29
30 import java.nio.Buffer;
31 import java.nio.ByteBuffer;
32 import java.nio.CharBuffer;
33 import java.nio.BufferOverflowException;
34 import java.nio.BufferUnderflowException;
35 import java.lang.ref.WeakReference;
36 import java.nio.charset.CoderMalfunctionError; // javadoc
37 import java.util.Arrays;
38
39
40 /**
41 * An engine that can transform a sequence of sixteen-bit Unicode characters into a sequence of
42 * bytes in a specific charset.
43 *
44 * <a id="steps"></a>
45 *
46 * <p> The input character sequence is provided in a character buffer or a series
47 * of such buffers. The output byte sequence is written to a byte buffer
48 * or a series of such buffers. An encoder should always be used by making
49 * the following sequence of method invocations, hereinafter referred to as an
50 * <i>encoding operation</i>:
51 *
52 * <ol>
53 *
54 * <li><p> Reset the encoder via the {@link #reset reset} method, unless it
55 * has not been used before; </p></li>
56 *
57 * <li><p> Invoke the {@link #encode encode} method zero or more times, as
58 * long as additional input may be available, passing {@code false} for the
59 * {@code endOfInput} argument and filling the input buffer and flushing the
60 * output buffer between invocations; </p></li>
61 *
62 * <li><p> Invoke the {@link #encode encode} method one final time, passing
63 * {@code true} for the {@code endOfInput} argument; and then </p></li>
64 *
65 * <li><p> Invoke the {@link #flush flush} method so that the encoder can
66 * flush any internal state to the output buffer. </p></li>
67 *
68 * </ol>
69 *
70 * Each invocation of the {@link #encode encode} method will encode as many
71 * characters as possible from the input buffer, writing the resulting bytes
72 * to the output buffer. The {@link #encode encode} method returns when more
73 * input is required, when there is not enough room in the output buffer, or
74 * when an encoding error has occurred. In each case a {@link CoderResult}
75 * object is returned to describe the reason for termination. An invoker can
76 * examine this object and fill the input buffer, flush the output buffer, or
77 * attempt to recover from an encoding error, as appropriate, and try again.
78 *
79 * <a id="ce"></a>
80 *
81 * <p> There are two general types of encoding errors. If the input character
82 * sequence is not a legal sixteen-bit Unicode sequence then the input is considered <i>malformed</i>. If
83 * the input character sequence is legal but cannot be mapped to a valid
84 * byte sequence in the given charset then an <i>unmappable character</i> has been encountered.
85 *
86 * <a id="cae"></a>
87 *
88 * <p> How an encoding error is handled depends upon the action requested for
89 * that type of error, which is described by an instance of the {@link
90 * CodingErrorAction} class. The possible error actions are to {@linkplain
91 * CodingErrorAction#IGNORE ignore} the erroneous input, {@linkplain
92 * CodingErrorAction#REPORT report} the error to the invoker via
93 * the returned {@link CoderResult} object, or {@linkplain CodingErrorAction#REPLACE
94 * replace} the erroneous input with the current value of the
 * replacement byte array.  The replacement is initially set to the
 * encoder's default replacement, which often (but not always) has the
 * initial value <code>{</code> <code>(byte)'?'</code> <code>}</code>;
 * its value may be changed via the {@link #replaceWith(byte[])
 * replaceWith} method.
107 *
108 * <p> The default action for malformed-input and unmappable-character errors
109 * is to {@linkplain CodingErrorAction#REPORT report} them. The
110 * malformed-input error action may be changed via the {@link
111 * #onMalformedInput(CodingErrorAction) onMalformedInput} method; the
112 * unmappable-character action may be changed via the {@link
113 * #onUnmappableCharacter(CodingErrorAction) onUnmappableCharacter} method.
114 *
115 * <p> This class is designed to handle many of the details of the encoding
116 * process, including the implementation of error actions. An encoder for a
117 * specific charset, which is a concrete subclass of this class, need only
118 * implement the abstract {@link #encodeLoop encodeLoop} method, which
119 * encapsulates the basic encoding loop. A subclass that maintains internal
120 * state should, additionally, override the {@link #implFlush implFlush} and
121 * {@link #implReset implReset} methods.
122 *
123 * <p> Instances of this class are not safe for use by multiple concurrent
124 * threads. </p>
125 *
126 *
127 * @author Mark Reinhold
128 * @author JSR-51 Expert Group
129 * @since 1.4
130 *
131 * @see ByteBuffer
132 * @see CharBuffer
133 * @see Charset
134 * @see CharsetDecoder
135 */
136
137 public abstract class CharsetEncoder {
138
139 private final Charset charset;
140 private final float averageBytesPerChar;
141 private final float maxBytesPerChar;
142
143 private byte[] replacement;
144 private CodingErrorAction malformedInputAction
145 = CodingErrorAction.REPORT;
146 private CodingErrorAction unmappableCharacterAction
147 = CodingErrorAction.REPORT;
148
149 // Internal states
150 //
151 private static final int ST_RESET = 0;
152 private static final int ST_CODING = 1;
153 private static final int ST_END = 2;
154 private static final int ST_FLUSHED = 3;
155
156 private int state = ST_RESET;
157
158 private static String stateNames[]
159 = { "RESET", "CODING", "CODING_END", "FLUSHED" };
160
161
162 /**
163 * Initializes a new encoder. The new encoder will have the given
164 * bytes-per-char and replacement values.
165 *
166 * @param cs
167 * The charset that created this encoder
168 *
169 * @param averageBytesPerChar
170 * A positive float value indicating the expected number of
171 * bytes that will be produced for each input character
172 *
173 * @param maxBytesPerChar
174 * A positive float value indicating the maximum number of
175 * bytes that will be produced for each input character
176 *
177 * @param replacement
178 * The initial replacement; must not be {@code null}, must have
179 * non-zero length, must not be longer than maxBytesPerChar,
180 * and must be {@linkplain #isLegalReplacement legal}
181 *
182 * @throws IllegalArgumentException
183 * If the preconditions on the parameters do not hold
184 */
185 protected
186 CharsetEncoder(Charset cs,
187 float averageBytesPerChar,
188 float maxBytesPerChar,
189 byte[] replacement)
190 {
191 this.charset = cs;
192 if (averageBytesPerChar <= 0.0f)
193 throw new IllegalArgumentException("Non-positive "
194 + "averageBytesPerChar");
195 if (maxBytesPerChar <= 0.0f)
196 throw new IllegalArgumentException("Non-positive "
197 + "maxBytesPerChar");
198 if (averageBytesPerChar > maxBytesPerChar)
199 throw new IllegalArgumentException("averageBytesPerChar"
200 + " exceeds "
201 + "maxBytesPerChar");
202 this.replacement = replacement;
203 this.averageBytesPerChar = averageBytesPerChar;
204 this.maxBytesPerChar = maxBytesPerChar;
205 replaceWith(replacement);
206 }
207
208 /**
209 * Initializes a new encoder. The new encoder will have the given
210 * bytes-per-char values and its replacement will be the
211 * byte array <code>{</code> <code>(byte)'?'</code> <code>}</code>.
212 *
213 * @param cs
214 * The charset that created this encoder
215 *
216 * @param averageBytesPerChar
217 * A positive float value indicating the expected number of
218 * bytes that will be produced for each input character
219 *
220 * @param maxBytesPerChar
221 * A positive float value indicating the maximum number of
222 * bytes that will be produced for each input character
223 *
224 * @throws IllegalArgumentException
225 * If the preconditions on the parameters do not hold
226 */
227 protected CharsetEncoder(Charset cs,
228 float averageBytesPerChar,
229 float maxBytesPerChar)
230 {
231 this(cs,
232 averageBytesPerChar, maxBytesPerChar,
233 new byte[] { (byte)'?' });
234 }
235
236 /**
237 * Returns the charset that created this encoder.
238 *
239 * @return This encoder's charset
240 */
241 public final Charset charset() {
242 return charset;
243 }
244
245 /**
246 * Returns this encoder's replacement value.
247 *
248 * @return This encoder's current replacement,
249 * which is never {@code null} and is never empty
250 */
251 public final byte[] replacement() {
252
253
254
255
256 return Arrays.copyOf(replacement, replacement.length);
257
258 }
259
260 /**
261 * Changes this encoder's replacement value.
262 *
263 * <p> This method invokes the {@link #implReplaceWith implReplaceWith}
264 * method, passing the new replacement, after checking that the new
265 * replacement is acceptable. </p>
266 *
267 * @param newReplacement The new replacement; must not be
268 * {@code null}, must have non-zero length,
269
270
271
272
273
274 * must not be longer than the value returned by the
275 * {@link #maxBytesPerChar() maxBytesPerChar} method, and
276 * must be {@link #isLegalReplacement legal}
277
278 *
279 * @return This encoder
280 *
281 * @throws IllegalArgumentException
282 * If the preconditions on the parameter do not hold
283 */
284 public final CharsetEncoder replaceWith(byte[] newReplacement) {
285 if (newReplacement == null)
286 throw new IllegalArgumentException("Null replacement");
287 int len = newReplacement.length;
288 if (len == 0)
289 throw new IllegalArgumentException("Empty replacement");
290 if (len > maxBytesPerChar)
291 throw new IllegalArgumentException("Replacement too long");
292
293
294
295
296 if (!isLegalReplacement(newReplacement))
297 throw new IllegalArgumentException("Illegal replacement");
298 this.replacement = Arrays.copyOf(newReplacement, newReplacement.length);
299
300 implReplaceWith(this.replacement);
301 return this;
302 }
303
304 /**
305 * Reports a change to this encoder's replacement value.
306 *
307 * <p> The default implementation of this method does nothing. This method
308 * should be overridden by encoders that require notification of changes to
309 * the replacement. </p>
310 *
311 * @param newReplacement The replacement value
312 */
313 protected void implReplaceWith(byte[] newReplacement) {
314 }
315
316
317
318 private WeakReference<CharsetDecoder> cachedDecoder = null;
319
320 /**
321 * Tells whether or not the given byte array is a legal replacement value
322 * for this encoder.
323 *
324 * <p> A replacement is legal if, and only if, it is a legal sequence of
325 * bytes in this encoder's charset; that is, it must be possible to decode
326 * the replacement into one or more sixteen-bit Unicode characters.
327 *
328 * <p> The default implementation of this method is not very efficient; it
329 * should generally be overridden to improve performance. </p>
330 *
331 * @param repl The byte array to be tested
332 *
333 * @return {@code true} if, and only if, the given byte array
334 * is a legal replacement value for this encoder
335 */
336 public boolean isLegalReplacement(byte[] repl) {
337 WeakReference<CharsetDecoder> wr = cachedDecoder;
338 CharsetDecoder dec = null;
339 if ((wr == null) || ((dec = wr.get()) == null)) {
340 dec = charset().newDecoder();
341 dec.onMalformedInput(CodingErrorAction.REPORT);
342 dec.onUnmappableCharacter(CodingErrorAction.REPORT);
343 cachedDecoder = new WeakReference<CharsetDecoder>(dec);
344 } else {
345 dec.reset();
346 }
347 ByteBuffer bb = ByteBuffer.wrap(repl);
348 CharBuffer cb = CharBuffer.allocate((int)(bb.remaining()
349 * dec.maxCharsPerByte()));
350 CoderResult cr = dec.decode(bb, cb, true);
351 return !cr.isError();
352 }
353
354
355
356 /**
357 * Returns this encoder's current action for malformed-input errors.
358 *
359 * @return The current malformed-input action, which is never {@code null}
360 */
361 public CodingErrorAction malformedInputAction() {
362 return malformedInputAction;
363 }
364
365 /**
366 * Changes this encoder's action for malformed-input errors.
367 *
368 * <p> This method invokes the {@link #implOnMalformedInput
369 * implOnMalformedInput} method, passing the new action. </p>
370 *
371 * @param newAction The new action; must not be {@code null}
372 *
373 * @return This encoder
374 *
375 * @throws IllegalArgumentException
376 * If the precondition on the parameter does not hold
377 */
378 public final CharsetEncoder onMalformedInput(CodingErrorAction newAction) {
379 if (newAction == null)
380 throw new IllegalArgumentException("Null action");
381 malformedInputAction = newAction;
382 implOnMalformedInput(newAction);
383 return this;
384 }
385
386 /**
387 * Reports a change to this encoder's malformed-input action.
388 *
389 * <p> The default implementation of this method does nothing. This method
390 * should be overridden by encoders that require notification of changes to
391 * the malformed-input action. </p>
392 *
393 * @param newAction The new action
394 */
395 protected void implOnMalformedInput(CodingErrorAction newAction) { }
396
397 /**
398 * Returns this encoder's current action for unmappable-character errors.
399 *
400 * @return The current unmappable-character action, which is never
401 * {@code null}
402 */
403 public CodingErrorAction unmappableCharacterAction() {
404 return unmappableCharacterAction;
405 }
406
407 /**
408 * Changes this encoder's action for unmappable-character errors.
409 *
410 * <p> This method invokes the {@link #implOnUnmappableCharacter
411 * implOnUnmappableCharacter} method, passing the new action. </p>
412 *
413 * @param newAction The new action; must not be {@code null}
414 *
415 * @return This encoder
416 *
417 * @throws IllegalArgumentException
418 * If the precondition on the parameter does not hold
419 */
420 public final CharsetEncoder onUnmappableCharacter(CodingErrorAction
421 newAction)
422 {
423 if (newAction == null)
424 throw new IllegalArgumentException("Null action");
425 unmappableCharacterAction = newAction;
426 implOnUnmappableCharacter(newAction);
427 return this;
428 }
429
430 /**
431 * Reports a change to this encoder's unmappable-character action.
432 *
433 * <p> The default implementation of this method does nothing. This method
434 * should be overridden by encoders that require notification of changes to
435 * the unmappable-character action. </p>
436 *
437 * @param newAction The new action
438 */
439 protected void implOnUnmappableCharacter(CodingErrorAction newAction) { }
440
441 /**
442 * Returns the average number of bytes that will be produced for each
443 * character of input. This heuristic value may be used to estimate the size
444 * of the output buffer required for a given input sequence.
445 *
446 * @return The average number of bytes produced
447 * per character of input
448 */
449 public final float averageBytesPerChar() {
450 return averageBytesPerChar;
451 }
452
453 /**
454 * Returns the maximum number of bytes that will be produced for each
455 * character of input. This value may be used to compute the worst-case size
456 * of the output buffer required for a given input sequence.
457 *
458 * @return The maximum number of bytes that will be produced per
459 * character of input
460 */
461 public final float maxBytesPerChar() {
462 return maxBytesPerChar;
463 }
464
465 /**
466 * Encodes as many characters as possible from the given input buffer,
467 * writing the results to the given output buffer.
468 *
469 * <p> The buffers are read from, and written to, starting at their current
470 * positions. At most {@link Buffer#remaining in.remaining()} characters
471 * will be read and at most {@link Buffer#remaining out.remaining()}
472 * bytes will be written. The buffers' positions will be advanced to
473 * reflect the characters read and the bytes written, but their marks and
474 * limits will not be modified.
475 *
476 * <p> In addition to reading characters from the input buffer and writing
477 * bytes to the output buffer, this method returns a {@link CoderResult}
478 * object to describe its reason for termination:
479 *
480 * <ul>
481 *
482 * <li><p> {@link CoderResult#UNDERFLOW} indicates that as much of the
483 * input buffer as possible has been encoded. If there is no further
484 * input then the invoker can proceed to the next step of the
485 * <a href="#steps">encoding operation</a>. Otherwise this method
486 * should be invoked again with further input. </p></li>
487 *
488 * <li><p> {@link CoderResult#OVERFLOW} indicates that there is
489 * insufficient space in the output buffer to encode any more characters.
490 * This method should be invoked again with an output buffer that has
491 * more {@linkplain Buffer#remaining remaining} bytes. This is
492 * typically done by draining any encoded bytes from the output
493 * buffer. </p></li>
494 *
495 * <li><p> A {@linkplain CoderResult#malformedForLength
496 * malformed-input} result indicates that a malformed-input
497 * error has been detected. The malformed characters begin at the input
498 * buffer's (possibly incremented) position; the number of malformed
499 * characters may be determined by invoking the result object's {@link
500 * CoderResult#length() length} method. This case applies only if the
501 * {@linkplain #onMalformedInput malformed action} of this encoder
502 * is {@link CodingErrorAction#REPORT}; otherwise the malformed input
503 * will be ignored or replaced, as requested. </p></li>
504 *
505 * <li><p> An {@linkplain CoderResult#unmappableForLength
506 * unmappable-character} result indicates that an
507 * unmappable-character error has been detected. The characters that
508 * encode the unmappable character begin at the input buffer's (possibly
509 * incremented) position; the number of such characters may be determined
510 * by invoking the result object's {@link CoderResult#length() length}
511 * method. This case applies only if the {@linkplain #onUnmappableCharacter
512 * unmappable action} of this encoder is {@link
513 * CodingErrorAction#REPORT}; otherwise the unmappable character will be
514 * ignored or replaced, as requested. </p></li>
515 *
516 * </ul>
517 *
518 * In any case, if this method is to be reinvoked in the same encoding
519 * operation then care should be taken to preserve any characters remaining
520 * in the input buffer so that they are available to the next invocation.
521 *
522 * <p> The {@code endOfInput} parameter advises this method as to whether
523 * the invoker can provide further input beyond that contained in the given
524 * input buffer. If there is a possibility of providing additional input
525 * then the invoker should pass {@code false} for this parameter; if there
526 * is no possibility of providing further input then the invoker should
527 * pass {@code true}. It is not erroneous, and in fact it is quite
528 * common, to pass {@code false} in one invocation and later discover that
529 * no further input was actually available. It is critical, however, that
530 * the final invocation of this method in a sequence of invocations always
531 * pass {@code true} so that any remaining unencoded input will be treated
532 * as being malformed.
533 *
534 * <p> This method works by invoking the {@link #encodeLoop encodeLoop}
535 * method, interpreting its results, handling error conditions, and
536 * reinvoking it as necessary. </p>
537 *
538 *
539 * @param in
540 * The input character buffer
541 *
542 * @param out
543 * The output byte buffer
544 *
545 * @param endOfInput
546 * {@code true} if, and only if, the invoker can provide no
547 * additional input characters beyond those in the given buffer
548 *
549 * @return A coder-result object describing the reason for termination
550 *
551 * @throws IllegalStateException
552 * If an encoding operation is already in progress and the previous
553 * step was an invocation neither of the {@link #reset reset}
554 * method, nor of this method with a value of {@code false} for
555 * the {@code endOfInput} parameter, nor of this method with a
556 * value of {@code true} for the {@code endOfInput} parameter
557 * but a return value indicating an incomplete encoding operation
558 *
559 * @throws CoderMalfunctionError
560 * If an invocation of the encodeLoop method threw
561 * an unexpected exception
562 */
563 public final CoderResult encode(CharBuffer in, ByteBuffer out,
564 boolean endOfInput)
565 {
566 int newState = endOfInput ? ST_END : ST_CODING;
567 if ((state != ST_RESET) && (state != ST_CODING)
568 && !(endOfInput && (state == ST_END)))
569 throwIllegalStateException(state, newState);
570 state = newState;
571
572 for (;;) {
573
574 CoderResult cr;
575 try {
576 cr = encodeLoop(in, out);
577 } catch (BufferUnderflowException x) {
578 throw new CoderMalfunctionError(x);
579 } catch (BufferOverflowException x) {
580 throw new CoderMalfunctionError(x);
581 }
582
583 if (cr.isOverflow())
584 return cr;
585
586 if (cr.isUnderflow()) {
587 if (endOfInput && in.hasRemaining()) {
588 cr = CoderResult.malformedForLength(in.remaining());
589 // Fall through to malformed-input case
590 } else {
591 return cr;
592 }
593 }
594
595 CodingErrorAction action = null;
596 if (cr.isMalformed())
597 action = malformedInputAction;
598 else if (cr.isUnmappable())
599 action = unmappableCharacterAction;
600 else
601 assert false : cr.toString();
602
603 if (action == CodingErrorAction.REPORT)
604 return cr;
605
606 if (action == CodingErrorAction.REPLACE) {
607 if (out.remaining() < replacement.length)
608 return CoderResult.OVERFLOW;
609 out.put(replacement);
610 }
611
612 if ((action == CodingErrorAction.IGNORE)
613 || (action == CodingErrorAction.REPLACE)) {
614 // Skip erroneous input either way
615 in.position(in.position() + cr.length());
616 continue;
617 }
618
619 assert false;
620 }
621
622 }
623
624 /**
625 * Flushes this encoder.
626 *
627 * <p> Some encoders maintain internal state and may need to write some
628 * final bytes to the output buffer once the overall input sequence has
629 * been read.
630 *
631 * <p> Any additional output is written to the output buffer beginning at
632 * its current position. At most {@link Buffer#remaining out.remaining()}
633 * bytes will be written. The buffer's position will be advanced
634 * appropriately, but its mark and limit will not be modified.
635 *
636 * <p> If this method completes successfully then it returns {@link
637 * CoderResult#UNDERFLOW}. If there is insufficient room in the output
638 * buffer then it returns {@link CoderResult#OVERFLOW}. If this happens
639 * then this method must be invoked again, with an output buffer that has
640 * more room, in order to complete the current <a href="#steps">encoding
641 * operation</a>.
642 *
643 * <p> If this encoder has already been flushed then invoking this method
644 * has no effect.
645 *
646 * <p> This method invokes the {@link #implFlush implFlush} method to
647 * perform the actual flushing operation. </p>
648 *
649 * @param out
650 * The output byte buffer
651 *
652 * @return A coder-result object, either {@link CoderResult#UNDERFLOW} or
653 * {@link CoderResult#OVERFLOW}
654 *
655 * @throws IllegalStateException
656 * If the previous step of the current encoding operation was an
657 * invocation neither of the {@link #flush flush} method nor of
658 * the three-argument {@link
659 * #encode(CharBuffer,ByteBuffer,boolean) encode} method
660 * with a value of {@code true} for the {@code endOfInput}
661 * parameter
662 */
663 public final CoderResult flush(ByteBuffer out) {
664 if (state == ST_END) {
665 CoderResult cr = implFlush(out);
666 if (cr.isUnderflow())
667 state = ST_FLUSHED;
668 return cr;
669 }
670
671 if (state != ST_FLUSHED)
672 throwIllegalStateException(state, ST_FLUSHED);
673
674 return CoderResult.UNDERFLOW; // Already flushed
675 }
676
677 /**
678 * Flushes this encoder.
679 *
680 * <p> The default implementation of this method does nothing, and always
681 * returns {@link CoderResult#UNDERFLOW}. This method should be overridden
682 * by encoders that may need to write final bytes to the output buffer
683 * once the entire input sequence has been read. </p>
684 *
685 * @param out
686 * The output byte buffer
687 *
688 * @return A coder-result object, either {@link CoderResult#UNDERFLOW} or
689 * {@link CoderResult#OVERFLOW}
690 */
691 protected CoderResult implFlush(ByteBuffer out) {
692 return CoderResult.UNDERFLOW;
693 }
694
695 /**
696 * Resets this encoder, clearing any internal state.
697 *
698 * <p> This method resets charset-independent state and also invokes the
699 * {@link #implReset() implReset} method in order to perform any
700 * charset-specific reset actions. </p>
701 *
702 * @return This encoder
703 *
704 */
705 public final CharsetEncoder reset() {
706 implReset();
707 state = ST_RESET;
708 return this;
709 }
710
711 /**
712 * Resets this encoder, clearing any charset-specific internal state.
713 *
714 * <p> The default implementation of this method does nothing. This method
715 * should be overridden by encoders that maintain internal state. </p>
716 */
717 protected void implReset() { }
718
719 /**
720 * Encodes one or more characters into one or more bytes.
721 *
722 * <p> This method encapsulates the basic encoding loop, encoding as many
723 * characters as possible until it either runs out of input, runs out of room
724 * in the output buffer, or encounters an encoding error. This method is
725 * invoked by the {@link #encode encode} method, which handles result
726 * interpretation and error recovery.
727 *
728 * <p> The buffers are read from, and written to, starting at their current
729 * positions. At most {@link Buffer#remaining in.remaining()} characters
730 * will be read, and at most {@link Buffer#remaining out.remaining()}
731 * bytes will be written. The buffers' positions will be advanced to
732 * reflect the characters read and the bytes written, but their marks and
733 * limits will not be modified.
734 *
735 * <p> This method returns a {@link CoderResult} object to describe its
736 * reason for termination, in the same manner as the {@link #encode encode}
737 * method. Most implementations of this method will handle encoding errors
738 * by returning an appropriate result object for interpretation by the
739 * {@link #encode encode} method. An optimized implementation may instead
740 * examine the relevant error action and implement that action itself.
741 *
742 * <p> An implementation of this method may perform arbitrary lookahead by
743 * returning {@link CoderResult#UNDERFLOW} until it receives sufficient
744 * input. </p>
745 *
746 * @param in
747 * The input character buffer
748 *
749 * @param out
750 * The output byte buffer
751 *
752 * @return A coder-result object describing the reason for termination
753 */
754 protected abstract CoderResult encodeLoop(CharBuffer in,
755 ByteBuffer out);
756
757 /**
758 * Convenience method that encodes the remaining content of a single input
759 * character buffer into a newly-allocated byte buffer.
760 *
761 * <p> This method implements an entire <a href="#steps">encoding
762 * operation</a>; that is, it resets this encoder, then it encodes the
763 * characters in the given character buffer, and finally it flushes this
764 * encoder. This method should therefore not be invoked if an encoding
765 * operation is already in progress. </p>
766 *
767 * @param in
768 * The input character buffer
769 *
770 * @return A newly-allocated byte buffer containing the result of the
771 * encoding operation. The buffer's position will be zero and its
772 * limit will follow the last byte written.
773 *
774 * @throws IllegalStateException
775 * If an encoding operation is already in progress
776 *
777 * @throws MalformedInputException
778 * If the character sequence starting at the input buffer's current
779 * position is not a legal sixteen-bit Unicode sequence and the current malformed-input action
780 * is {@link CodingErrorAction#REPORT}
781 *
782 * @throws UnmappableCharacterException
783 * If the character sequence starting at the input buffer's current
784 * position cannot be mapped to an equivalent byte sequence and
785 * the current unmappable-character action is {@link
786 * CodingErrorAction#REPORT}
787 */
788 public final ByteBuffer encode(CharBuffer in)
789 throws CharacterCodingException
790 {
791 int n = (int)(in.remaining() * averageBytesPerChar());
792 ByteBuffer out = ByteBuffer.allocate(n);
793
794 if ((n == 0) && (in.remaining() == 0))
795 return out;
796 reset();
797 for (;;) {
798 CoderResult cr = in.hasRemaining() ?
799 encode(in, out, true) : CoderResult.UNDERFLOW;
800 if (cr.isUnderflow())
801 cr = flush(out);
802
803 if (cr.isUnderflow())
804 break;
805 if (cr.isOverflow()) {
806 n = 2*n + 1; // Ensure progress; n might be 0!
807 ByteBuffer o = ByteBuffer.allocate(n);
808 out.flip();
809 o.put(out);
810 out = o;
811 continue;
812 }
813 cr.throwException();
814 }
815 out.flip();
816 return out;
817 }
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897 private boolean canEncode(CharBuffer cb) {
898 if (state == ST_FLUSHED)
899 reset();
900 else if (state != ST_RESET)
901 throwIllegalStateException(state, ST_CODING);
902 CodingErrorAction ma = malformedInputAction();
903 CodingErrorAction ua = unmappableCharacterAction();
904 try {
905 onMalformedInput(CodingErrorAction.REPORT);
906 onUnmappableCharacter(CodingErrorAction.REPORT);
907 encode(cb);
908 } catch (CharacterCodingException x) {
909 return false;
910 } finally {
911 onMalformedInput(ma);
912 onUnmappableCharacter(ua);
913 reset();
914 }
915 return true;
916 }
917
918 /**
919 * Tells whether or not this encoder can encode the given character.
920 *
921 * <p> This method returns {@code false} if the given character is a
922 * surrogate character; such characters can be interpreted only when they
923 * are members of a pair consisting of a high surrogate followed by a low
924 * surrogate. The {@link #canEncode(java.lang.CharSequence)
925 * canEncode(CharSequence)} method may be used to test whether or not a
926 * character sequence can be encoded.
927 *
928 * <p> This method may modify this encoder's state; it should therefore not
929 * be invoked if an <a href="#steps">encoding operation</a> is already in
930 * progress.
931 *
932 * <p> The default implementation of this method is not very efficient; it
933 * should generally be overridden to improve performance. </p>
934 *
935 * @param c
936 * The given character
937 *
938 * @return {@code true} if, and only if, this encoder can encode
939 * the given character
940 *
941 * @throws IllegalStateException
942 * If an encoding operation is already in progress
943 */
944 public boolean canEncode(char c) {
945 CharBuffer cb = CharBuffer.allocate(1);
946 cb.put(c);
947 cb.flip();
948 return canEncode(cb);
949 }
950
951 /**
952 * Tells whether or not this encoder can encode the given character
953 * sequence.
954 *
955 * <p> If this method returns {@code false} for a particular character
956 * sequence then more information about why the sequence cannot be encoded
957 * may be obtained by performing a full <a href="#steps">encoding
958 * operation</a>.
959 *
960 * <p> This method may modify this encoder's state; it should therefore not
961 * be invoked if an encoding operation is already in progress.
962 *
963 * <p> The default implementation of this method is not very efficient; it
964 * should generally be overridden to improve performance. </p>
965 *
966 * @param cs
967 * The given character sequence
968 *
969 * @return {@code true} if, and only if, this encoder can encode
970 * the given character without throwing any exceptions and without
971 * performing any replacements
972 *
973 * @throws IllegalStateException
974 * If an encoding operation is already in progress
975 */
976 public boolean canEncode(CharSequence cs) {
977 CharBuffer cb;
978 if (cs instanceof CharBuffer)
979 cb = ((CharBuffer)cs).duplicate();
980 else
981 cb = CharBuffer.wrap(cs.toString());
982 return canEncode(cb);
983 }
984
985
986
987
988 private void throwIllegalStateException(int from, int to) {
989 throw new IllegalStateException("Current state = " + stateNames[from]
990 + ", new state = " + stateNames[to]);
991 }
992
993 }
994