1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.tomcat.util.buf;
18
19 import java.io.IOException;
20 import java.io.ObjectInputStream;
21 import java.io.ObjectOutputStream;
22 import java.nio.ByteBuffer;
23 import java.nio.CharBuffer;
24 import java.nio.charset.Charset;
25 import java.nio.charset.StandardCharsets;
26
/*
 * In a server it is very important to be able to operate on
 * the original byte[] without converting everything to chars.
 * Some protocols are ASCII only, and some allow different
 * non-Unicode encodings. The encoding is not known beforehand,
 * and can even change during the execution of the protocol
 * (for example, a multipart message may have parts with different
 * encodings).
 *
 * For HTTP it is not very clear how the encoding of the RequestURI
 * and MIME values can be determined, but it is a great advantage
 * to be able to parse the request without converting to strings.
 */
40
// TODO: This class could either extend ByteBuffer or, better, hold a ByteBuffer
// inside; that way it could provide the search/etc. on ByteBuffer, as a helper.
43
44 /**
45 * This class is used to represent a chunk of bytes, and utilities to manipulate
46 * byte[].
47 *
48 * The buffer can be modified and used for both input and output.
49 *
50 * There are 2 modes: The chunk can be associated with a sink - ByteInputChannel
51 * or ByteOutputChannel, which will be used when the buffer is empty (on input)
52 * or filled (on output). For output, it can also grow. This operating mode is
53 * selected by calling setLimit() or allocate(initial, limit) with limit != -1.
54 *
55 * Various search and append method are defined - similar with String and
56 * StringBuffer, but operating on bytes.
57 *
58 * This is important because it allows processing the http headers directly on
59 * the received bytes, without converting to chars and Strings until the strings
60 * are needed. In addition, the charset is determined later, from headers or
61 * user code.
62 *
63 * @author dac@sun.com
64 * @author James Todd [gonzo@sun.com]
65 * @author Costin Manolache
66 * @author Remy Maucherat
67 */
68 public final class ByteChunk extends AbstractChunk {
69
70 private static final long serialVersionUID = 1L;
71
72 /**
73 * Input interface, used when the buffer is empty.
74 *
75 * Same as java.nio.channels.ReadableByteChannel
76 */
77 public static interface ByteInputChannel {
78
79 /**
80 * Read new bytes.
81 *
82 * @return The number of bytes read
83 *
84 * @throws IOException If an I/O error occurs during reading
85 */
86 public int realReadBytes() throws IOException;
87 }
88
89 /**
90 * When we need more space we'll either grow the buffer ( up to the limit )
91 * or send it to a channel.
92 *
93 * Same as java.nio.channel.WritableByteChannel.
94 */
95 public static interface ByteOutputChannel {
96
97 /**
98 * Send the bytes ( usually the internal conversion buffer ). Expect 8k
99 * output if the buffer is full.
100 *
101 * @param buf bytes that will be written
102 * @param off offset in the bytes array
103 * @param len length that will be written
104 * @throws IOException If an I/O occurs while writing the bytes
105 */
106 public void realWriteBytes(byte buf[], int off, int len) throws IOException;
107
108
109 /**
110 * Send the bytes ( usually the internal conversion buffer ). Expect 8k
111 * output if the buffer is full.
112 *
113 * @param from bytes that will be written
114 * @throws IOException If an I/O occurs while writing the bytes
115 */
116 public void realWriteBytes(ByteBuffer from) throws IOException;
117 }
118
119 // --------------------
120
121 /**
122 * Default encoding used to convert to strings. It should be UTF8, as most
123 * standards seem to converge, but the servlet API requires 8859_1, and this
124 * object is used mostly for servlets.
125 */
126 public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
127
128 private transient Charset charset;
129
130 // byte[]
131 private byte[] buff;
132
133 // transient as serialization is primarily for values via, e.g. JMX
134 private transient ByteInputChannel in = null;
135 private transient ByteOutputChannel out = null;
136
137
138 /**
139 * Creates a new, uninitialized ByteChunk object.
140 */
141 public ByteChunk() {
142 }
143
144
145 public ByteChunk(int initial) {
146 allocate(initial, -1);
147 }
148
149
150 private void writeObject(ObjectOutputStream oos) throws IOException {
151 oos.defaultWriteObject();
152 oos.writeUTF(getCharset().name());
153 }
154
155
156 private void readObject(ObjectInputStream ois) throws ClassNotFoundException, IOException {
157 ois.defaultReadObject();
158 this.charset = Charset.forName(ois.readUTF());
159 }
160
161
162 @Override
163 public Object clone() throws CloneNotSupportedException {
164 return super.clone();
165 }
166
167
168 @Override
169 public void recycle() {
170 super.recycle();
171 charset = null;
172 }
173
174
175 // -------------------- Setup --------------------
176
177 public void allocate(int initial, int limit) {
178 if (buff == null || buff.length < initial) {
179 buff = new byte[initial];
180 }
181 setLimit(limit);
182 start = 0;
183 end = 0;
184 isSet = true;
185 hasHashCode = false;
186 }
187
188
189 /**
190 * Sets the buffer to the specified subarray of bytes.
191 *
192 * @param b the ascii bytes
193 * @param off the start offset of the bytes
194 * @param len the length of the bytes
195 */
196 public void setBytes(byte[] b, int off, int len) {
197 buff = b;
198 start = off;
199 end = start + len;
200 isSet = true;
201 hasHashCode = false;
202 }
203
204
205 public void setCharset(Charset charset) {
206 this.charset = charset;
207 }
208
209
210 public Charset getCharset() {
211 if (charset == null) {
212 charset = DEFAULT_CHARSET;
213 }
214 return charset;
215 }
216
217
218 /**
219 * @return the buffer.
220 */
221 public byte[] getBytes() {
222 return getBuffer();
223 }
224
225
226 /**
227 * @return the buffer.
228 */
229 public byte[] getBuffer() {
230 return buff;
231 }
232
233
234 /**
235 * When the buffer is empty, read the data from the input channel.
236 *
237 * @param in The input channel
238 */
239 public void setByteInputChannel(ByteInputChannel in) {
240 this.in = in;
241 }
242
243
244 /**
245 * When the buffer is full, write the data to the output channel. Also used
246 * when large amount of data is appended. If not set, the buffer will grow
247 * to the limit.
248 *
249 * @param out The output channel
250 */
251 public void setByteOutputChannel(ByteOutputChannel out) {
252 this.out = out;
253 }
254
255
256 // -------------------- Adding data to the buffer --------------------
257
258 public void append(byte b) throws IOException {
259 makeSpace(1);
260 int limit = getLimitInternal();
261
262 // couldn't make space
263 if (end >= limit) {
264 flushBuffer();
265 }
266 buff[end++] = b;
267 }
268
269
270 public void append(ByteChunk src) throws IOException {
271 append(src.getBytes(), src.getStart(), src.getLength());
272 }
273
274
275 /**
276 * Add data to the buffer.
277 *
278 * @param src Bytes array
279 * @param off Offset
280 * @param len Length
281 * @throws IOException Writing overflow data to the output channel failed
282 */
283 public void append(byte src[], int off, int len) throws IOException {
284 // will grow, up to limit
285 makeSpace(len);
286 int limit = getLimitInternal();
287
288 // Optimize on a common case.
289 // If the buffer is empty and the source is going to fill up all the
290 // space in buffer, may as well write it directly to the output,
291 // and avoid an extra copy
292 if (len == limit && end == start && out != null) {
293 out.realWriteBytes(src, off, len);
294 return;
295 }
296
297 // if we are below the limit
298 if (len <= limit - end) {
299 System.arraycopy(src, off, buff, end, len);
300 end += len;
301 return;
302 }
303
304 // Need more space than we can afford, need to flush buffer.
305
306 // The buffer is already at (or bigger than) limit.
307
308 // We chunk the data into slices fitting in the buffer limit, although
309 // if the data is written directly if it doesn't fit.
310
311 int avail = limit - end;
312 System.arraycopy(src, off, buff, end, avail);
313 end += avail;
314
315 flushBuffer();
316
317 int remain = len - avail;
318
319 while (remain > (limit - end)) {
320 out.realWriteBytes(src, (off + len) - remain, limit - end);
321 remain = remain - (limit - end);
322 }
323
324 System.arraycopy(src, (off + len) - remain, buff, end, remain);
325 end += remain;
326 }
327
328
329 /**
330 * Add data to the buffer.
331 *
332 * @param from the ByteBuffer with the data
333 * @throws IOException Writing overflow data to the output channel failed
334 */
335 public void append(ByteBuffer from) throws IOException {
336 int len = from.remaining();
337
338 // will grow, up to limit
339 makeSpace(len);
340 int limit = getLimitInternal();
341
342 // Optimize on a common case.
343 // If the buffer is empty and the source is going to fill up all the
344 // space in buffer, may as well write it directly to the output,
345 // and avoid an extra copy
346 if (len == limit && end == start && out != null) {
347 out.realWriteBytes(from);
348 from.position(from.limit());
349 return;
350 }
351 // if we have limit and we're below
352 if (len <= limit - end) {
353 // makeSpace will grow the buffer to the limit,
354 // so we have space
355 from.get(buff, end, len);
356 end += len;
357 return;
358 }
359
360 // need more space than we can afford, need to flush
361 // buffer
362
363 // the buffer is already at ( or bigger than ) limit
364
365 // We chunk the data into slices fitting in the buffer limit, although
366 // if the data is written directly if it doesn't fit
367
368 int avail = limit - end;
369 from.get(buff, end, avail);
370 end += avail;
371
372 flushBuffer();
373
374 int fromLimit = from.limit();
375 int remain = len - avail;
376 avail = limit - end;
377 while (remain >= avail) {
378 from.limit(from.position() + avail);
379 out.realWriteBytes(from);
380 from.position(from.limit());
381 remain = remain - avail;
382 }
383
384 from.limit(fromLimit);
385 from.get(buff, end, remain);
386 end += remain;
387 }
388
389
390 // -------------------- Removing data from the buffer --------------------
391
392 /*
393 * @deprecated Use {@link #subtract()}.
394 * This method will be removed in Tomcat 10
395 */
396 @Deprecated
397 public int substract() throws IOException {
398 return subtract();
399 }
400
401 public int subtract() throws IOException {
402 if (checkEof()) {
403 return -1;
404 }
405 return buff[start++] & 0xFF;
406 }
407
408 /*
409 * @deprecated Use {@link #subtractB()}.
410 * This method will be removed in Tomcat 10
411 */
412 @Deprecated
413 public byte substractB() throws IOException {
414 return subtractB();
415 }
416
417 public byte subtractB() throws IOException {
418 if (checkEof()) {
419 return -1;
420 }
421 return buff[start++];
422 }
423
424
425 /*
426 * @deprecated Use {@link #subtract(byte[],int,int)}.
427 * This method will be removed in Tomcat 10
428 */
429 @Deprecated
430 public int substract(byte dest[], int off, int len) throws IOException {
431 return subtract(dest, off, len);
432 }
433
434 public int subtract(byte dest[], int off, int len) throws IOException {
435 if (checkEof()) {
436 return -1;
437 }
438 int n = len;
439 if (len > getLength()) {
440 n = getLength();
441 }
442 System.arraycopy(buff, start, dest, off, n);
443 start += n;
444 return n;
445 }
446
447
448 /**
449 * Transfers bytes from the buffer to the specified ByteBuffer. After the
450 * operation the position of the ByteBuffer will be returned to the one
451 * before the operation, the limit will be the position incremented by the
452 * number of the transfered bytes.
453 *
454 * @param to the ByteBuffer into which bytes are to be written.
455 * @return an integer specifying the actual number of bytes read, or -1 if
456 * the end of the stream is reached
457 * @throws IOException if an input or output exception has occurred
458 *
459 * @deprecated Use {@link #subtract(ByteBuffer)}.
460 * This method will be removed in Tomcat 10
461 */
462 @Deprecated
463 public int substract(ByteBuffer to) throws IOException {
464 return subtract(to);
465 }
466
467
468 /**
469 * Transfers bytes from the buffer to the specified ByteBuffer. After the
470 * operation the position of the ByteBuffer will be returned to the one
471 * before the operation, the limit will be the position incremented by the
472 * number of the transfered bytes.
473 *
474 * @param to the ByteBuffer into which bytes are to be written.
475 * @return an integer specifying the actual number of bytes read, or -1 if
476 * the end of the stream is reached
477 * @throws IOException if an input or output exception has occurred
478 */
479 public int subtract(ByteBuffer to) throws IOException {
480 if (checkEof()) {
481 return -1;
482 }
483 int n = Math.min(to.remaining(), getLength());
484 to.put(buff, start, n);
485 to.limit(to.position());
486 to.position(to.position() - n);
487 start += n;
488 return n;
489 }
490
491
492 private boolean checkEof() throws IOException {
493 if ((end - start) == 0) {
494 if (in == null) {
495 return true;
496 }
497 int n = in.realReadBytes();
498 if (n < 0) {
499 return true;
500 }
501 }
502 return false;
503 }
504
505
506 /**
507 * Send the buffer to the sink. Called by append() when the limit is
508 * reached. You can also call it explicitly to force the data to be written.
509 *
510 * @throws IOException Writing overflow data to the output channel failed
511 */
512 public void flushBuffer() throws IOException {
513 // assert out!=null
514 if (out == null) {
515 throw new IOException(sm.getString(
516 "chunk.overflow", Integer.valueOf(getLimit()), Integer.valueOf(buff.length)));
517 }
518 out.realWriteBytes(buff, start, end - start);
519 end = start;
520 }
521
522
523 /**
524 * Make space for len bytes. If len is small, allocate a reserve space too.
525 * Never grow bigger than the limit or {@link AbstractChunk#ARRAY_MAX_SIZE}.
526 *
527 * @param count The size
528 */
529 public void makeSpace(int count) {
530 byte[] tmp = null;
531
532 int limit = getLimitInternal();
533
534 long newSize;
535 long desiredSize = end + count;
536
537 // Can't grow above the limit
538 if (desiredSize > limit) {
539 desiredSize = limit;
540 }
541
542 if (buff == null) {
543 if (desiredSize < 256) {
544 desiredSize = 256; // take a minimum
545 }
546 buff = new byte[(int) desiredSize];
547 }
548
549 // limit < buf.length (the buffer is already big)
550 // or we already have space XXX
551 if (desiredSize <= buff.length) {
552 return;
553 }
554 // grow in larger chunks
555 if (desiredSize < 2L * buff.length) {
556 newSize = buff.length * 2L;
557 } else {
558 newSize = buff.length * 2L + count;
559 }
560
561 if (newSize > limit) {
562 newSize = limit;
563 }
564 tmp = new byte[(int) newSize];
565
566 // Compacts buffer
567 System.arraycopy(buff, start, tmp, 0, end - start);
568 buff = tmp;
569 tmp = null;
570 end = end - start;
571 start = 0;
572 }
573
574
575 // -------------------- Conversion and getters --------------------
576
577 @Override
578 public String toString() {
579 if (isNull()) {
580 return null;
581 } else if (end - start == 0) {
582 return "";
583 }
584 return StringCache.toString(this);
585 }
586
587
588 public String toStringInternal() {
589 if (charset == null) {
590 charset = DEFAULT_CHARSET;
591 }
592 // new String(byte[], int, int, Charset) takes a defensive copy of the
593 // entire byte array. This is expensive if only a small subset of the
594 // bytes will be used. The code below is from Apache Harmony.
595 CharBuffer cb = charset.decode(ByteBuffer.wrap(buff, start, end - start));
596 return new String(cb.array(), cb.arrayOffset(), cb.length());
597 }
598
599
600 public long getLong() {
601 return Ascii.parseLong(buff, start, end - start);
602 }
603
604
605 // -------------------- equals --------------------
606
607 @Override
608 public boolean equals(Object obj) {
609 if (obj instanceof ByteChunk) {
610 return equals((ByteChunk) obj);
611 }
612 return false;
613 }
614
615
616 /**
617 * Compares the message bytes to the specified String object.
618 *
619 * @param s the String to compare
620 * @return <code>true</code> if the comparison succeeded, <code>false</code>
621 * otherwise
622 */
623 public boolean equals(String s) {
624 // XXX ENCODING - this only works if encoding is UTF8-compat
625 // ( ok for tomcat, where we compare ascii - header names, etc )!!!
626
627 byte[] b = buff;
628 int len = end - start;
629 if (b == null || len != s.length()) {
630 return false;
631 }
632 int off = start;
633 for (int i = 0; i < len; i++) {
634 if (b[off++] != s.charAt(i)) {
635 return false;
636 }
637 }
638 return true;
639 }
640
641
642 /**
643 * Compares the message bytes to the specified String object.
644 *
645 * @param s the String to compare
646 * @return <code>true</code> if the comparison succeeded, <code>false</code>
647 * otherwise
648 */
649 public boolean equalsIgnoreCase(String s) {
650 byte[] b = buff;
651 int len = end - start;
652 if (b == null || len != s.length()) {
653 return false;
654 }
655 int off = start;
656 for (int i = 0; i < len; i++) {
657 if (Ascii.toLower(b[off++]) != Ascii.toLower(s.charAt(i))) {
658 return false;
659 }
660 }
661 return true;
662 }
663
664
665 public boolean equals(ByteChunk bb) {
666 return equals(bb.getBytes(), bb.getStart(), bb.getLength());
667 }
668
669
670 public boolean equals(byte b2[], int off2, int len2) {
671 byte b1[] = buff;
672 if (b1 == null && b2 == null) {
673 return true;
674 }
675
676 int len = end - start;
677 if (len != len2 || b1 == null || b2 == null) {
678 return false;
679 }
680
681 int off1 = start;
682
683 while (len-- > 0) {
684 if (b1[off1++] != b2[off2++]) {
685 return false;
686 }
687 }
688 return true;
689 }
690
691
692 public boolean equals(CharChunk cc) {
693 return equals(cc.getChars(), cc.getStart(), cc.getLength());
694 }
695
696
697 public boolean equals(char c2[], int off2, int len2) {
698 // XXX works only for enc compatible with ASCII/UTF !!!
699 byte b1[] = buff;
700 if (c2 == null && b1 == null) {
701 return true;
702 }
703
704 if (b1 == null || c2 == null || end - start != len2) {
705 return false;
706 }
707 int off1 = start;
708 int len = end - start;
709
710 while (len-- > 0) {
711 if ((char) b1[off1++] != c2[off2++]) {
712 return false;
713 }
714 }
715 return true;
716 }
717
718
719 /**
720 * Returns true if the buffer starts with the specified string when tested
721 * in a case sensitive manner.
722 *
723 * @param s the string
724 * @param pos The position
725 *
726 * @return <code>true</code> if the start matches
727 */
728 public boolean startsWith(String s, int pos) {
729 byte[] b = buff;
730 int len = s.length();
731 if (b == null || len + pos > end - start) {
732 return false;
733 }
734 int off = start + pos;
735 for (int i = 0; i < len; i++) {
736 if (b[off++] != s.charAt(i)) {
737 return false;
738 }
739 }
740 return true;
741 }
742
743
744 /**
745 * Returns true if the buffer starts with the specified string when tested
746 * in a case insensitive manner.
747 *
748 * @param s the string
749 * @param pos The position
750 *
751 * @return <code>true</code> if the start matches
752 */
753 public boolean startsWithIgnoreCase(String s, int pos) {
754 byte[] b = buff;
755 int len = s.length();
756 if (b == null || len + pos > end - start) {
757 return false;
758 }
759 int off = start + pos;
760 for (int i = 0; i < len; i++) {
761 if (Ascii.toLower(b[off++]) != Ascii.toLower(s.charAt(i))) {
762 return false;
763 }
764 }
765 return true;
766 }
767
768
769 @Override
770 protected int getBufferElement(int index) {
771 return buff[index];
772 }
773
774
775 /**
776 * Returns the first instance of the given character in this ByteChunk
777 * starting at the specified byte. If the character is not found, -1 is
778 * returned. <br>
779 * NOTE: This only works for characters in the range 0-127.
780 *
781 * @param c The character
782 * @param starting The start position
783 * @return The position of the first instance of the character or -1 if the
784 * character is not found.
785 */
786 public int indexOf(char c, int starting) {
787 int ret = indexOf(buff, start + starting, end, c);
788 return (ret >= start) ? ret - start : -1;
789 }
790
791
792 /**
793 * Returns the first instance of the given character in the given byte array
794 * between the specified start and end. <br>
795 * NOTE: This only works for characters in the range 0-127.
796 *
797 * @param bytes The array to search
798 * @param start The point to start searching from in the array
799 * @param end The point to stop searching in the array
800 * @param s The character to search for
801 * @return The position of the first instance of the character or -1 if the
802 * character is not found.
803 */
804 public static int indexOf(byte bytes[], int start, int end, char s) {
805 int offset = start;
806
807 while (offset < end) {
808 byte b = bytes[offset];
809 if (b == s) {
810 return offset;
811 }
812 offset++;
813 }
814 return -1;
815 }
816
817
818 /**
819 * Returns the first instance of the given byte in the byte array between
820 * the specified start and end.
821 *
822 * @param bytes The byte array to search
823 * @param start The point to start searching from in the byte array
824 * @param end The point to stop searching in the byte array
825 * @param b The byte to search for
826 * @return The position of the first instance of the byte or -1 if the byte
827 * is not found.
828 */
829 public static int findByte(byte bytes[], int start, int end, byte b) {
830 int offset = start;
831 while (offset < end) {
832 if (bytes[offset] == b) {
833 return offset;
834 }
835 offset++;
836 }
837 return -1;
838 }
839
840
841 /**
842 * Returns the first instance of any of the given bytes in the byte array
843 * between the specified start and end.
844 *
845 * @param bytes The byte array to search
846 * @param start The point to start searching from in the byte array
847 * @param end The point to stop searching in the byte array
848 * @param b The array of bytes to search for
849 * @return The position of the first instance of the byte or -1 if the byte
850 * is not found.
851 */
852 public static int findBytes(byte bytes[], int start, int end, byte b[]) {
853 int blen = b.length;
854 int offset = start;
855 while (offset < end) {
856 for (int i = 0; i < blen; i++) {
857 if (bytes[offset] == b[i]) {
858 return offset;
859 }
860 }
861 offset++;
862 }
863 return -1;
864 }
865
866
867 /**
868 * Convert specified String to a byte array. This ONLY WORKS for ascii, UTF
869 * chars will be truncated.
870 *
871 * @param value to convert to byte array
872 * @return the byte array value
873 */
874 public static final byte[] convertToBytes(String value) {
875 byte[] result = new byte[value.length()];
876 for (int i = 0; i < value.length(); i++) {
877 result[i] = (byte) value.charAt(i);
878 }
879 return result;
880 }
881 }
882