1 /*
2 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 package java.util.regex;
27
28 import java.util.ConcurrentModificationException;
29 import java.util.Iterator;
30 import java.util.NoSuchElementException;
31 import java.util.Objects;
32 import java.util.Spliterator;
33 import java.util.Spliterators;
34 import java.util.function.Consumer;
35 import java.util.function.Function;
36 import java.util.stream.Stream;
37 import java.util.stream.StreamSupport;
38
39 /**
40 * An engine that performs match operations on a {@linkplain
41 * java.lang.CharSequence character sequence} by interpreting a {@link Pattern}.
42 *
43 * <p> A matcher is created from a pattern by invoking the pattern's {@link
44 * Pattern#matcher matcher} method. Once created, a matcher can be used to
45 * perform three different kinds of match operations:
46 *
47 * <ul>
48 *
49 * <li><p> The {@link #matches matches} method attempts to match the entire
50 * input sequence against the pattern. </p></li>
51 *
52 * <li><p> The {@link #lookingAt lookingAt} method attempts to match the
53 * input sequence, starting at the beginning, against the pattern. </p></li>
54 *
55 * <li><p> The {@link #find find} method scans the input sequence looking
56 * for the next subsequence that matches the pattern. </p></li>
57 *
58 * </ul>
59 *
60 * <p> Each of these methods returns a boolean indicating success or failure.
61 * More information about a successful match can be obtained by querying the
62 * state of the matcher.
63 *
64 * <p> A matcher finds matches in a subset of its input called the
65 * <i>region</i>. By default, the region contains all of the matcher's input.
66 * The region can be modified via the {@link #region(int, int) region} method
67 * and queried via the {@link #regionStart() regionStart} and {@link
68 * #regionEnd() regionEnd} methods. The way that the region boundaries interact
69 * with some pattern constructs can be changed. See {@link
70 * #useAnchoringBounds(boolean) useAnchoringBounds} and {@link
71 * #useTransparentBounds(boolean) useTransparentBounds} for more details.
72 *
73 * <p> This class also defines methods for replacing matched subsequences with
74 * new strings whose contents can, if desired, be computed from the match
75 * result. The {@link #appendReplacement appendReplacement} and {@link
76 * #appendTail appendTail} methods can be used in tandem in order to collect
77 * the result into an existing string buffer or string builder. Alternatively,
78 * the more convenient {@link #replaceAll replaceAll} method can be used to
79 * create a string in which every matching subsequence in the input sequence
80 * is replaced.
81 *
82 * <p> The explicit state of a matcher includes the start and end indices of
83 * the most recent successful match. It also includes the start and end
84 * indices of the input subsequence captured by each <a
85 * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total
86 * count of such subsequences. As a convenience, methods are also provided for
87 * returning these captured subsequences in string form.
88 *
89 * <p> The explicit state of a matcher is initially undefined; attempting to
90 * query any part of it before a successful match will cause an {@link
91 * IllegalStateException} to be thrown. The explicit state of a matcher is
92 * recomputed by every match operation.
93 *
94 * <p> The implicit state of a matcher includes the input character sequence as
95 * well as the <i>append position</i>, which is initially zero and is updated
96 * by the {@link #appendReplacement appendReplacement} method.
97 *
98 * <p> A matcher may be reset explicitly by invoking its {@link #reset()}
99 * method or, if a new input sequence is desired, its {@link
100 * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a
101 * matcher discards its explicit state information and sets the append position
102 * to zero.
103 *
104 * <p> Instances of this class are not safe for use by multiple concurrent
105 * threads. </p>
106 *
107 *
108 * @author Mike McCloskey
109 * @author Mark Reinhold
110 * @author JSR-51 Expert Group
111 * @since 1.4
112 * @spec JSR-51
113 */
114
115 public final class Matcher implements MatchResult {
116
117 /**
118 * The Pattern object that created this Matcher.
119 */
120 Pattern parentPattern;
121
122 /**
123 * The storage used by groups. They may contain invalid values if
124 * a group was skipped during the matching.
125 */
126 int[] groups;
127
128 /**
129 * The range within the sequence that is to be matched. Anchors
130 * will match at these "hard" boundaries. Changing the region
131 * changes these values.
132 */
133 int from, to;
134
135 /**
136 * Lookbehind uses this value to ensure that the subexpression
137 * match ends at the point where the lookbehind was encountered.
138 */
139 int lookbehindTo;
140
141 /**
142 * The original string being matched.
143 */
144 CharSequence text;
145
146 /**
147 * Matcher state used by the last node. NOANCHOR is used when a
148 * match does not have to consume all of the input. ENDANCHOR is
149 * the mode used for matching all the input.
150 */
151 static final int ENDANCHOR = 1;
152 static final int NOANCHOR = 0;
153 int acceptMode = NOANCHOR;
154
155 /**
156 * The range of string that last matched the pattern. If the last
157 * match failed then first is -1; last initially holds 0 then it
158 * holds the index of the end of the last match (which is where the
159 * next search starts).
160 */
161 int first = -1, last = 0;
162
163 /**
164 * The end index of what matched in the last match operation.
165 */
166 int oldLast = -1;
167
168 /**
169 * The index of the last position appended in a substitution.
170 */
171 int lastAppendPosition = 0;
172
173 /**
174 * Storage used by nodes to tell what repetition they are on in
175 * a pattern, and where groups begin. The nodes themselves are stateless,
176 * so they rely on this field to hold state during a match.
177 */
178 int[] locals;
179
180 /**
181 * Storage used by top greedy Loop node to store a specific hash set to
182 * keep the beginning index of the failed repetition match. The nodes
183 * themselves are stateless, so they rely on this field to hold state
184 * during a match.
185 */
186 IntHashSet[] localsPos;
187
188 /**
189 * Boolean indicating whether or not more input could change
190 * the results of the last match.
191 *
192 * If hitEnd is true, and a match was found, then more input
193 * might cause a different match to be found.
194 * If hitEnd is true and a match was not found, then more
195 * input could cause a match to be found.
196 * If hitEnd is false and a match was found, then more input
197 * will not change the match.
198 * If hitEnd is false and a match was not found, then more
199 * input will not cause a match to be found.
200 */
201 boolean hitEnd;
202
203 /**
204 * Boolean indicating whether or not more input could change
205 * a positive match into a negative one.
206 *
207 * If requireEnd is true, and a match was found, then more
208 * input could cause the match to be lost.
209 * If requireEnd is false and a match was found, then more
210 * input might change the match but the match won't be lost.
211 * If a match was not found, then requireEnd has no meaning.
212 */
213 boolean requireEnd;
214
215 /**
216 * If transparentBounds is true then the boundaries of this
217 * matcher's region are transparent to lookahead, lookbehind,
218 * and boundary matching constructs that try to see beyond them.
219 */
220 boolean transparentBounds = false;
221
222 /**
223 * If anchoringBounds is true then the boundaries of this
224 * matcher's region match anchors such as ^ and $.
225 */
226 boolean anchoringBounds = true;
227
228 /**
229 * Number of times this matcher's state has been modified
230 */
231 int modCount;
232
233 /**
234 * No default constructor.
235 */
Matcher() {
    // Deliberately empty: every field keeps its declared initial value.
}
238
239 /**
240 * All matchers have the state used by Pattern during a match.
241 */
242 Matcher(Pattern parent, CharSequence text) {
243 this.parentPattern = parent;
244 this.text = text;
245
246 // Allocate state storage
247 int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
248 groups = new int[parentGroupCount * 2];
249 locals = new int[parent.localCount];
250 localsPos = new IntHashSet[parent.localTCNCount];
251
252 // Put fields into initial states
253 reset();
254 }
255
256 /**
257 * Returns the pattern that is interpreted by this matcher.
258 *
259 * @return The pattern for which this matcher was created
260 */
261 public Pattern pattern() {
262 return parentPattern;
263 }
264
265 /**
266 * Returns the match state of this matcher as a {@link MatchResult}.
267 * The result is unaffected by subsequent operations performed upon this
268 * matcher.
269 *
270 * @return a {@code MatchResult} with the state of this matcher
271 * @since 1.5
272 */
273 public MatchResult toMatchResult() {
274 return toMatchResult(text.toString());
275 }
276
277 private MatchResult toMatchResult(String text) {
278 return new ImmutableMatchResult(this.first,
279 this.last,
280 groupCount(),
281 this.groups.clone(),
282 text);
283 }
284
285 private static class ImmutableMatchResult implements MatchResult {
286 private final int first;
287 private final int last;
288 private final int[] groups;
289 private final int groupCount;
290 private final String text;
291
292 ImmutableMatchResult(int first, int last, int groupCount,
293 int groups[], String text)
294 {
295 this.first = first;
296 this.last = last;
297 this.groupCount = groupCount;
298 this.groups = groups;
299 this.text = text;
300 }
301
302 @Override
303 public int start() {
304 checkMatch();
305 return first;
306 }
307
308 @Override
309 public int start(int group) {
310 checkMatch();
311 if (group < 0 || group > groupCount)
312 throw new IndexOutOfBoundsException("No group " + group);
313 return groups[group * 2];
314 }
315
316 @Override
317 public int end() {
318 checkMatch();
319 return last;
320 }
321
322 @Override
323 public int end(int group) {
324 checkMatch();
325 if (group < 0 || group > groupCount)
326 throw new IndexOutOfBoundsException("No group " + group);
327 return groups[group * 2 + 1];
328 }
329
330 @Override
331 public int groupCount() {
332 return groupCount;
333 }
334
335 @Override
336 public String group() {
337 checkMatch();
338 return group(0);
339 }
340
341 @Override
342 public String group(int group) {
343 checkMatch();
344 if (group < 0 || group > groupCount)
345 throw new IndexOutOfBoundsException("No group " + group);
346 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
347 return null;
348 return text.subSequence(groups[group * 2], groups[group * 2 + 1]).toString();
349 }
350
351 private void checkMatch() {
352 if (first < 0)
353 throw new IllegalStateException("No match found");
354
355 }
356 }
357
358 /**
359 * Changes the {@code Pattern} that this {@code Matcher} uses to
360 * find matches with.
361 *
362 * <p> This method causes this matcher to lose information
363 * about the groups of the last match that occurred. The
364 * matcher's position in the input is maintained and its
365 * last append position is unaffected.</p>
366 *
367 * @param newPattern
368 * The new pattern used by this matcher
369 * @return This matcher
370 * @throws IllegalArgumentException
371 * If newPattern is {@code null}
372 * @since 1.5
373 */
374 public Matcher usePattern(Pattern newPattern) {
375 if (newPattern == null)
376 throw new IllegalArgumentException("Pattern cannot be null");
377 parentPattern = newPattern;
378
379 // Reallocate state storage
380 int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10);
381 groups = new int[parentGroupCount * 2];
382 locals = new int[newPattern.localCount];
383 for (int i = 0; i < groups.length; i++)
384 groups[i] = -1;
385 for (int i = 0; i < locals.length; i++)
386 locals[i] = -1;
387 localsPos = new IntHashSet[parentPattern.localTCNCount];
388 modCount++;
389 return this;
390 }
391
392 /**
393 * Resets this matcher.
394 *
395 * <p> Resetting a matcher discards all of its explicit state information
396 * and sets its append position to zero. The matcher's region is set to the
397 * default region, which is its entire character sequence. The anchoring
398 * and transparency of this matcher's region boundaries are unaffected.
399 *
400 * @return This matcher
401 */
402 public Matcher reset() {
403 first = -1;
404 last = 0;
405 oldLast = -1;
406 for(int i=0; i<groups.length; i++)
407 groups[i] = -1;
408 for(int i=0; i<locals.length; i++)
409 locals[i] = -1;
410 for (int i = 0; i < localsPos.length; i++) {
411 if (localsPos[i] != null)
412 localsPos[i].clear();
413 }
414 lastAppendPosition = 0;
415 from = 0;
416 to = getTextLength();
417 modCount++;
418 return this;
419 }
420
421 /**
422 * Resets this matcher with a new input sequence.
423 *
424 * <p> Resetting a matcher discards all of its explicit state information
425 * and sets its append position to zero. The matcher's region is set to
426 * the default region, which is its entire character sequence. The
427 * anchoring and transparency of this matcher's region boundaries are
428 * unaffected.
429 *
430 * @param input
431 * The new input character sequence
432 *
433 * @return This matcher
434 */
435 public Matcher reset(CharSequence input) {
436 text = input;
437 return reset();
438 }
439
440 /**
441 * Returns the start index of the previous match.
442 *
443 * @return The index of the first character matched
444 *
445 * @throws IllegalStateException
446 * If no match has yet been attempted,
447 * or if the previous match operation failed
448 */
449 public int start() {
450 if (first < 0)
451 throw new IllegalStateException("No match available");
452 return first;
453 }
454
455 /**
456 * Returns the start index of the subsequence captured by the given group
457 * during the previous match operation.
458 *
459 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
460 * to right, starting at one. Group zero denotes the entire pattern, so
461 * the expression <i>m.</i>{@code start(0)} is equivalent to
462 * <i>m.</i>{@code start()}. </p>
463 *
464 * @param group
465 * The index of a capturing group in this matcher's pattern
466 *
467 * @return The index of the first character captured by the group,
468 * or {@code -1} if the match was successful but the group
469 * itself did not match anything
470 *
471 * @throws IllegalStateException
472 * If no match has yet been attempted,
473 * or if the previous match operation failed
474 *
475 * @throws IndexOutOfBoundsException
476 * If there is no capturing group in the pattern
477 * with the given index
478 */
479 public int start(int group) {
480 if (first < 0)
481 throw new IllegalStateException("No match available");
482 if (group < 0 || group > groupCount())
483 throw new IndexOutOfBoundsException("No group " + group);
484 return groups[group * 2];
485 }
486
487 /**
488 * Returns the start index of the subsequence captured by the given
489 * <a href="Pattern.html#groupname">named-capturing group</a> during the
490 * previous match operation.
491 *
492 * @param name
493 * The name of a named-capturing group in this matcher's pattern
494 *
495 * @return The index of the first character captured by the group,
496 * or {@code -1} if the match was successful but the group
497 * itself did not match anything
498 *
499 * @throws IllegalStateException
500 * If no match has yet been attempted,
501 * or if the previous match operation failed
502 *
503 * @throws IllegalArgumentException
504 * If there is no capturing group in the pattern
505 * with the given name
506 * @since 1.8
507 */
508 public int start(String name) {
509 return groups[getMatchedGroupIndex(name) * 2];
510 }
511
512 /**
513 * Returns the offset after the last character matched.
514 *
515 * @return The offset after the last character matched
516 *
517 * @throws IllegalStateException
518 * If no match has yet been attempted,
519 * or if the previous match operation failed
520 */
521 public int end() {
522 if (first < 0)
523 throw new IllegalStateException("No match available");
524 return last;
525 }
526
527 /**
528 * Returns the offset after the last character of the subsequence
529 * captured by the given group during the previous match operation.
530 *
531 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
532 * to right, starting at one. Group zero denotes the entire pattern, so
533 * the expression <i>m.</i>{@code end(0)} is equivalent to
534 * <i>m.</i>{@code end()}. </p>
535 *
536 * @param group
537 * The index of a capturing group in this matcher's pattern
538 *
539 * @return The offset after the last character captured by the group,
540 * or {@code -1} if the match was successful
541 * but the group itself did not match anything
542 *
543 * @throws IllegalStateException
544 * If no match has yet been attempted,
545 * or if the previous match operation failed
546 *
547 * @throws IndexOutOfBoundsException
548 * If there is no capturing group in the pattern
549 * with the given index
550 */
551 public int end(int group) {
552 if (first < 0)
553 throw new IllegalStateException("No match available");
554 if (group < 0 || group > groupCount())
555 throw new IndexOutOfBoundsException("No group " + group);
556 return groups[group * 2 + 1];
557 }
558
559 /**
560 * Returns the offset after the last character of the subsequence
561 * captured by the given <a href="Pattern.html#groupname">named-capturing
562 * group</a> during the previous match operation.
563 *
564 * @param name
565 * The name of a named-capturing group in this matcher's pattern
566 *
567 * @return The offset after the last character captured by the group,
568 * or {@code -1} if the match was successful
569 * but the group itself did not match anything
570 *
571 * @throws IllegalStateException
572 * If no match has yet been attempted,
573 * or if the previous match operation failed
574 *
575 * @throws IllegalArgumentException
576 * If there is no capturing group in the pattern
577 * with the given name
578 * @since 1.8
579 */
580 public int end(String name) {
581 return groups[getMatchedGroupIndex(name) * 2 + 1];
582 }
583
584 /**
585 * Returns the input subsequence matched by the previous match.
586 *
587 * <p> For a matcher <i>m</i> with input sequence <i>s</i>,
588 * the expressions <i>m.</i>{@code group()} and
589 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(),} <i>m.</i>
590 * {@code end())} are equivalent. </p>
591 *
592 * <p> Note that some patterns, for example {@code a*}, match the empty
593 * string. This method will return the empty string when the pattern
594 * successfully matches the empty string in the input. </p>
595 *
596 * @return The (possibly empty) subsequence matched by the previous match,
597 * in string form
598 *
599 * @throws IllegalStateException
600 * If no match has yet been attempted,
601 * or if the previous match operation failed
602 */
603 public String group() {
604 return group(0);
605 }
606
607 /**
608 * Returns the input subsequence captured by the given group during the
609 * previous match operation.
610 *
611 * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index
612 * <i>g</i>, the expressions <i>m.</i>{@code group(}<i>g</i>{@code )} and
613 * <i>s.</i>{@code substring(}<i>m.</i>{@code start(}<i>g</i>{@code
614 * ),} <i>m.</i>{@code end(}<i>g</i>{@code ))}
615 * are equivalent. </p>
616 *
617 * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left
618 * to right, starting at one. Group zero denotes the entire pattern, so
619 * the expression {@code m.group(0)} is equivalent to {@code m.group()}.
620 * </p>
621 *
622 * <p> If the match was successful but the group specified failed to match
623 * any part of the input sequence, then {@code null} is returned. Note
624 * that some groups, for example {@code (a*)}, match the empty string.
625 * This method will return the empty string when such a group successfully
626 * matches the empty string in the input. </p>
627 *
628 * @param group
629 * The index of a capturing group in this matcher's pattern
630 *
631 * @return The (possibly empty) subsequence captured by the group
632 * during the previous match, or {@code null} if the group
633 * failed to match part of the input
634 *
635 * @throws IllegalStateException
636 * If no match has yet been attempted,
637 * or if the previous match operation failed
638 *
639 * @throws IndexOutOfBoundsException
640 * If there is no capturing group in the pattern
641 * with the given index
642 */
643 public String group(int group) {
644 if (first < 0)
645 throw new IllegalStateException("No match found");
646 if (group < 0 || group > groupCount())
647 throw new IndexOutOfBoundsException("No group " + group);
648 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
649 return null;
650 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
651 }
652
653 /**
654 * Returns the input subsequence captured by the given
655 * <a href="Pattern.html#groupname">named-capturing group</a> during the
656 * previous match operation.
657 *
658 * <p> If the match was successful but the group specified failed to match
659 * any part of the input sequence, then {@code null} is returned. Note
660 * that some groups, for example {@code (a*)}, match the empty string.
661 * This method will return the empty string when such a group successfully
662 * matches the empty string in the input. </p>
663 *
664 * @param name
665 * The name of a named-capturing group in this matcher's pattern
666 *
667 * @return The (possibly empty) subsequence captured by the named group
668 * during the previous match, or {@code null} if the group
669 * failed to match part of the input
670 *
671 * @throws IllegalStateException
672 * If no match has yet been attempted,
673 * or if the previous match operation failed
674 *
675 * @throws IllegalArgumentException
676 * If there is no capturing group in the pattern
677 * with the given name
678 * @since 1.7
679 */
680 public String group(String name) {
681 int group = getMatchedGroupIndex(name);
682 if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
683 return null;
684 return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
685 }
686
687 /**
688 * Returns the number of capturing groups in this matcher's pattern.
689 *
690 * <p> Group zero denotes the entire pattern by convention. It is not
691 * included in this count.
692 *
693 * <p> Any non-negative integer smaller than or equal to the value
694 * returned by this method is guaranteed to be a valid group index for
695 * this matcher. </p>
696 *
697 * @return The number of capturing groups in this matcher's pattern
698 */
699 public int groupCount() {
700 return parentPattern.capturingGroupCount - 1;
701 }
702
703 /**
704 * Attempts to match the entire region against the pattern.
705 *
706 * <p> If the match succeeds then more information can be obtained via the
707 * {@code start}, {@code end}, and {@code group} methods. </p>
708 *
709 * @return {@code true} if, and only if, the entire region sequence
710 * matches this matcher's pattern
711 */
712 public boolean matches() {
713 return match(from, ENDANCHOR);
714 }
715
716 /**
717 * Attempts to find the next subsequence of the input sequence that matches
718 * the pattern.
719 *
720 * <p> This method starts at the beginning of this matcher's region, or, if
721 * a previous invocation of the method was successful and the matcher has
722 * not since been reset, at the first character not matched by the previous
723 * match.
724 *
725 * <p> If the match succeeds then more information can be obtained via the
726 * {@code start}, {@code end}, and {@code group} methods. </p>
727 *
728 * @return {@code true} if, and only if, a subsequence of the input
729 * sequence matches this matcher's pattern
730 */
731 public boolean find() {
732 int nextSearchIndex = last;
733 if (nextSearchIndex == first)
734 nextSearchIndex++;
735
736 // If next search starts before region, start it at region
737 if (nextSearchIndex < from)
738 nextSearchIndex = from;
739
740 // If next search starts beyond region then it fails
741 if (nextSearchIndex > to) {
742 for (int i = 0; i < groups.length; i++)
743 groups[i] = -1;
744 return false;
745 }
746 return search(nextSearchIndex);
747 }
748
749 /**
750 * Resets this matcher and then attempts to find the next subsequence of
751 * the input sequence that matches the pattern, starting at the specified
752 * index.
753 *
754 * <p> If the match succeeds then more information can be obtained via the
755 * {@code start}, {@code end}, and {@code group} methods, and subsequent
756 * invocations of the {@link #find()} method will start at the first
757 * character not matched by this match. </p>
758 *
759 * @param start the index to start searching for a match
760 * @throws IndexOutOfBoundsException
761 * If start is less than zero or if start is greater than the
762 * length of the input sequence.
763 *
764 * @return {@code true} if, and only if, a subsequence of the input
765 * sequence starting at the given index matches this matcher's
766 * pattern
767 */
768 public boolean find(int start) {
769 int limit = getTextLength();
770 if ((start < 0) || (start > limit))
771 throw new IndexOutOfBoundsException("Illegal start index");
772 reset();
773 return search(start);
774 }
775
776 /**
777 * Attempts to match the input sequence, starting at the beginning of the
778 * region, against the pattern.
779 *
780 * <p> Like the {@link #matches matches} method, this method always starts
781 * at the beginning of the region; unlike that method, it does not
782 * require that the entire region be matched.
783 *
784 * <p> If the match succeeds then more information can be obtained via the
785 * {@code start}, {@code end}, and {@code group} methods. </p>
786 *
787 * @return {@code true} if, and only if, a prefix of the input
788 * sequence matches this matcher's pattern
789 */
790 public boolean lookingAt() {
791 return match(from, NOANCHOR);
792 }
793
794 /**
795 * Returns a literal replacement {@code String} for the specified
796 * {@code String}.
797 *
798 * This method produces a {@code String} that will work
799 * as a literal replacement {@code s} in the
800 * {@code appendReplacement} method of the {@link Matcher} class.
801 * The {@code String} produced will match the sequence of characters
802 * in {@code s} treated as a literal sequence. Slashes ('\') and
803 * dollar signs ('$') will be given no special meaning.
804 *
805 * @param s The string to be literalized
806 * @return A literal string replacement
807 * @since 1.5
808 */
809 public static String quoteReplacement(String s) {
810 if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1))
811 return s;
812 StringBuilder sb = new StringBuilder();
813 for (int i=0; i<s.length(); i++) {
814 char c = s.charAt(i);
815 if (c == '\\' || c == '$') {
816 sb.append('\\');
817 }
818 sb.append(c);
819 }
820 return sb.toString();
821 }
822
823 /**
824 * Implements a non-terminal append-and-replace step.
825 *
826 * <p> This method performs the following actions: </p>
827 *
828 * <ol>
829 *
830 * <li><p> It reads characters from the input sequence, starting at the
831 * append position, and appends them to the given string buffer. It
832 * stops after reading the last character preceding the previous match,
833 * that is, the character at index {@link
834 * #start()} {@code -} {@code 1}. </p></li>
835 *
836 * <li><p> It appends the given replacement string to the string buffer.
837 * </p></li>
838 *
839 * <li><p> It sets the append position of this matcher to the index of
840 * the last character matched, plus one, that is, to {@link #end()}.
841 * </p></li>
842 *
843 * </ol>
844 *
845 * <p> The replacement string may contain references to subsequences
846 * captured during the previous match: Each occurrence of
847 * <code>${</code><i>name</i><code>}</code> or {@code $}<i>g</i>
848 * will be replaced by the result of evaluating the corresponding
849 * {@link #group(String) group(name)} or {@link #group(int) group(g)}
850 * respectively. For {@code $}<i>g</i>,
851 * the first number after the {@code $} is always treated as part of
852 * the group reference. Subsequent numbers are incorporated into g if
853 * they would form a legal group reference. Only the numerals '0'
854 * through '9' are considered as potential components of the group
855 * reference. If the second group matched the string {@code "foo"}, for
856 * example, then passing the replacement string {@code "$2bar"} would
857 * cause {@code "foobar"} to be appended to the string buffer. A dollar
858 * sign ({@code $}) may be included as a literal in the replacement
859 * string by preceding it with a backslash ({@code \$}).
860 *
861 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
862 * the replacement string may cause the results to be different than if it
863 * were being treated as a literal replacement string. Dollar signs may be
864 * treated as references to captured subsequences as described above, and
865 * backslashes are used to escape literal characters in the replacement
866 * string.
867 *
868 * <p> This method is intended to be used in a loop together with the
869 * {@link #appendTail(StringBuffer) appendTail} and {@link #find() find}
870 * methods. The following code, for example, writes {@code one dog two dogs
871 * in the yard} to the standard-output stream: </p>
872 *
873 * <blockquote><pre>
874 * Pattern p = Pattern.compile("cat");
875 * Matcher m = p.matcher("one cat two cats in the yard");
876 * StringBuffer sb = new StringBuffer();
877 * while (m.find()) {
878 * m.appendReplacement(sb, "dog");
879 * }
880 * m.appendTail(sb);
881 * System.out.println(sb.toString());</pre></blockquote>
882 *
883 * @param sb
884 * The target string buffer
885 *
886 * @param replacement
887 * The replacement string
888 *
889 * @return This matcher
890 *
891 * @throws IllegalStateException
892 * If no match has yet been attempted,
893 * or if the previous match operation failed
894 *
895 * @throws IllegalArgumentException
896 * If the replacement string refers to a named-capturing
897 * group that does not exist in the pattern
898 *
899 * @throws IndexOutOfBoundsException
900 * If the replacement string refers to a capturing group
901 * that does not exist in the pattern
902 */
903 public Matcher appendReplacement(StringBuffer sb, String replacement) {
904 // If no match, return error
905 if (first < 0)
906 throw new IllegalStateException("No match available");
907 StringBuilder result = new StringBuilder();
908 appendExpandedReplacement(replacement, result);
909 // Append the intervening text
910 sb.append(text, lastAppendPosition, first);
911 // Append the match substitution
912 sb.append(result);
913 lastAppendPosition = last;
914 modCount++;
915 return this;
916 }
917
918 /**
919 * Implements a non-terminal append-and-replace step.
920 *
921 * <p> This method performs the following actions: </p>
922 *
923 * <ol>
924 *
925 * <li><p> It reads characters from the input sequence, starting at the
926 * append position, and appends them to the given string builder. It
927 * stops after reading the last character preceding the previous match,
928 * that is, the character at index {@link
929 * #start()} {@code -} {@code 1}. </p></li>
930 *
931 * <li><p> It appends the given replacement string to the string builder.
932 * </p></li>
933 *
934 * <li><p> It sets the append position of this matcher to the index of
935 * the last character matched, plus one, that is, to {@link #end()}.
936 * </p></li>
937 *
938 * </ol>
939 *
940 * <p> The replacement string may contain references to subsequences
941 * captured during the previous match: Each occurrence of
942 * {@code $}<i>g</i> will be replaced by the result of
943 * evaluating {@link #group(int) group}{@code (}<i>g</i>{@code )}.
944 * The first number after the {@code $} is always treated as part of
945 * the group reference. Subsequent numbers are incorporated into g if
946 * they would form a legal group reference. Only the numerals '0'
947 * through '9' are considered as potential components of the group
948 * reference. If the second group matched the string {@code "foo"}, for
949 * example, then passing the replacement string {@code "$2bar"} would
950 * cause {@code "foobar"} to be appended to the string builder. A dollar
951 * sign ({@code $}) may be included as a literal in the replacement
952 * string by preceding it with a backslash ({@code \$}).
953 *
954 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
955 * the replacement string may cause the results to be different than if it
956 * were being treated as a literal replacement string. Dollar signs may be
957 * treated as references to captured subsequences as described above, and
958 * backslashes are used to escape literal characters in the replacement
959 * string.
960 *
961 * <p> This method is intended to be used in a loop together with the
962 * {@link #appendTail(StringBuilder) appendTail} and
963 * {@link #find() find} methods. The following code, for example, writes
964 * {@code one dog two dogs in the yard} to the standard-output stream: </p>
965 *
966 * <blockquote><pre>
967 * Pattern p = Pattern.compile("cat");
968 * Matcher m = p.matcher("one cat two cats in the yard");
969 * StringBuilder sb = new StringBuilder();
970 * while (m.find()) {
971 * m.appendReplacement(sb, "dog");
972 * }
973 * m.appendTail(sb);
974 * System.out.println(sb.toString());</pre></blockquote>
975 *
976 * @param sb
977 * The target string builder
978 * @param replacement
979 * The replacement string
980 * @return This matcher
981 *
982 * @throws IllegalStateException
983 * If no match has yet been attempted,
984 * or if the previous match operation failed
985 * @throws IllegalArgumentException
986 * If the replacement string refers to a named-capturing
987 * group that does not exist in the pattern
988 * @throws IndexOutOfBoundsException
989 * If the replacement string refers to a capturing group
990 * that does not exist in the pattern
991 * @since 9
992 */
993 public Matcher appendReplacement(StringBuilder sb, String replacement) {
994 // If no match, return error
995 if (first < 0)
996 throw new IllegalStateException("No match available");
997 StringBuilder result = new StringBuilder();
998 appendExpandedReplacement(replacement, result);
999 // Append the intervening text
1000 sb.append(text, lastAppendPosition, first);
1001 // Append the match substitution
1002 sb.append(result);
1003 lastAppendPosition = last;
1004 modCount++;
1005 return this;
1006 }
1007
/**
 * Processes replacement string to replace group references with
 * groups.
 *
 * <p> The replacement is scanned from left to right and its expansion is
 * appended to {@code result}:
 * <ul>
 * <li> a backslash causes the character that follows it to be appended
 *      literally;</li>
 * <li> {@code $} followed by one or more digits appends the input text
 *      of the numbered group;</li>
 * <li> {@code ${name}} appends the input text of the group registered
 *      under {@code name} in the parent pattern;</li>
 * <li> any other character is appended unchanged.</li>
 * </ul>
 * Nothing is appended for a referenced group whose start or end index
 * is {@code -1}.
 *
 * @param  replacement
 *         The replacement string to expand
 * @param  result
 *         The builder that receives the expanded text
 * @return The {@code result} builder that was passed in
 *
 * @throws IllegalArgumentException
 *         If the replacement ends with an unescaped backslash or
 *         dollar sign, if a dollar sign is followed by neither a digit
 *         nor <code>{</code>, or if a named reference is empty,
 *         unterminated, starts with a digit, or names a group that is
 *         not present in the parent pattern
 */
private StringBuilder appendExpandedReplacement(
    String replacement, StringBuilder result) {
    // Index of the next unread character of the replacement string
    int cursor = 0;
    while (cursor < replacement.length()) {
        char nextChar = replacement.charAt(cursor);
        if (nextChar == '\\') {
            // Escape: drop the backslash and copy the next character as-is
            cursor++;
            if (cursor == replacement.length())
                throw new IllegalArgumentException(
                    "character to be escaped is missing");
            nextChar = replacement.charAt(cursor);
            result.append(nextChar);
            cursor++;
        } else if (nextChar == '$') {
            // Skip past $
            cursor++;
            // Throw IAE if this "$" is the last character in replacement
            if (cursor == replacement.length())
               throw new IllegalArgumentException(
                    "Illegal group reference: group index is missing");
            nextChar = replacement.charAt(cursor);
            // Resolved group number; assigned by one of the branches below
            int refNum = -1;
            if (nextChar == '{') {
                // Named reference of the form ${name}
                cursor++;
                StringBuilder gsb = new StringBuilder();
                // Collect the longest run of ASCII letters and digits
                while (cursor < replacement.length()) {
                    nextChar = replacement.charAt(cursor);
                    if (ASCII.isLower(nextChar) ||
                        ASCII.isUpper(nextChar) ||
                        ASCII.isDigit(nextChar)) {
                        gsb.append(nextChar);
                        cursor++;
                    } else {
                        break;
                    }
                }
                if (gsb.length() == 0)
                    throw new IllegalArgumentException(
                        "named capturing group has 0 length name");
                // nextChar is the character that ended the name, or the
                // last name character if the replacement ran out
                if (nextChar != '}')
                    throw new IllegalArgumentException(
                        "named capturing group is missing trailing '}'");
                String gname = gsb.toString();
                if (ASCII.isDigit(gname.charAt(0)))
                    throw new IllegalArgumentException(
                        "capturing group name {" + gname +
                        "} starts with digit character");
                if (!parentPattern.namedGroups().containsKey(gname))
                    throw new IllegalArgumentException(
                        "No group with name {" + gname + "}");
                refNum = parentPattern.namedGroups().get(gname);
                // Skip past the closing '}'
                cursor++;
            } else {
                // The first number is always a group
                refNum = nextChar - '0';
                if ((refNum < 0) || (refNum > 9))
                    throw new IllegalArgumentException(
                        "Illegal group reference");
                cursor++;
                // Capture the largest legal group string
                boolean done = false;
                while (!done) {
                    if (cursor >= replacement.length()) {
                        break;
                    }
                    int nextDigit = replacement.charAt(cursor) - '0';
                    if ((nextDigit < 0) || (nextDigit > 9)) { // not a number
                        break;
                    }
                    // Accept the extra digit only while the resulting
                    // number does not exceed groupCount(); otherwise the
                    // digit is left to be copied as literal text
                    int newRefNum = (refNum * 10) + nextDigit;
                    if (groupCount() < newRefNum) {
                        done = true;
                    } else {
                        refNum = newRefNum;
                        cursor++;
                    }
                }
            }
            // Append group
            // NOTE(review): start(int)/end(int) are defined outside this
            // block; presumably they reject a group number that does not
            // exist in the pattern -- confirm against their definitions
            if (start(refNum) != -1 && end(refNum) != -1)
                result.append(text, start(refNum), end(refNum));
        } else {
            // Ordinary character: copy it through unchanged
            result.append(nextChar);
            cursor++;
        }
    }
    return result;
}
1100
1101 /**
1102 * Implements a terminal append-and-replace step.
1103 *
1104 * <p> This method reads characters from the input sequence, starting at
1105 * the append position, and appends them to the given string buffer. It is
1106 * intended to be invoked after one or more invocations of the {@link
1107 * #appendReplacement(StringBuffer, String) appendReplacement} method in
1108 * order to copy the remainder of the input sequence. </p>
1109 *
1110 * @param sb
1111 * The target string buffer
1112 *
1113 * @return The target string buffer
1114 */
1115 public StringBuffer appendTail(StringBuffer sb) {
1116 sb.append(text, lastAppendPosition, getTextLength());
1117 return sb;
1118 }
1119
1120 /**
1121 * Implements a terminal append-and-replace step.
1122 *
1123 * <p> This method reads characters from the input sequence, starting at
1124 * the append position, and appends them to the given string builder. It is
1125 * intended to be invoked after one or more invocations of the {@link
1126 * #appendReplacement(StringBuilder, String)
1127 * appendReplacement} method in order to copy the remainder of the input
1128 * sequence. </p>
1129 *
1130 * @param sb
1131 * The target string builder
1132 *
1133 * @return The target string builder
1134 *
1135 * @since 9
1136 */
1137 public StringBuilder appendTail(StringBuilder sb) {
1138 sb.append(text, lastAppendPosition, getTextLength());
1139 return sb;
1140 }
1141
1142 /**
1143 * Replaces every subsequence of the input sequence that matches the
1144 * pattern with the given replacement string.
1145 *
1146 * <p> This method first resets this matcher. It then scans the input
1147 * sequence looking for matches of the pattern. Characters that are not
1148 * part of any match are appended directly to the result string; each match
1149 * is replaced in the result by the replacement string. The replacement
1150 * string may contain references to captured subsequences as in the {@link
1151 * #appendReplacement appendReplacement} method.
1152 *
1153 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1154 * the replacement string may cause the results to be different than if it
1155 * were being treated as a literal replacement string. Dollar signs may be
1156 * treated as references to captured subsequences as described above, and
1157 * backslashes are used to escape literal characters in the replacement
1158 * string.
1159 *
1160 * <p> Given the regular expression {@code a*b}, the input
1161 * {@code "aabfooaabfooabfoob"}, and the replacement string
1162 * {@code "-"}, an invocation of this method on a matcher for that
1163 * expression would yield the string {@code "-foo-foo-foo-"}.
1164 *
1165 * <p> Invoking this method changes this matcher's state. If the matcher
1166 * is to be used in further matching operations then it should first be
1167 * reset. </p>
1168 *
1169 * @param replacement
1170 * The replacement string
1171 *
1172 * @return The string constructed by replacing each matching subsequence
1173 * by the replacement string, substituting captured subsequences
1174 * as needed
1175 */
1176 public String replaceAll(String replacement) {
1177 reset();
1178 boolean result = find();
1179 if (result) {
1180 StringBuilder sb = new StringBuilder();
1181 do {
1182 appendReplacement(sb, replacement);
1183 result = find();
1184 } while (result);
1185 appendTail(sb);
1186 return sb.toString();
1187 }
1188 return text.toString();
1189 }
1190
1191 /**
1192 * Replaces every subsequence of the input sequence that matches the
1193 * pattern with the result of applying the given replacer function to the
1194 * match result of this matcher corresponding to that subsequence.
1195 * Exceptions thrown by the function are relayed to the caller.
1196 *
1197 * <p> This method first resets this matcher. It then scans the input
1198 * sequence looking for matches of the pattern. Characters that are not
1199 * part of any match are appended directly to the result string; each match
 * is replaced in the result by applying the replacer function that
1201 * returns a replacement string. Each replacement string may contain
1202 * references to captured subsequences as in the {@link #appendReplacement
1203 * appendReplacement} method.
1204 *
1205 * <p> Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1206 * a replacement string may cause the results to be different than if it
1207 * were being treated as a literal replacement string. Dollar signs may be
1208 * treated as references to captured subsequences as described above, and
1209 * backslashes are used to escape literal characters in the replacement
1210 * string.
1211 *
1212 * <p> Given the regular expression {@code dog}, the input
1213 * {@code "zzzdogzzzdogzzz"}, and the function
1214 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1215 * a matcher for that expression would yield the string
1216 * {@code "zzzDOGzzzDOGzzz"}.
1217 *
1218 * <p> Invoking this method changes this matcher's state. If the matcher
1219 * is to be used in further matching operations then it should first be
1220 * reset. </p>
1221 *
1222 * <p> The replacer function should not modify this matcher's state during
1223 * replacement. This method will, on a best-effort basis, throw a
1224 * {@link java.util.ConcurrentModificationException} if such modification is
1225 * detected.
1226 *
1227 * <p> The state of each match result passed to the replacer function is
1228 * guaranteed to be constant only for the duration of the replacer function
1229 * call and only if the replacer function does not modify this matcher's
1230 * state.
1231 *
1232 * @implNote
1233 * This implementation applies the replacer function to this matcher, which
1234 * is an instance of {@code MatchResult}.
1235 *
1236 * @param replacer
1237 * The function to be applied to the match result of this matcher
1238 * that returns a replacement string.
1239 * @return The string constructed by replacing each matching subsequence
1240 * with the result of applying the replacer function to that
1241 * matched subsequence, substituting captured subsequences as
1242 * needed.
1243 * @throws NullPointerException if the replacer function is null
1244 * @throws ConcurrentModificationException if it is detected, on a
1245 * best-effort basis, that the replacer function modified this
1246 * matcher's state
1247 * @since 9
1248 */
1249 public String replaceAll(Function<MatchResult, String> replacer) {
1250 Objects.requireNonNull(replacer);
1251 reset();
1252 boolean result = find();
1253 if (result) {
1254 StringBuilder sb = new StringBuilder();
1255 do {
1256 int ec = modCount;
1257 String replacement = replacer.apply(this);
1258 if (ec != modCount)
1259 throw new ConcurrentModificationException();
1260 appendReplacement(sb, replacement);
1261 result = find();
1262 } while (result);
1263 appendTail(sb);
1264 return sb.toString();
1265 }
1266 return text.toString();
1267 }
1268
1269 /**
1270 * Returns a stream of match results for each subsequence of the input
1271 * sequence that matches the pattern. The match results occur in the
1272 * same order as the matching subsequences in the input sequence.
1273 *
1274 * <p> Each match result is produced as if by {@link #toMatchResult()}.
1275 *
1276 * <p> This method does not reset this matcher. Matching starts on
1277 * initiation of the terminal stream operation either at the beginning of
1278 * this matcher's region, or, if the matcher has not since been reset, at
1279 * the first character not matched by a previous match.
1280 *
1281 * <p> If the matcher is to be used for further matching operations after
1282 * the terminal stream operation completes then it should be first reset.
1283 *
1284 * <p> This matcher's state should not be modified during execution of the
1285 * returned stream's pipeline. The returned stream's source
1286 * {@code Spliterator} is <em>fail-fast</em> and will, on a best-effort
1287 * basis, throw a {@link java.util.ConcurrentModificationException} if such
1288 * modification is detected.
1289 *
1290 * @return a sequential stream of match results.
1291 * @since 9
1292 */
1293 public Stream<MatchResult> results() {
1294 class MatchResultIterator implements Iterator<MatchResult> {
1295 // -ve for call to find, 0 for not found, 1 for found
1296 int state = -1;
1297 // State for concurrent modification checking
1298 // -1 for uninitialized
1299 int expectedCount = -1;
1300 // The input sequence as a string, set once only after first find
1301 // Avoids repeated conversion from CharSequence for each match
1302 String textAsString;
1303
1304 @Override
1305 public MatchResult next() {
1306 if (expectedCount >= 0 && expectedCount != modCount)
1307 throw new ConcurrentModificationException();
1308
1309 if (!hasNext())
1310 throw new NoSuchElementException();
1311
1312 state = -1;
1313 return toMatchResult(textAsString);
1314 }
1315
1316 @Override
1317 public boolean hasNext() {
1318 if (state >= 0)
1319 return state == 1;
1320
1321 // Defer throwing ConcurrentModificationException to when next
1322 // or forEachRemaining is called. The is consistent with other
1323 // fail-fast implementations.
1324 if (expectedCount >= 0 && expectedCount != modCount)
1325 return true;
1326
1327 boolean found = find();
1328 // Capture the input sequence as a string on first find
1329 if (found && state < 0)
1330 textAsString = text.toString();
1331 state = found ? 1 : 0;
1332 expectedCount = modCount;
1333 return found;
1334 }
1335
1336 @Override
1337 public void forEachRemaining(Consumer<? super MatchResult> action) {
1338 if (expectedCount >= 0 && expectedCount != modCount)
1339 throw new ConcurrentModificationException();
1340
1341 int s = state;
1342 if (s == 0)
1343 return;
1344
1345 // Set state to report no more elements on further operations
1346 state = 0;
1347 expectedCount = -1;
1348
1349 // Perform a first find if required
1350 if (s < 0 && !find())
1351 return;
1352
1353 // Capture the input sequence as a string on first find
1354 textAsString = text.toString();
1355
1356 do {
1357 int ec = modCount;
1358 action.accept(toMatchResult(textAsString));
1359 if (ec != modCount)
1360 throw new ConcurrentModificationException();
1361 } while (find());
1362 }
1363 }
1364 return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
1365 new MatchResultIterator(), Spliterator.ORDERED | Spliterator.NONNULL), false);
1366 }
1367
1368 /**
1369 * Replaces the first subsequence of the input sequence that matches the
1370 * pattern with the given replacement string.
1371 *
1372 * <p> This method first resets this matcher. It then scans the input
1373 * sequence looking for a match of the pattern. Characters that are not
1374 * part of the match are appended directly to the result string; the match
1375 * is replaced in the result by the replacement string. The replacement
1376 * string may contain references to captured subsequences as in the {@link
1377 * #appendReplacement appendReplacement} method.
1378 *
1379 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1380 * the replacement string may cause the results to be different than if it
1381 * were being treated as a literal replacement string. Dollar signs may be
1382 * treated as references to captured subsequences as described above, and
1383 * backslashes are used to escape literal characters in the replacement
1384 * string.
1385 *
1386 * <p> Given the regular expression {@code dog}, the input
1387 * {@code "zzzdogzzzdogzzz"}, and the replacement string
1388 * {@code "cat"}, an invocation of this method on a matcher for that
1389 * expression would yield the string {@code "zzzcatzzzdogzzz"}. </p>
1390 *
1391 * <p> Invoking this method changes this matcher's state. If the matcher
1392 * is to be used in further matching operations then it should first be
1393 * reset. </p>
1394 *
1395 * @param replacement
1396 * The replacement string
1397 * @return The string constructed by replacing the first matching
1398 * subsequence by the replacement string, substituting captured
1399 * subsequences as needed
1400 */
1401 public String replaceFirst(String replacement) {
1402 if (replacement == null)
1403 throw new NullPointerException("replacement");
1404 reset();
1405 if (!find())
1406 return text.toString();
1407 StringBuilder sb = new StringBuilder();
1408 appendReplacement(sb, replacement);
1409 appendTail(sb);
1410 return sb.toString();
1411 }
1412
1413 /**
1414 * Replaces the first subsequence of the input sequence that matches the
1415 * pattern with the result of applying the given replacer function to the
1416 * match result of this matcher corresponding to that subsequence.
 * Exceptions thrown by the replacer function are relayed to the caller.
1418 *
1419 * <p> This method first resets this matcher. It then scans the input
1420 * sequence looking for a match of the pattern. Characters that are not
1421 * part of the match are appended directly to the result string; the match
 * is replaced in the result by applying the replacer function that
1423 * returns a replacement string. The replacement string may contain
1424 * references to captured subsequences as in the {@link #appendReplacement
1425 * appendReplacement} method.
1426 *
1427 * <p>Note that backslashes ({@code \}) and dollar signs ({@code $}) in
1428 * the replacement string may cause the results to be different than if it
1429 * were being treated as a literal replacement string. Dollar signs may be
1430 * treated as references to captured subsequences as described above, and
1431 * backslashes are used to escape literal characters in the replacement
1432 * string.
1433 *
1434 * <p> Given the regular expression {@code dog}, the input
1435 * {@code "zzzdogzzzdogzzz"}, and the function
1436 * {@code mr -> mr.group().toUpperCase()}, an invocation of this method on
1437 * a matcher for that expression would yield the string
1438 * {@code "zzzDOGzzzdogzzz"}.
1439 *
1440 * <p> Invoking this method changes this matcher's state. If the matcher
1441 * is to be used in further matching operations then it should first be
1442 * reset.
1443 *
1444 * <p> The replacer function should not modify this matcher's state during
1445 * replacement. This method will, on a best-effort basis, throw a
1446 * {@link java.util.ConcurrentModificationException} if such modification is
1447 * detected.
1448 *
1449 * <p> The state of the match result passed to the replacer function is
1450 * guaranteed to be constant only for the duration of the replacer function
1451 * call and only if the replacer function does not modify this matcher's
1452 * state.
1453 *
1454 * @implNote
1455 * This implementation applies the replacer function to this matcher, which
1456 * is an instance of {@code MatchResult}.
1457 *
1458 * @param replacer
1459 * The function to be applied to the match result of this matcher
1460 * that returns a replacement string.
1461 * @return The string constructed by replacing the first matching
1462 * subsequence with the result of applying the replacer function to
1463 * the matched subsequence, substituting captured subsequences as
1464 * needed.
1465 * @throws NullPointerException if the replacer function is null
1466 * @throws ConcurrentModificationException if it is detected, on a
1467 * best-effort basis, that the replacer function modified this
1468 * matcher's state
1469 * @since 9
1470 */
1471 public String replaceFirst(Function<MatchResult, String> replacer) {
1472 Objects.requireNonNull(replacer);
1473 reset();
1474 if (!find())
1475 return text.toString();
1476 StringBuilder sb = new StringBuilder();
1477 int ec = modCount;
1478 String replacement = replacer.apply(this);
1479 if (ec != modCount)
1480 throw new ConcurrentModificationException();
1481 appendReplacement(sb, replacement);
1482 appendTail(sb);
1483 return sb.toString();
1484 }
1485
1486 /**
1487 * Sets the limits of this matcher's region. The region is the part of the
1488 * input sequence that will be searched to find a match. Invoking this
1489 * method resets the matcher, and then sets the region to start at the
1490 * index specified by the {@code start} parameter and end at the
1491 * index specified by the {@code end} parameter.
1492 *
1493 * <p>Depending on the transparency and anchoring being used (see
1494 * {@link #useTransparentBounds(boolean) useTransparentBounds} and
1495 * {@link #useAnchoringBounds(boolean) useAnchoringBounds}), certain
1496 * constructs such as anchors may behave differently at or around the
1497 * boundaries of the region.
1498 *
1499 * @param start
1500 * The index to start searching at (inclusive)
1501 * @param end
1502 * The index to end searching at (exclusive)
1503 * @throws IndexOutOfBoundsException
1504 * If start or end is less than zero, if
1505 * start is greater than the length of the input sequence, if
1506 * end is greater than the length of the input sequence, or if
1507 * start is greater than end.
1508 * @return this matcher
1509 * @since 1.5
1510 */
1511 public Matcher region(int start, int end) {
1512 if ((start < 0) || (start > getTextLength()))
1513 throw new IndexOutOfBoundsException("start");
1514 if ((end < 0) || (end > getTextLength()))
1515 throw new IndexOutOfBoundsException("end");
1516 if (start > end)
1517 throw new IndexOutOfBoundsException("start > end");
1518 reset();
1519 from = start;
1520 to = end;
1521 return this;
1522 }
1523
1524 /**
1525 * Reports the start index of this matcher's region. The
1526 * searches this matcher conducts are limited to finding matches
1527 * within {@link #regionStart() regionStart} (inclusive) and
1528 * {@link #regionEnd() regionEnd} (exclusive).
1529 *
1530 * @return The starting point of this matcher's region
1531 * @since 1.5
1532 */
1533 public int regionStart() {
1534 return from;
1535 }
1536
1537 /**
1538 * Reports the end index (exclusive) of this matcher's region.
1539 * The searches this matcher conducts are limited to finding matches
1540 * within {@link #regionStart() regionStart} (inclusive) and
1541 * {@link #regionEnd() regionEnd} (exclusive).
1542 *
1543 * @return the ending point of this matcher's region
1544 * @since 1.5
1545 */
1546 public int regionEnd() {
1547 return to;
1548 }
1549
1550 /**
1551 * Queries the transparency of region bounds for this matcher.
1552 *
1553 * <p> This method returns {@code true} if this matcher uses
1554 * <i>transparent</i> bounds, {@code false} if it uses <i>opaque</i>
1555 * bounds.
1556 *
1557 * <p> See {@link #useTransparentBounds(boolean) useTransparentBounds} for a
1558 * description of transparent and opaque bounds.
1559 *
1560 * <p> By default, a matcher uses opaque region boundaries.
1561 *
1562 * @return {@code true} iff this matcher is using transparent bounds,
1563 * {@code false} otherwise.
1564 * @see java.util.regex.Matcher#useTransparentBounds(boolean)
1565 * @since 1.5
1566 */
1567 public boolean hasTransparentBounds() {
1568 return transparentBounds;
1569 }
1570
1571 /**
1572 * Sets the transparency of region bounds for this matcher.
1573 *
1574 * <p> Invoking this method with an argument of {@code true} will set this
1575 * matcher to use <i>transparent</i> bounds. If the boolean
1576 * argument is {@code false}, then <i>opaque</i> bounds will be used.
1577 *
1578 * <p> Using transparent bounds, the boundaries of this
1579 * matcher's region are transparent to lookahead, lookbehind,
1580 * and boundary matching constructs. Those constructs can see beyond the
1581 * boundaries of the region to see if a match is appropriate.
1582 *
1583 * <p> Using opaque bounds, the boundaries of this matcher's
1584 * region are opaque to lookahead, lookbehind, and boundary matching
1585 * constructs that may try to see beyond them. Those constructs cannot
1586 * look past the boundaries so they will fail to match anything outside
1587 * of the region.
1588 *
1589 * <p> By default, a matcher uses opaque bounds.
1590 *
1591 * @param b a boolean indicating whether to use opaque or transparent
1592 * regions
1593 * @return this matcher
1594 * @see java.util.regex.Matcher#hasTransparentBounds
1595 * @since 1.5
1596 */
1597 public Matcher useTransparentBounds(boolean b) {
1598 transparentBounds = b;
1599 return this;
1600 }
1601
1602 /**
1603 * Queries the anchoring of region bounds for this matcher.
1604 *
1605 * <p> This method returns {@code true} if this matcher uses
1606 * <i>anchoring</i> bounds, {@code false} otherwise.
1607 *
1608 * <p> See {@link #useAnchoringBounds(boolean) useAnchoringBounds} for a
1609 * description of anchoring bounds.
1610 *
1611 * <p> By default, a matcher uses anchoring region boundaries.
1612 *
1613 * @return {@code true} iff this matcher is using anchoring bounds,
1614 * {@code false} otherwise.
1615 * @see java.util.regex.Matcher#useAnchoringBounds(boolean)
1616 * @since 1.5
1617 */
1618 public boolean hasAnchoringBounds() {
1619 return anchoringBounds;
1620 }
1621
1622 /**
1623 * Sets the anchoring of region bounds for this matcher.
1624 *
1625 * <p> Invoking this method with an argument of {@code true} will set this
1626 * matcher to use <i>anchoring</i> bounds. If the boolean
1627 * argument is {@code false}, then <i>non-anchoring</i> bounds will be
1628 * used.
1629 *
1630 * <p> Using anchoring bounds, the boundaries of this
1631 * matcher's region match anchors such as ^ and $.
1632 *
1633 * <p> Without anchoring bounds, the boundaries of this
1634 * matcher's region will not match anchors such as ^ and $.
1635 *
1636 * <p> By default, a matcher uses anchoring region boundaries.
1637 *
1638 * @param b a boolean indicating whether or not to use anchoring bounds.
1639 * @return this matcher
1640 * @see java.util.regex.Matcher#hasAnchoringBounds
1641 * @since 1.5
1642 */
1643 public Matcher useAnchoringBounds(boolean b) {
1644 anchoringBounds = b;
1645 return this;
1646 }
1647
1648 /**
1649 * <p>Returns the string representation of this matcher. The
1650 * string representation of a {@code Matcher} contains information
1651 * that may be useful for debugging. The exact format is unspecified.
1652 *
1653 * @return The string representation of this matcher
1654 * @since 1.5
1655 */
1656 public String toString() {
1657 StringBuilder sb = new StringBuilder();
1658 sb.append("java.util.regex.Matcher")
1659 .append("[pattern=").append(pattern())
1660 .append(" region=")
1661 .append(regionStart()).append(',').append(regionEnd())
1662 .append(" lastmatch=");
1663 if ((first >= 0) && (group() != null)) {
1664 sb.append(group());
1665 }
1666 sb.append(']');
1667 return sb.toString();
1668 }
1669
1670 /**
1671 * <p>Returns true if the end of input was hit by the search engine in
1672 * the last match operation performed by this matcher.
1673 *
1674 * <p>When this method returns true, then it is possible that more input
1675 * would have changed the result of the last search.
1676 *
1677 * @return true iff the end of input was hit in the last match; false
1678 * otherwise
1679 * @since 1.5
1680 */
1681 public boolean hitEnd() {
1682 return hitEnd;
1683 }
1684
1685 /**
1686 * <p>Returns true if more input could change a positive match into a
1687 * negative one.
1688 *
1689 * <p>If this method returns true, and a match was found, then more
1690 * input could cause the match to be lost. If this method returns false
1691 * and a match was found, then more input might change the match but the
1692 * match won't be lost. If a match was not found, then requireEnd has no
1693 * meaning.
1694 *
1695 * @return true iff more input could change a positive match into a
1696 * negative one.
1697 * @since 1.5
1698 */
1699 public boolean requireEnd() {
1700 return requireEnd;
1701 }
1702
1703 /**
1704 * Initiates a search to find a Pattern within the given bounds.
1705 * The groups are filled with default values and the match of the root
1706 * of the state machine is called. The state machine will hold the state
1707 * of the match as it proceeds in this matcher.
1708 *
1709 * Matcher.from is not set here, because it is the "hard" boundary
1710 * of the start of the search which anchors will set to. The from param
1711 * is the "soft" boundary of the start of the search, meaning that the
1712 * regex tries to match at that index but ^ won't match there. Subsequent
1713 * calls to the search methods start at a new "soft" boundary which is
1714 * the end of the previous match.
1715 */
1716 boolean search(int from) {
1717 this.hitEnd = false;
1718 this.requireEnd = false;
1719 from = from < 0 ? 0 : from;
1720 this.first = from;
1721 this.oldLast = oldLast < 0 ? from : oldLast;
1722 for (int i = 0; i < groups.length; i++)
1723 groups[i] = -1;
1724 for (int i = 0; i < localsPos.length; i++) {
1725 if (localsPos[i] != null)
1726 localsPos[i].clear();
1727 }
1728 acceptMode = NOANCHOR;
1729 boolean result = parentPattern.root.match(this, from, text);
1730 if (!result)
1731 this.first = -1;
1732 this.oldLast = this.last;
1733 this.modCount++;
1734 return result;
1735 }
1736
1737 /**
1738 * Initiates a search for an anchored match to a Pattern within the given
1739 * bounds. The groups are filled with default values and the match of the
1740 * root of the state machine is called. The state machine will hold the
1741 * state of the match as it proceeds in this matcher.
1742 */
1743 boolean match(int from, int anchor) {
1744 this.hitEnd = false;
1745 this.requireEnd = false;
1746 from = from < 0 ? 0 : from;
1747 this.first = from;
1748 this.oldLast = oldLast < 0 ? from : oldLast;
1749 for (int i = 0; i < groups.length; i++)
1750 groups[i] = -1;
1751 for (int i = 0; i < localsPos.length; i++) {
1752 if (localsPos[i] != null)
1753 localsPos[i].clear();
1754 }
1755 acceptMode = anchor;
1756 boolean result = parentPattern.matchRoot.match(this, from, text);
1757 if (!result)
1758 this.first = -1;
1759 this.oldLast = this.last;
1760 this.modCount++;
1761 return result;
1762 }
1763
1764 /**
1765 * Returns the end index of the text.
1766 *
1767 * @return the index after the last character in the text
1768 */
1769 int getTextLength() {
1770 return text.length();
1771 }
1772
1773 /**
 * Returns the subsequence of this matcher's input in the specified range.
 *
 * @param beginIndex the beginning index, inclusive
 * @param endIndex the ending index, exclusive
 * @return A subsequence of this matcher's input
1779 */
1780 CharSequence getSubSequence(int beginIndex, int endIndex) {
1781 return text.subSequence(beginIndex, endIndex);
1782 }
1783
1784 /**
1785 * Returns this matcher's input character at index i.
1786 *
1787 * @return A char from the specified index
1788 */
1789 char charAt(int i) {
1790 return text.charAt(i);
1791 }
1792
1793 /**
1794 * Returns the group index of the matched capturing group.
1795 *
1796 * @return the index of the named-capturing group
1797 */
1798 int getMatchedGroupIndex(String name) {
1799 Objects.requireNonNull(name, "Group name");
1800 if (first < 0)
1801 throw new IllegalStateException("No match found");
1802 if (!parentPattern.namedGroups().containsKey(name))
1803 throw new IllegalArgumentException("No group with name <" + name + ">");
1804 return parentPattern.namedGroups().get(name);
1805 }
1806 }