Monitoring JavaMelody sur /demo

1 /*

2  * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.

3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.

4  *

5  * This code is free software; you can redistribute it and/or modify it

6  * under the terms of the GNU General Public License version 2 only, as

7  * published by the Free Software Foundation.  Oracle designates this

8  * particular file as subject to the "Classpath" exception as provided

9  * by Oracle in the LICENSE file that accompanied this code.

10  *

11  * This code is distributed in the hope that it will be useful, but WITHOUT

12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

14  * version 2 for more details (a copy is included in the LICENSE file that

15  * accompanied this code).

16  *

17  * You should have received a copy of the GNU General Public License version

18  * 2 along with this work; if not, write to the Free Software Foundation,

19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

20  *

21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA

22  * or visit www.oracle.com if you need additional information or have any

23  * questions.

24  */

25 

26 package java.net;

27 

28 import java.io.IOException;

29 import java.io.InvalidObjectException;

30 import java.io.ObjectInputStream;

31 import java.io.ObjectOutputStream;

32 import java.io.Serializable;

33 import java.nio.ByteBuffer;

34 import java.nio.CharBuffer;

35 import java.nio.charset.CharsetDecoder;

36 import java.nio.charset.CoderResult;

37 import java.nio.charset.CodingErrorAction;

38 import java.nio.charset.CharacterCodingException;

39 import java.text.Normalizer;

40 import jdk.internal.misc.JavaNetUriAccess;

41 import jdk.internal.misc.SharedSecrets;

42 import sun.nio.cs.ThreadLocalCoders;

43 

44 import java.lang.Character;             // for javadoc

45 import java.lang.NullPointerException;  // for javadoc

46 

47 

48 /**

49  * Represents a Uniform Resource Identifier (URI) reference.

50  *

51  * <p> Aside from some minor deviations noted below, an instance of this

52  * class represents a URI reference as defined by

53  * <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform

54  * Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a

55  * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for

56  * Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format

57  * also supports scope_ids. The syntax and usage of scope_ids is described

58  * <a href="Inet6Address.html#scoped">here</a>.

59  * This class provides constructors for creating URI instances from

60  * their components or by parsing their string forms, methods for accessing the

61  * various components of an instance, and methods for normalizing, resolving,

62  * and relativizing URI instances.  Instances of this class are immutable.

63  *

64  *

65  * <h3> URI syntax and components </h3>

66  *

67  * At the highest level a URI reference (hereinafter simply "URI") in string

68  * form has the syntax

69  *

70  * <blockquote>

71  * [<i>scheme</i><b>{@code :}</b>]<i>scheme-specific-part</i>[<b>{@code #}</b><i>fragment</i>]

72  * </blockquote>

73  *

74  * where square brackets [...] delineate optional components and the characters

75  * <b>{@code :}</b> and <b>{@code #}</b> stand for themselves.

76  *

77  * <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is

78  * said to be <i>relative</i>.  URIs are also classified according to whether

79  * they are <i>opaque</i> or <i>hierarchical</i>.

80  *

81  * <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does

82  * not begin with a slash character ({@code '/'}).  Opaque URIs are not

83  * subject to further parsing.  Some examples of opaque URIs are:

84  *

85  * <blockquote><ul style="list-style-type:none">

86  * <li>{@code mailto:java-net@java.sun.com}</li>

87  * <li>{@code news:comp.lang.java}</li>

88  * <li>{@code urn:isbn:096139210x}</li>

89  * </ul></blockquote>

90  *

91  * <p> A <i>hierarchical</i> URI is either an absolute URI whose

92  * scheme-specific part begins with a slash character, or a relative URI, that

93  * is, a URI that does not specify a scheme.  Some examples of hierarchical

94  * URIs are:

95  *

96  * <blockquote>

97  * {@code http://example.com/languages/java/}<br>

98  * {@code sample/a/index.html#28}<br>

99  * {@code ../../demo/b/index.html}<br>

100  * {@code file:///~/calendar}

101  * </blockquote>

102  *

103  * <p> A hierarchical URI is subject to further parsing according to the syntax

104  *

105  * <blockquote>

106  * [<i>scheme</i><b>{@code :}</b>][<b>{@code //}</b><i>authority</i>][<i>path</i>][<b>{@code ?}</b><i>query</i>][<b>{@code #}</b><i>fragment</i>]

107  * </blockquote>

108  *

109  * where the characters <b>{@code :}</b>, <b>{@code /}</b>,

110  * <b>{@code ?}</b>, and <b>{@code #}</b> stand for themselves.  The

111  * scheme-specific part of a hierarchical URI consists of the characters

112  * between the scheme and fragment components.

113  *

114  * <p> The authority component of a hierarchical URI is, if specified, either

115  * <i>server-based</i> or <i>registry-based</i>.  A server-based authority

116  * parses according to the familiar syntax

117  *

118  * <blockquote>

119  * [<i>user-info</i><b>{@code @}</b>]<i>host</i>[<b>{@code :}</b><i>port</i>]

120  * </blockquote>

121  *

122  * where the characters <b>{@code @}</b> and <b>{@code :}</b> stand for

123  * themselves.  Nearly all URI schemes currently in use are server-based.  An

124  * authority component that does not parse in this way is considered to be

125  * registry-based.

126  *

127  * <p> The path component of a hierarchical URI is itself said to be absolute

128  * if it begins with a slash character ({@code '/'}); otherwise it is

129  * relative.  The path of a hierarchical URI that is either absolute or

130  * specifies an authority is always absolute.

131  *

132  * <p> All told, then, a URI instance has the following nine components:

133  *

134  * <table class="striped" style="margin-left:2em">

135  * <caption style="display:none">Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment</caption>

136  * <thead>

137  * <tr><th scope="col">Component</th><th scope="col">Type</th></tr>

138  * </thead>

139  * <tbody style="text-align:left">

140  * <tr><th scope="row">scheme</th><td>{@code String}</td></tr>

141  * <tr><th scope="row">scheme-specific-part</th><td>{@code String}</td></tr>

142  * <tr><th scope="row">authority</th><td>{@code String}</td></tr>

143  * <tr><th scope="row">user-info</th><td>{@code String}</td></tr>

144  * <tr><th scope="row">host</th><td>{@code String}</td></tr>

145  * <tr><th scope="row">port</th><td>{@code int}</td></tr>

146  * <tr><th scope="row">path</th><td>{@code String}</td></tr>

147  * <tr><th scope="row">query</th><td>{@code String}</td></tr>

148  * <tr><th scope="row">fragment</th><td>{@code String}</td></tr>

149  * </tbody>

150  * </table>

151  *

152  * In a given instance any particular component is either <i>undefined</i> or

153  * <i>defined</i> with a distinct value.  Undefined string components are

154  * represented by {@code null}, while undefined integer components are

155  * represented by {@code -1}.  A string component may be defined to have the

156  * empty string as its value; this is not equivalent to that component being

157  * undefined.

158  *

159  * <p> Whether a particular component is or is not defined in an instance

160  * depends upon the type of the URI being represented.  An absolute URI has a

161  * scheme component.  An opaque URI has a scheme, a scheme-specific part, and

162  * possibly a fragment, but has no other components.  A hierarchical URI always

163  * has a path (though it may be empty) and a scheme-specific-part (which at

164  * least contains the path), and may have any of the other components.  If the

165  * authority component is present and is server-based then the host component

166  * will be defined and the user-information and port components may be defined.

167  *

168  *

169  * <h4> Operations on URI instances </h4>

170  *

171  * The key operations supported by this class are those of

172  * <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.

173  *

174  * <p> <i>Normalization</i> is the process of removing unnecessary {@code "."}

175  * and {@code ".."} segments from the path component of a hierarchical URI.

176  * Each {@code "."} segment is simply removed.  A {@code ".."} segment is

177  * removed only if it is preceded by a non-{@code ".."} segment.

178  * Normalization has no effect upon opaque URIs.

179  *

180  * <p> <i>Resolution</i> is the process of resolving one URI against another,

181  * <i>base</i> URI.  The resulting URI is constructed from components of both

182  * URIs in the manner specified by RFC&nbsp;2396, taking components from the

183  * base URI for those not specified in the original.  For hierarchical URIs,

184  * the path of the original is resolved against the path of the base and then

185  * normalized.  The result, for example, of resolving

186  *

187  * <blockquote>

188  * {@code sample/a/index.html#28}

189  * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;

190  * &nbsp;&nbsp;&nbsp;&nbsp;(1)

191  * </blockquote>

192  *

193  * against the base URI {@code http://example.com/languages/java/} is the result

194  * URI

195  *

196  * <blockquote>

197  * {@code http://example.com/languages/java/sample/a/index.html#28}

198  * </blockquote>

199  *

200  * Resolving the relative URI

201  *

202  * <blockquote>

203  * {@code ../../demo/b/index.html}&nbsp;&nbsp;&nbsp;&nbsp;(2)

204  * </blockquote>

205  *

206  * against this result yields, in turn,

207  *

208  * <blockquote>

209  * {@code http://example.com/languages/java/demo/b/index.html}

210  * </blockquote>

211  *

212  * Resolution of both absolute and relative URIs, and of both absolute and

213  * relative paths in the case of hierarchical URIs, is supported.  Resolving

214  * the URI {@code file:///~/calendar} against any other URI simply yields the

215  * original URI, since it is absolute.  Resolving the relative URI (2) above

216  * against the relative base URI (1) yields the normalized, but still relative,

217  * URI

218  *

219  * <blockquote>

220  * {@code demo/b/index.html}

221  * </blockquote>

222  *

223  * <p> <i>Relativization</i>, finally, is the inverse of resolution: For any

224  * two normalized URIs <i>u</i> and&nbsp;<i>v</i>,

225  *

226  * <blockquote>

227  *   <i>u</i>{@code .relativize(}<i>u</i>{@code .resolve(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}&nbsp;&nbsp;and<br>

228  *   <i>u</i>{@code .resolve(}<i>u</i>{@code .relativize(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}&nbsp;&nbsp;.<br>

229  * </blockquote>

230  *

231  * This operation is often useful when constructing a document containing URIs

232  * that must be made relative to the base URI of the document wherever

233  * possible.  For example, relativizing the URI

234  *

235  * <blockquote>

236  * {@code http://example.com/languages/java/sample/a/index.html#28}

237  * </blockquote>

238  *

239  * against the base URI

240  *

241  * <blockquote>

242  * {@code http://example.com/languages/java/}

243  * </blockquote>

244  *

245  * yields the relative URI {@code sample/a/index.html#28}.

246  *

247  *

248  * <h4> Character categories </h4>

249  *

250  * RFC&nbsp;2396 specifies precisely which characters are permitted in the

251  * various components of a URI reference.  The following categories, most of

252  * which are taken from that specification, are used below to describe these

253  * constraints:

254  *

255  * <table class="striped" style="margin-left:2em">

256  * <caption style="display:none">Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other</caption>

257  *   <thead>

258  *   <tr><th scope="col">Category</th><th scope="col">Description</th></tr>

259  *   </thead>

260  *   <tbody style="text-align:left">

261  *   <tr><th scope="row" style="vertical-align:top">alpha</th>

262  *       <td>The US-ASCII alphabetic characters,

263  *        {@code 'A'}&nbsp;through&nbsp;{@code 'Z'}

264  *        and {@code 'a'}&nbsp;through&nbsp;{@code 'z'}</td></tr>

265  *   <tr><th scope="row" style="vertical-align:top">digit</th>

266  *       <td>The US-ASCII decimal digit characters,

267  *       {@code '0'}&nbsp;through&nbsp;{@code '9'}</td></tr>

268  *   <tr><th scope="row" style="vertical-align:top">alphanum</th>

269  *       <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>

270  *   <tr><th scope="row" style="vertical-align:top">unreserved</th>

271  *       <td>All <i>alphanum</i> characters together with those in the string

272  *        {@code "_-!.~'()*"}</td></tr>

273  *   <tr><th scope="row" style="vertical-align:top">punct</th>

274  *       <td>The characters in the string {@code ",;:$&+="}</td></tr>

275  *   <tr><th scope="row" style="vertical-align:top">reserved</th>

276  *       <td>All <i>punct</i> characters together with those in the string

277  *        {@code "?/[]@"}</td></tr>

278  *   <tr><th scope="row" style="vertical-align:top">escaped</th>

279  *       <td>Escaped octets, that is, triplets consisting of the percent

280  *           character ({@code '%'}) followed by two hexadecimal digits

281  *           ({@code '0'}-{@code '9'}, {@code 'A'}-{@code 'F'}, and

282  *           {@code 'a'}-{@code 'f'})</td></tr>

283  *   <tr><th scope="row" style="vertical-align:top">other</th>

284  *       <td>The Unicode characters that are not in the US-ASCII character set,

285  *           are not control characters (according to the {@link

286  *           java.lang.Character#isISOControl(char) Character.isISOControl}

287  *           method), and are not space characters (according to the {@link

288  *           java.lang.Character#isSpaceChar(char) Character.isSpaceChar}

289  *           method)&nbsp;&nbsp;<i>(<b>Deviation from RFC 2396</b>, which is

290  *           limited to US-ASCII)</i></td></tr>

291  * </tbody>

292  * </table>

293  *

294  * <p><a id="legal-chars"></a> The set of all legal URI characters consists of

295  * the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>

296  * characters.

297  *

298  *

299  * <h4> Escaped octets, quotation, encoding, and decoding </h4>

300  *

301  * RFC 2396 allows escaped octets to appear in the user-info, path, query, and

302  * fragment components.  Escaping serves two purposes in URIs:

303  *

304  * <ul>

305  *

306  *   <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to

307  *   conform strictly to RFC&nbsp;2396 by not containing any <i>other</i>

308  *   characters.  </p></li>

309  *

310  *   <li><p> To <i>quote</i> characters that are otherwise illegal in a

311  *   component.  The user-info, path, query, and fragment components differ

312  *   slightly in terms of which characters are considered legal and illegal.

313  *   </p></li>

314  *

315  * </ul>

316  *

317  * These purposes are served in this class by three related operations:

318  *

319  * <ul>

320  *

321  *   <li><p><a id="encode"></a> A character is <i>encoded</i> by replacing it

322  *   with the sequence of escaped octets that represent that character in the

323  *   UTF-8 character set.  The Euro currency symbol ({@code '\u005Cu20AC'}),

324  *   for example, is encoded as {@code "%E2%82%AC"}.  <i>(<b>Deviation from

325  *   RFC&nbsp;2396</b>, which does not specify any particular character

326  *   set.)</i> </p></li>

327  *

328  *   <li><p><a id="quote"></a> An illegal character is <i>quoted</i> simply by

329  *   encoding it.  The space character, for example, is quoted by replacing it

330  *   with {@code "%20"}.  UTF-8 contains US-ASCII, hence for US-ASCII

331  *   characters this transformation has exactly the effect required by

332  *   RFC&nbsp;2396. </p></li>

333  *

334  *   <li><p><a id="decode"></a>

335  *   A sequence of escaped octets is <i>decoded</i> by

336  *   replacing it with the sequence of characters that it represents in the

337  *   UTF-8 character set.  UTF-8 contains US-ASCII, hence decoding has the

338  *   effect of de-quoting any quoted US-ASCII characters as well as that of

339  *   decoding any encoded non-US-ASCII characters.  If a <a

340  *   href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs

341  *   when decoding the escaped octets then the erroneous octets are replaced by

342  *   {@code '\u005CuFFFD'}, the Unicode replacement character.  </p></li>

343  *

344  * </ul>

345  *

346  * These operations are exposed in the constructors and methods of this class

347  * as follows:

348  *

349  * <ul>

350  *

351  *   <li><p> The {@linkplain #URI(java.lang.String) single-argument

352  *   constructor} requires any illegal characters in its argument to be

353  *   quoted and preserves any escaped octets and <i>other</i> characters that

354  *   are present.  </p></li>

355  *

356  *   <li><p> The {@linkplain

357  *   #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)

358  *   multi-argument constructors} quote illegal characters as

359  *   required by the components in which they appear.  The percent character

360  *   ({@code '%'}) is always quoted by these constructors.  Any <i>other</i>

361  *   characters are preserved.  </p></li>

362  *

363  *   <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()

364  *   getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()

365  *   getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link

366  *   #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the

367  *   values of their corresponding components in raw form, without interpreting

368  *   any escaped octets.  The strings returned by these methods may contain

369  *   both escaped octets and <i>other</i> characters, and will not contain any

370  *   illegal characters.  </p></li>

371  *

372  *   <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()

373  *   getPath}, {@link #getQuery() getQuery}, {@link #getFragment()

374  *   getFragment}, {@link #getAuthority() getAuthority}, and {@link

375  *   #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped

376  *   octets in their corresponding components.  The strings returned by these

377  *   methods may contain both <i>other</i> characters and illegal characters,

378  *   and will not contain any escaped octets.  </p></li>

379  *

380  *   <li><p> The {@link #toString() toString} method returns a URI string with

381  *   all necessary quotation but which may contain <i>other</i> characters.

382  *   </p></li>

383  *

384  *   <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully

385  *   quoted and encoded URI string that does not contain any <i>other</i>

386  *   characters.  </p></li>

387  *

388  * </ul>

389  *

390  *

391  * <h4> Identities </h4>

392  *

393  * For any URI <i>u</i>, it is always the case that

394  *

395  * <blockquote>

396  * {@code new URI(}<i>u</i>{@code .toString()).equals(}<i>u</i>{@code )}&nbsp;.

397  * </blockquote>

398  *

399  * For any URI <i>u</i> that does not contain redundant syntax such as two

400  * slashes before an empty authority (as in {@code file:///tmp/}&nbsp;) or a

401  * colon following a host name but no port (as in

402  * {@code http://java.sun.com:}&nbsp;), and that does not encode characters

403  * except those that must be quoted, the following identities also hold:

404  * <pre>

405  *     new URI(<i>u</i>.getScheme(),

406  *             <i>u</i>.getSchemeSpecificPart(),

407  *             <i>u</i>.getFragment())

408  *     .equals(<i>u</i>)</pre>

409  * in all cases,

410  * <pre>

411  *     new URI(<i>u</i>.getScheme(),

412  *             <i>u</i>.getAuthority(),

413  *             <i>u</i>.getPath(), <i>u</i>.getQuery(),

414  *             <i>u</i>.getFragment())

415  *     .equals(<i>u</i>)</pre>

416  * if <i>u</i> is hierarchical, and

417  * <pre>

418  *     new URI(<i>u</i>.getScheme(),

419  *             <i>u</i>.getUserInfo(), <i>u</i>.getHost(), <i>u</i>.getPort(),

420  *             <i>u</i>.getPath(), <i>u</i>.getQuery(),

421  *             <i>u</i>.getFragment())

422  *     .equals(<i>u</i>)</pre>

423  * if <i>u</i> is hierarchical and has either no authority or a server-based

424  * authority.

425  *

426  *

427  * <h4> URIs, URLs, and URNs </h4>

428  *

429  * A URI is a uniform resource <i>identifier</i> while a URL is a uniform

430  * resource <i>locator</i>.  Hence every URL is a URI, abstractly speaking, but

431  * not every URI is a URL.  This is because there is another subcategory of

432  * URIs, uniform resource <i>names</i> (URNs), which name resources but do not

433  * specify how to locate them.  The {@code mailto}, {@code news}, and

434  * {@code isbn} URIs shown above are examples of URNs.

435  *

436  * <p> The conceptual distinction between URIs and URLs is reflected in the

437  * differences between this class and the {@link URL} class.

438  *

439  * <p> An instance of this class represents a URI reference in the syntactic

440  * sense defined by RFC&nbsp;2396.  A URI may be either absolute or relative.

441  * A URI string is parsed according to the generic syntax without regard to the

442  * scheme, if any, that it specifies.  No lookup of the host, if any, is

443  * performed, and no scheme-dependent stream handler is constructed.  Equality,

444  * hashing, and comparison are defined strictly in terms of the character

445  * content of the instance.  In other words, a URI instance is little more than

446  * a structured string that supports the syntactic, scheme-independent

447  * operations of comparison, normalization, resolution, and relativization.

448  *

449  * <p> An instance of the {@link URL} class, by contrast, represents the

450  * syntactic components of a URL together with some of the information required

451  * to access the resource that it describes.  A URL must be absolute, that is,

452  * it must always specify a scheme.  A URL string is parsed according to its

453  * scheme.  A stream handler is always established for a URL, and in fact it is

454  * impossible to create a URL instance for a scheme for which no handler is

455  * available.  Equality and hashing depend upon both the scheme and the

456  * Internet address of the host, if any; comparison is not defined.  In other

457  * words, a URL is a structured string that supports the syntactic operation of

458  * resolution as well as the network I/O operations of looking up the host and

459  * opening a connection to the specified resource.

460  *

461  *

462  * @author Mark Reinhold

463  * @since 1.4

464  *

465  * @see <a href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC&nbsp;2279: UTF-8, a

466  * transformation format of ISO 10646</i></a>, <br><a

467  * href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6 Addressing

468  * Architecture</i></a>, <br><a

469  * href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform

470  * Resource Identifiers (URI): Generic Syntax</i></a>, <br><a

471  * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for

472  * Literal IPv6 Addresses in URLs</i></a>, <br><a

473  * href="URISyntaxException.html">URISyntaxException</a>

474  */

475 

476 public final class URI

477     implements Comparable<URI>, Serializable

478 {

479 

480     // Note: Comments containing the word "ASSERT" indicate places where a

481     // throw of an InternalError should be replaced by an appropriate assertion

482     // statement once asserts are enabled in the build.

483 

484     static final long serialVersionUID = -6052424284110960213L;   // Stream version id for this Serializable class

485 

486 

487     // -- Properties and components of this instance --

488 

489     // Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
490     private transient String scheme;            // null ==> relative URI
491     private transient String fragment;          // null ==> undefined

492 

493     // Hierarchical URI components: [//<authority>]<path>[?<query>]
494     private transient String authority;         // Registry- or server-based; null ==> undefined

495 

496     // Server-based authority: [<userInfo>@]<host>[:<port>]
497     private transient String userInfo;          // null ==> undefined
498     private transient String host;              // null ==> registry-based
499     private transient int port = -1;            // -1 ==> undefined

500 

501     // Remaining components of hierarchical URIs
502     private transient String path;              // null ==> opaque
503     private transient String query;             // null ==> undefined

504 

505     // The remaining fields may be computed on demand, which is safe even in
506     // the face of multiple threads racing to initialize them
507     private transient String schemeSpecificPart;
508     private transient int hash;        // Zero ==> undefined

509 

510     private transient String decodedUserInfo;   // Decoded counterparts of the raw components,
511     private transient String decodedAuthority;  // covered by the compute-on-demand note above
512     private transient String decodedPath;
513     private transient String decodedQuery;
514     private transient String decodedFragment;
515     private transient String decodedSchemeSpecificPart;

516 

517     /**
518      * The string form of this URI.
519      *
520      * @serial
521      */
522     private volatile String string;             // The only serializable field (all others are transient)

523 

524 

525 

526     // -- Constructors and factories --

527 

528     private URI() { }                           // Used internally; every field keeps its declared default (port stays -1)

529 

530     /**

531      * Constructs a URI by parsing the given string.

532      *

533      * <p> This constructor parses the given string exactly as specified by the

534      * grammar in <a

535      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,

536      * Appendix&nbsp;A, <b><i>except for the following deviations:</i></b> </p>

537      *

538      * <ul>

539      *

540      *   <li><p> An empty authority component is permitted as long as it is

541      *   followed by a non-empty path, a query component, or a fragment

542      *   component.  This allows the parsing of URIs such as

543      *   {@code "file:///foo/bar"}, which seems to be the intent of

544      *   RFC&nbsp;2396 although the grammar does not permit it.  If the

545      *   authority component is empty then the user-information, host, and port

546      *   components are undefined. </p></li>

547      *

548      *   <li><p> Empty relative paths are permitted; this seems to be the

549      *   intent of RFC&nbsp;2396 although the grammar does not permit it.  The

550      *   primary consequence of this deviation is that a standalone fragment

551      *   such as {@code "#foo"} parses as a relative URI with an empty path

552      *   and the given fragment, and can be usefully <a

553      *   href="#resolve-frag">resolved</a> against a base URI. </p></li>

554      *

555      *   <li><p> IPv4 addresses in host components are parsed rigorously, as

556      *   specified by <a

557      *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>: Each

558      *   element of a dotted-quad address must contain no more than three

559      *   decimal digits.  Each element is further constrained to have a value

560      *   no greater than 255. </p></li>

561      *

562      *   <li> <p> Hostnames in host components that comprise only a single

563      *   domain label are permitted to start with an <i>alphanum</i>

564      *   character. This seems to be the intent of <a

565      *   href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>

566      *   section&nbsp;3.2.2 although the grammar does not permit it. The

567      *   consequence of this deviation is that the authority component of a

568      *   hierarchical URI such as {@code s://123} will parse as a server-based

569      *   authority. </p></li>

570      *

571      *   <li><p> IPv6 addresses are permitted for the host component.  An IPv6

572      *   address must be enclosed in square brackets ({@code '['} and

573      *   {@code ']'}) as specified by <a

574      *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>.  The

575      *   IPv6 address itself must parse according to <a

576      *   href="http://www.ietf.org/rfc/rfc2373.txt">RFC&nbsp;2373</a>.  IPv6

577      *   addresses are further constrained to describe no more than sixteen

578      *   bytes of address information, a constraint implicit in RFC&nbsp;2373

579      *   but not expressible in the grammar. </p></li>

580      *

581      *   <li><p> Characters in the <i>other</i> category are permitted wherever

582      *   RFC&nbsp;2396 permits <i>escaped</i> octets, that is, in the

583      *   user-information, path, query, and fragment components, as well as in

584      *   the authority component if the authority is registry-based.  This

585      *   allows URIs to contain Unicode characters beyond those in the US-ASCII

586      *   character set. </p></li>

587      *

588      * </ul>

589      *

590      * @param  str   The string to be parsed into a URI

591      *

592      * @throws  NullPointerException

593      *          If {@code str} is {@code null}

594      *

595      * @throws  URISyntaxException

596      *          If the given string violates RFC&nbsp;2396, as augmented

597      *          by the above deviations

598      */

599     public URI(String str) throws URISyntaxException {

600         new Parser(str).parse(false);   // NOTE(review): 'false' presumably means a server-based authority is not required -- confirm in Parser.parse (not visible here)

601     }

602 

603     /**

604      * Constructs a hierarchical URI from the given components.

605      *

606      * <p> If a scheme is given then the path, if also given, must either be

607      * empty or begin with a slash character ({@code '/'}).  Otherwise a

608      * component of the new URI may be left undefined by passing {@code null}

609      * for the corresponding parameter or, in the case of the {@code port}

610      * parameter, by passing {@code -1}.

611      *

612      * <p> This constructor first builds a URI string from the given components

613      * according to the rules specified in <a

614      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,

615      * section&nbsp;5.2, step&nbsp;7: </p>

616      *

617      * <ol>

618      *

619      *   <li><p> Initially, the result string is empty. </p></li>

620      *

621      *   <li><p> If a scheme is given then it is appended to the result,

622      *   followed by a colon character ({@code ':'}).  </p></li>

623      *

624      *   <li><p> If user information, a host, or a port are given then the

625      *   string {@code "//"} is appended.  </p></li>

626      *

627      *   <li><p> If user information is given then it is appended, followed by

628      *   a commercial-at character ({@code '@'}).  Any character not in the

629      *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>

630      *   categories is <a href="#quote">quoted</a>.  </p></li>

631      *

632      *   <li><p> If a host is given then it is appended.  If the host is a

633      *   literal IPv6 address but is not enclosed in square brackets

634      *   ({@code '['} and {@code ']'}) then the square brackets are added.

635      *   </p></li>

636      *

637      *   <li><p> If a port number is given then a colon character

638      *   ({@code ':'}) is appended, followed by the port number in decimal.

639      *   </p></li>

640      *

641      *   <li><p> If a path is given then it is appended.  Any character not in

642      *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>

643      *   categories, and not equal to the slash character ({@code '/'}) or the

644      *   commercial-at character ({@code '@'}), is quoted.  </p></li>

645      *

646      *   <li><p> If a query is given then a question-mark character

647      *   ({@code '?'}) is appended, followed by the query.  Any character that

648      *   is not a <a href="#legal-chars">legal URI character</a> is quoted.

649      *   </p></li>

650      *

651      *   <li><p> Finally, if a fragment is given then a hash character

652      *   ({@code '#'}) is appended, followed by the fragment.  Any character

653      *   that is not a legal URI character is quoted.  </p></li>

654      *

655      * </ol>

656      *

657      * <p> The resulting URI string is then parsed as if by invoking the {@link

658      * #URI(String)} constructor and then invoking the {@link

659      * #parseServerAuthority()} method upon the result; this may cause a {@link

660      * URISyntaxException} to be thrown.  </p>

661      *

662      * @param   scheme    Scheme name

663      * @param   userInfo  User name and authorization information

664      * @param   host      Host name

665      * @param   port      Port number

666      * @param   path      Path

667      * @param   query     Query

668      * @param   fragment  Fragment

669      *

670      * @throws URISyntaxException

671      *         If both a scheme and a path are given but the path is relative,

672      *         if the URI string constructed from the given components violates

673      *         RFC&nbsp;2396, or if the authority component of the string is

674      *         present but cannot be parsed as a server-based authority

675      */

676     public URI(String scheme,

677                String userInfo, String host, int port,

678                String path, String query, String fragment)

679         throws URISyntaxException

680     {

681         String s = toString(scheme, null,

682                             null, userInfo, host, port,

683                             path, query, fragment);

684         checkPath(s, scheme, path);

685         new Parser(s).parse(true);

686     }

687 

688     /**

689      * Constructs a hierarchical URI from the given components.

690      *

691      * <p> If a scheme is given then the path, if also given, must either be

692      * empty or begin with a slash character ({@code '/'}).  Otherwise a

693      * component of the new URI may be left undefined by passing {@code null}

694      * for the corresponding parameter.

695      *

696      * <p> This constructor first builds a URI string from the given components

697      * according to the rules specified in <a

698      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,

699      * section&nbsp;5.2, step&nbsp;7: </p>

700      *

701      * <ol>

702      *

703      *   <li><p> Initially, the result string is empty.  </p></li>

704      *

705      *   <li><p> If a scheme is given then it is appended to the result,

706      *   followed by a colon character ({@code ':'}).  </p></li>

707      *

708      *   <li><p> If an authority is given then the string {@code "//"} is

709      *   appended, followed by the authority.  If the authority contains a

710      *   literal IPv6 address then the address must be enclosed in square

711      *   brackets ({@code '['} and {@code ']'}).  Any character not in the

712      *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>

713      *   categories, and not equal to the commercial-at character

714      *   ({@code '@'}), is <a href="#quote">quoted</a>.  </p></li>

715      *

716      *   <li><p> If a path is given then it is appended.  Any character not in

717      *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>

718      *   categories, and not equal to the slash character ({@code '/'}) or the

719      *   commercial-at character ({@code '@'}), is quoted.  </p></li>

720      *

721      *   <li><p> If a query is given then a question-mark character

722      *   ({@code '?'}) is appended, followed by the query.  Any character that

723      *   is not a <a href="#legal-chars">legal URI character</a> is quoted.

724      *   </p></li>

725      *

726      *   <li><p> Finally, if a fragment is given then a hash character

727      *   ({@code '#'}) is appended, followed by the fragment.  Any character

728      *   that is not a legal URI character is quoted.  </p></li>

729      *

730      * </ol>

731      *

732      * <p> The resulting URI string is then parsed as if by invoking the {@link

733      * #URI(String)} constructor and then invoking the {@link

734      * #parseServerAuthority()} method upon the result; this may cause a {@link

735      * URISyntaxException} to be thrown.  </p>

736      *

737      * @param   scheme     Scheme name

738      * @param   authority  Authority

739      * @param   path       Path

740      * @param   query      Query

741      * @param   fragment   Fragment

742      *

743      * @throws URISyntaxException

744      *         If both a scheme and a path are given but the path is relative,

745      *         if the URI string constructed from the given components violates

746      *         RFC&nbsp;2396, or if the authority component of the string is

747      *         present but cannot be parsed as a server-based authority

748      */

749     public URI(String scheme,

750                String authority,

751                String path, String query, String fragment)

752         throws URISyntaxException

753     {

754         String s = toString(scheme, null,

755                             authority, null, null, -1,

756                             path, query, fragment);

757         checkPath(s, scheme, path);

758         new Parser(s).parse(false);

759     }

760 

761     /**

762      * Constructs a hierarchical URI from the given components.

763      *

764      * <p> A component may be left undefined by passing {@code null}.

765      *

766      * <p> This convenience constructor works as if by invoking the

767      * seven-argument constructor as follows:

768      *

769      * <blockquote>

770      * {@code new} {@link #URI(String, String, String, int, String, String, String)

771      * URI}{@code (scheme, null, host, -1, path, null, fragment);}

772      * </blockquote>

773      *

774      * @param   scheme    Scheme name

775      * @param   host      Host name

776      * @param   path      Path

777      * @param   fragment  Fragment

778      *

779      * @throws  URISyntaxException

780      *          If the URI string constructed from the given components

781      *          violates RFC&nbsp;2396

782      */

public URI(String scheme, String host, String path, String fragment)
    throws URISyntaxException
{
    // Forward to the seven-argument constructor with no user-info,
    // no port (-1) and no query.
    this(scheme,
         null, host, -1,
         path, null, fragment);
}

788 

789     /**

790      * Constructs a URI from the given components.

791      *

792      * <p> A component may be left undefined by passing {@code null}.

793      *

794      * <p> This constructor first builds a URI in string form using the given

795      * components as follows:  </p>

796      *

797      * <ol>

798      *

799      *   <li><p> Initially, the result string is empty.  </p></li>

800      *

801      *   <li><p> If a scheme is given then it is appended to the result,

802      *   followed by a colon character ({@code ':'}).  </p></li>

803      *

804      *   <li><p> If a scheme-specific part is given then it is appended.  Any

805      *   character that is not a <a href="#legal-chars">legal URI character</a>

806      *   is <a href="#quote">quoted</a>.  </p></li>

807      *

808      *   <li><p> Finally, if a fragment is given then a hash character

809      *   ({@code '#'}) is appended to the string, followed by the fragment.

810      *   Any character that is not a legal URI character is quoted.  </p></li>

811      *

812      * </ol>

813      *

814      * <p> The resulting URI string is then parsed in order to create the new

815      * URI instance as if by invoking the {@link #URI(String)} constructor;

816      * this may cause a {@link URISyntaxException} to be thrown.  </p>

817      *

818      * @param   scheme    Scheme name

819      * @param   ssp       Scheme-specific part

820      * @param   fragment  Fragment

821      *

822      * @throws  URISyntaxException

823      *          If the URI string constructed from the given components

824      *          violates RFC&nbsp;2396

825      */

826     public URI(String scheme, String ssp, String fragment)

827         throws URISyntaxException

828     {

829         new Parser(toString(scheme, ssp,

830                             null, null, null, -1,

831                             null, null, fragment))

832             .parse(false);

833     }

834 

/**
 * Builds a minimal URI made up of just a scheme and a path, storing both
 * values directly without running the parser. This is an internal
 * fast-path: the caller is expected to pass values that are already
 * valid, which is only double-checked when assertions are enabled.
 */
URI(String scheme, String path) {
    // Sanity check (active only under -ea): the pair must round-trip
    // through the regular string parser.
    assert validSchemeAndPath(scheme, path);
    this.path = path;
    this.scheme = scheme;
}

844 

845     private static boolean validSchemeAndPath(String scheme, String path) {

846         try {

847             URI u = new URI(scheme + ":" + path);

848             return scheme.equals(u.scheme) && path.equals(u.path);

849         } catch (URISyntaxException e) {

850             return false;

851         }

852     }

853 

854     /**

855      * Creates a URI by parsing the given string.

856      *

857      * <p> This convenience factory method works as if by invoking the {@link

858      * #URI(String)} constructor; any {@link URISyntaxException} thrown by the

859      * constructor is caught and wrapped in a new {@link

860      * IllegalArgumentException} object, which is then thrown.

861      *

862      * <p> This method is provided for use in situations where it is known that

863      * the given string is a legal URI, for example for URI constants declared

864      * within a program, and so it would be considered a programming error

865      * for the string not to parse as such.  The constructors, which throw

866      * {@link URISyntaxException} directly, should be used in situations where a

867      * URI is being constructed from user input or from some other source that

868      * may be prone to errors.  </p>

869      *

870      * @param  str   The string to be parsed into a URI

871      * @return The new URI

872      *

873      * @throws  NullPointerException

874      *          If {@code str} is {@code null}

875      *

876      * @throws  IllegalArgumentException

877      *          If the given string violates RFC&nbsp;2396

878      */

879     public static URI create(String str) {

880         try {

881             return new URI(str);

882         } catch (URISyntaxException x) {

883             throw new IllegalArgumentException(x.getMessage(), x);

884         }

885     }

886 

887 

888     // -- Operations --

889 

890     /**

891      * Attempts to parse this URI's authority component, if defined, into

892      * user-information, host, and port components.

893      *

894      * <p> If this URI's authority component has already been recognized as

895      * being server-based then it will already have been parsed into

896      * user-information, host, and port components.  In this case, or if this

897      * URI has no authority component, this method simply returns this URI.

898      *

899      * <p> Otherwise this method attempts once more to parse the authority

900      * component into user-information, host, and port components, and throws

901      * an exception describing why the authority component could not be parsed

902      * in that way.

903      *

904      * <p> This method is provided because the generic URI syntax specified in

905      * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>

906      * cannot always distinguish a malformed server-based authority from a

907      * legitimate registry-based authority.  It must therefore treat some

908      * instances of the former as instances of the latter.  The authority

909      * component in the URI string {@code "//foo:bar"}, for example, is not a

910      * legal server-based authority but it is legal as a registry-based

911      * authority.

912      *

913      * <p> In many common situations, for example when working with URIs that are

914      * known to be either URNs or URLs, the hierarchical URIs being used will

915      * always be server-based.  They therefore must either be parsed as such or

916      * treated as an error.  In these cases a statement such as

917      *

918      * <blockquote>

919      * {@code URI }<i>u</i>{@code  = new URI(str).parseServerAuthority();}

920      * </blockquote>

921      *

922      * <p> can be used to ensure that <i>u</i> always refers to a URI that, if

923      * it has an authority component, has a server-based authority with proper

924      * user-information, host, and port components.  Invoking this method also

925      * ensures that if the authority could not be parsed in that way then an

926      * appropriate diagnostic message can be issued based upon the exception

927      * that is thrown. </p>

928      *

929      * @return  A URI whose authority field has been parsed

930      *          as a server-based authority

931      *

932      * @throws  URISyntaxException

933      *          If the authority component of this URI is defined

934      *          but cannot be parsed as a server-based authority

935      *          according to RFC&nbsp;2396

936      */

937     public URI parseServerAuthority()

938         throws URISyntaxException

939     {

940         // We could be clever and cache the error message and index from the

941         // exception thrown during the original parse, but that would require

942         // either more fields or a more-obscure representation.

943         if ((host != null) || (authority == null))

944             return this;

945         new Parser(toString()).parse(true);

946         return this;

947     }

948 

949     /**

950      * Normalizes this URI's path.

951      *

952      * <p> If this URI is opaque, or if its path is already in normal form,

953      * then this URI is returned.  Otherwise a new URI is constructed that is

954      * identical to this URI except that its path is computed by normalizing

955      * this URI's path in a manner consistent with <a

956      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,

957      * section&nbsp;5.2, step&nbsp;6, sub-steps&nbsp;c through&nbsp;f; that is:

958      * </p>

959      *

960      * <ol>

961      *

962      *   <li><p> All {@code "."} segments are removed. </p></li>

963      *

964      *   <li><p> If a {@code ".."} segment is preceded by a non-{@code ".."}

965      *   segment then both of these segments are removed.  This step is

966      *   repeated until it is no longer applicable. </p></li>

967      *

968      *   <li><p> If the path is relative, and if its first segment contains a

969      *   colon character ({@code ':'}), then a {@code "."} segment is

970      *   prepended.  This prevents a relative URI with a path such as

971      *   {@code "a:b/c/d"} from later being re-parsed as an opaque URI with a

972      *   scheme of {@code "a"} and a scheme-specific part of {@code "b/c/d"}.

973      *   <b><i>(Deviation from RFC&nbsp;2396)</i></b> </p></li>

974      *

975      * </ol>

976      *

977      * <p> A normalized path will begin with one or more {@code ".."} segments

978      * if there were insufficient non-{@code ".."} segments preceding them to

979      * allow their removal.  A normalized path will begin with a {@code "."}

980      * segment if one was inserted by step 3 above.  Otherwise, a normalized

981      * path will not contain any {@code "."} or {@code ".."} segments. </p>

982      *

983      * @return  A URI equivalent to this URI,

984      *          but whose path is in normal form

985      */

986     public URI normalize() {

987         return normalize(this);

988     }

989 

990     /**

991      * Resolves the given URI against this URI.

992      *

993      * <p> If the given URI is already absolute, or if this URI is opaque, then

994      * the given URI is returned.

995      *

996      * <p><a id="resolve-frag"></a> If the given URI's fragment component is

997      * defined, its path component is empty, and its scheme, authority, and

998      * query components are undefined, then a URI with the given fragment but

999      * with all other components equal to those of this URI is returned.  This

1000      * allows a URI representing a standalone fragment reference, such as

1001      * {@code "#foo"}, to be usefully resolved against a base URI.

1002      *

1003      * <p> Otherwise this method constructs a new hierarchical URI in a manner

1004      * consistent with <a

1005      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,

1006      * section&nbsp;5.2; that is: </p>

1007      *

1008      * <ol>

1009      *

1010      *   <li><p> A new URI is constructed with this URI's scheme and the given

1011      *   URI's query and fragment components. </p></li>

1012      *

1013      *   <li><p> If the given URI has an authority component then the new URI's

1014      *   authority and path are taken from the given URI. </p></li>

1015      *

1016      *   <li><p> Otherwise the new URI's authority component is copied from

1017      *   this URI, and its path is computed as follows: </p>

1018      *

1019      *   <ol>

1020      *

1021      *     <li><p> If the given URI's path is absolute then the new URI's path

1022      *     is taken from the given URI. </p></li>

1023      *

1024      *     <li><p> Otherwise the given URI's path is relative, and so the new

1025      *     URI's path is computed by resolving the path of the given URI

1026      *     against the path of this URI.  This is done by concatenating all but

1027      *     the last segment of this URI's path, if any, with the given URI's

1028      *     path and then normalizing the result as if by invoking the {@link

1029      *     #normalize() normalize} method. </p></li>

1030      *

1031      *   </ol></li>

1032      *

1033      * </ol>

1034      *

1035      * <p> The result of this method is absolute if, and only if, either this

1036      * URI is absolute or the given URI is absolute.  </p>

1037      *

1038      * @param  uri  The URI to be resolved against this URI

1039      * @return The resulting URI

1040      *

1041      * @throws  NullPointerException

1042      *          If {@code uri} is {@code null}

1043      */

1044     public URI resolve(URI uri) {

1045         return resolve(this, uri);

1046     }

1047 

1048     /**

1049      * Constructs a new URI by parsing the given string and then resolving it

1050      * against this URI.

1051      *

1052      * <p> This convenience method works as if invoking it were equivalent to

1053      * evaluating the expression {@link #resolve(java.net.URI)

1054      * resolve}{@code (URI.}{@link #create(String) create}{@code (str))}. </p>

1055      *

1056      * @param  str   The string to be parsed into a URI

1057      * @return The resulting URI

1058      *

1059      * @throws  NullPointerException

1060      *          If {@code str} is {@code null}

1061      *

1062      * @throws  IllegalArgumentException

1063      *          If the given string violates RFC&nbsp;2396

1064      */

1065     public URI resolve(String str) {

1066         return resolve(URI.create(str));

1067     }

1068 

1069     /**

1070      * Relativizes the given URI against this URI.

1071      *

1072      * <p> The relativization of the given URI against this URI is computed as

1073      * follows: </p>

1074      *

1075      * <ol>

1076      *

1077      *   <li><p> If either this URI or the given URI are opaque, or if the

1078      *   scheme and authority components of the two URIs are not identical, or

1079      *   if the path of this URI is not a prefix of the path of the given URI,

1080      *   then the given URI is returned. </p></li>

1081      *

1082      *   <li><p> Otherwise a new relative hierarchical URI is constructed with

1083      *   query and fragment components taken from the given URI and with a path

1084      *   component computed by removing this URI's path from the beginning of

1085      *   the given URI's path. </p></li>

1086      *

1087      * </ol>

1088      *

1089      * @param  uri  The URI to be relativized against this URI

1090      * @return The resulting URI

1091      *

1092      * @throws  NullPointerException

1093      *          If {@code uri} is {@code null}

1094      */

1095     public URI relativize(URI uri) {

1096         return relativize(this, uri);

1097     }

1098 

1099     /**

1100      * Constructs a URL from this URI.

1101      *

1102      * <p> This convenience method works as if invoking it were equivalent to

1103      * evaluating the expression {@code new URL(this.toString())} after

1104      * first checking that this URI is absolute. </p>

1105      *

1106      * @return  A URL constructed from this URI

1107      *

1108      * @throws  IllegalArgumentException

1109      *          If this URI is not absolute

1110      *

1111      * @throws  MalformedURLException

1112      *          If a protocol handler for the URL could not be found,

1113      *          or if some other error occurred while constructing the URL

1114      */

1115     public URL toURL() throws MalformedURLException {

1116         return URL.fromURI(this);

1117     }

1118 

1119     // -- Component access methods --

1120 

1121     /**

1122      * Returns the scheme component of this URI.

1123      *

1124      * <p> The scheme component of a URI, if defined, only contains characters

1125      * in the <i>alphanum</i> category and in the string {@code "-.+"}.  A

1126      * scheme always starts with an <i>alpha</i> character. <p>

1127      *

1128      * The scheme component of a URI cannot contain escaped octets, hence this

1129      * method does not perform any decoding.

1130      *

1131      * @return  The scheme component of this URI,

1132      *          or {@code null} if the scheme is undefined

1133      */

1134     public String getScheme() {

1135         return scheme;

1136     }

1137 

1138     /**

1139      * Tells whether or not this URI is absolute.

1140      *

1141      * <p> A URI is absolute if, and only if, it has a scheme component. </p>

1142      *

1143      * @return  {@code true} if, and only if, this URI is absolute

1144      */

1145     public boolean isAbsolute() {

1146         return scheme != null;

1147     }

1148 

1149     /**

1150      * Tells whether or not this URI is opaque.

1151      *

1152      * <p> A URI is opaque if, and only if, it is absolute and its

1153      * scheme-specific part does not begin with a slash character ('/').

1154      * An opaque URI has a scheme, a scheme-specific part, and possibly

1155      * a fragment; all other components are undefined. </p>

1156      *

1157      * @return  {@code true} if, and only if, this URI is opaque

1158      */

1159     public boolean isOpaque() {

1160         return path == null;

1161     }

1162 

1163     /**

1164      * Returns the raw scheme-specific part of this URI.  The scheme-specific

1165      * part is never undefined, though it may be empty.

1166      *

1167      * <p> The scheme-specific part of a URI only contains legal URI

1168      * characters. </p>

1169      *

1170      * @return  The raw scheme-specific part of this URI

1171      *          (never {@code null})

1172      */

1173     public String getRawSchemeSpecificPart() {

1174         String part = schemeSpecificPart;

1175         if (part != null) {

1176             return part;

1177         }

1178 

1179         String s = string;

1180         if (s != null) {

1181             // if string is defined, components will have been parsed

1182             int start = 0;

1183             int end = s.length();

1184             if (scheme != null) {

1185                 start = scheme.length() + 1;

1186             }

1187             if (fragment != null) {

1188                 end -= fragment.length() + 1;

1189             }

1190             if (path != null && path.length() == end - start) {

1191                 part = path;

1192             } else {

1193                 part = s.substring(start, end);

1194             }

1195         } else {

1196             StringBuilder sb = new StringBuilder();

1197             appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),

1198                                  host, port, getPath(), getQuery());

1199             part = sb.toString();

1200         }

1201         return schemeSpecificPart = part;

1202     }

1203 

1204     /**

1205      * Returns the decoded scheme-specific part of this URI.

1206      *

1207      * <p> The string returned by this method is equal to that returned by the

1208      * {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method

1209      * except that all sequences of escaped octets are <a

1210      * href="#decode">decoded</a>.  </p>

1211      *

1212      * @return  The decoded scheme-specific part of this URI

1213      *          (never {@code null})

1214      */

1215     public String getSchemeSpecificPart() {

1216         String part = decodedSchemeSpecificPart;

1217         if (part == null) {

1218             decodedSchemeSpecificPart = part = decode(getRawSchemeSpecificPart());

1219         }

1220         return part;

1221     }

1222 

1223     /**

1224      * Returns the raw authority component of this URI.

1225      *

1226      * <p> The authority component of a URI, if defined, only contains the

1227      * commercial-at character ({@code '@'}) and characters in the

1228      * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>

1229      * categories.  If the authority is server-based then it is further

1230      * constrained to have valid user-information, host, and port

1231      * components. </p>

1232      *

1233      * @return  The raw authority component of this URI,

1234      *          or {@code null} if the authority is undefined

1235      */

1236     public String getRawAuthority() {

1237         return authority;

1238     }

1239 

1240     /**

1241      * Returns the decoded authority component of this URI.

1242      *

1243      * <p> The string returned by this method is equal to that returned by the

1244      * {@link #getRawAuthority() getRawAuthority} method except that all

1245      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>

1246      *

1247      * @return  The decoded authority component of this URI,

1248      *          or {@code null} if the authority is undefined

1249      */

1250     public String getAuthority() {

1251         String auth = decodedAuthority;

1252         if ((auth == null) && (authority != null)) {

1253             decodedAuthority = auth = decode(authority);

1254         }

1255         return auth;

1256     }

1257 

1258     /**

1259      * Returns the raw user-information component of this URI.

1260      *

1261      * <p> The user-information component of a URI, if defined, only contains

1262      * characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and

1263      * <i>other</i> categories. </p>

1264      *

1265      * @return  The raw user-information component of this URI,

1266      *          or {@code null} if the user information is undefined

1267      */

1268     public String getRawUserInfo() {

1269         return userInfo;

1270     }

1271 

1272     /**

1273      * Returns the decoded user-information component of this URI.

1274      *

1275      * <p> The string returned by this method is equal to that returned by the

1276      * {@link #getRawUserInfo() getRawUserInfo} method except that all

1277      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>

1278      *

1279      * @return  The decoded user-information component of this URI,

1280      *          or {@code null} if the user information is undefined

1281      */

1282     public String getUserInfo() {

1283         String user = decodedUserInfo;

1284         if ((user == null) && (userInfo != null)) {

1285             decodedUserInfo = user = decode(userInfo);

1286         }

1287         return user;

1288     }

1289 

1290     /**

1291      * Returns the host component of this URI.

1292      *

1293      * <p> The host component of a URI, if defined, will have one of the

1294      * following forms: </p>

1295      *

1296      * <ul>

1297      *

1298      *   <li><p> A domain name consisting of one or more <i>labels</i>

1299      *   separated by period characters ({@code '.'}), optionally followed by

1300      *   a period character.  Each label consists of <i>alphanum</i> characters

1301      *   as well as hyphen characters ({@code '-'}), though hyphens never

1302      *   occur as the first or last characters in a label. The rightmost

1303      *   label of a domain name consisting of two or more labels, begins

1304      *   with an <i>alpha</i> character. </p></li>

1305      *

1306      *   <li><p> A dotted-quad IPv4 address of the form

1307      *   <i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +},

1308      *   where no <i>digit</i> sequence is longer than three characters and no

1309      *   sequence has a value larger than 255. </p></li>

1310      *

1311      *   <li><p> An IPv6 address enclosed in square brackets ({@code '['} and

1312      *   {@code ']'}) and consisting of hexadecimal digits, colon characters

1313      *   ({@code ':'}), and possibly an embedded IPv4 address.  The full

1314      *   syntax of IPv6 addresses is specified in <a

1315      *   href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6

1316      *   Addressing Architecture</i></a>.  </p></li>

1317      *

1318      * </ul>

1319      *

1320      * The host component of a URI cannot contain escaped octets, hence this

1321      * method does not perform any decoding.

1322      *

1323      * @return  The host component of this URI,

1324      *          or {@code null} if the host is undefined

1325      */

1326     public String getHost() {

1327         return host;

1328     }

1329 

1330     /**

1331      * Returns the port number of this URI.

1332      *

1333      * <p> The port component of a URI, if defined, is a non-negative

1334      * integer. </p>

1335      *

1336      * @return  The port component of this URI,

1337      *          or {@code -1} if the port is undefined

1338      */

1339     public int getPort() {

1340         return port;

1341     }

1342 

1343     /**

1344      * Returns the raw path component of this URI.

1345      *

1346      * <p> The path component of a URI, if defined, only contains the slash

1347      * character ({@code '/'}), the commercial-at character ({@code '@'}),

1348      * and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,

1349      * and <i>other</i> categories. </p>

1350      *

1351      * @return  The path component of this URI,

1352      *          or {@code null} if the path is undefined

1353      */

1354     public String getRawPath() {

1355         return path;

1356     }

1357 

1358     /**

1359      * Returns the decoded path component of this URI.

1360      *

1361      * <p> The string returned by this method is equal to that returned by the

1362      * {@link #getRawPath() getRawPath} method except that all sequences of

1363      * escaped octets are <a href="#decode">decoded</a>.  </p>

1364      *

1365      * @return  The decoded path component of this URI,

1366      *          or {@code null} if the path is undefined

1367      */

1368     public String getPath() {

1369         String decoded = decodedPath;

1370         if ((decoded == null) && (path != null)) {

1371             decodedPath = decoded = decode(path);

1372         }

1373         return decoded;

1374     }

1375 

1376     /**

1377      * Returns the raw query component of this URI.

1378      *

1379      * <p> The query component of a URI, if defined, only contains legal URI

1380      * characters. </p>

1381      *

1382      * @return  The raw query component of this URI,

1383      *          or {@code null} if the query is undefined

1384      */

1385     public String getRawQuery() {

1386         return query;

1387     }

1388 

1389     /**

1390      * Returns the decoded query component of this URI.

1391      *

1392      * <p> The string returned by this method is equal to that returned by the

1393      * {@link #getRawQuery() getRawQuery} method except that all sequences of

1394      * escaped octets are <a href="#decode">decoded</a>.  </p>

1395      *

1396      * @return  The decoded query component of this URI,

1397      *          or {@code null} if the query is undefined

1398      */

1399     public String getQuery() {

1400         String decoded = decodedQuery;

1401         if ((decoded == null) && (query != null)) {

1402             decodedQuery = decoded = decode(query, false);

1403         }

1404         return decoded;

1405     }

1406 

1407     /**

1408      * Returns the raw fragment component of this URI.

1409      *

1410      * <p> The fragment component of a URI, if defined, only contains legal URI

1411      * characters. </p>

1412      *

1413      * @return  The raw fragment component of this URI,

1414      *          or {@code null} if the fragment is undefined

1415      */

1416     public String getRawFragment() {

1417         return fragment;

1418     }

1419 

1420     /**

1421      * Returns the decoded fragment component of this URI.

1422      *

1423      * <p> The string returned by this method is equal to that returned by the

1424      * {@link #getRawFragment() getRawFragment} method except that all

1425      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>

1426      *

1427      * @return  The decoded fragment component of this URI,

1428      *          or {@code null} if the fragment is undefined

1429      */

1430     public String getFragment() {

1431         String decoded = decodedFragment;

1432         if ((decoded == null) && (fragment != null)) {

1433             decodedFragment = decoded = decode(fragment, false);

1434         }

1435         return decoded;

1436     }

1437 

1438 

1439     // -- Equality, comparison, hash code, toString, and serialization --

1440 

1441     /**

1442      * Tests this URI for equality with another object.

1443      *

1444      * <p> If the given object is not a URI then this method immediately

1445      * returns {@code false}.

1446      *

1447      * <p> For two URIs to be considered equal requires that either both are

1448      * opaque or both are hierarchical.  Their schemes must either both be

1449      * undefined or else be equal without regard to case. Their fragments

1450      * must either both be undefined or else be equal.

1451      *

1452      * <p> For two opaque URIs to be considered equal, their scheme-specific

1453      * parts must be equal.

1454      *

1455      * <p> For two hierarchical URIs to be considered equal, their paths must

1456      * be equal and their queries must either both be undefined or else be

1457      * equal.  Their authorities must either both be undefined, or both be

1458      * registry-based, or both be server-based.  If their authorities are

1459      * defined and are registry-based, then they must be equal.  If their

1460      * authorities are defined and are server-based, then their hosts must be

1461      * equal without regard to case, their port numbers must be equal, and

1462      * their user-information components must be equal.

1463      *

1464      * <p> When testing the user-information, path, query, fragment, authority,

1465      * or scheme-specific parts of two URIs for equality, the raw forms rather

1466      * than the encoded forms of these components are compared and the

1467      * hexadecimal digits of escaped octets are compared without regard to

1468      * case.

1469      *

1470      * <p> This method satisfies the general contract of the {@link

1471      * java.lang.Object#equals(Object) Object.equals} method. </p>

1472      *

1473      * @param   ob   The object to which this object is to be compared

1474      *

1475      * @return  {@code true} if, and only if, the given object is a URI that

1476      *          is identical to this URI

1477      */

1478     public boolean equals(Object ob) {

1479         if (ob == this)

1480             return true;

1481         if (!(ob instanceof URI))

1482             return false;

1483         URI that = (URI)ob;

1484         if (this.isOpaque() != that.isOpaque()) return false;

1485         if (!equalIgnoringCase(this.scheme, that.scheme)) return false;

1486         if (!equal(this.fragment, that.fragment)) return false;

1487 

1488         // Opaque

1489         if (this.isOpaque())

1490             return equal(this.schemeSpecificPart, that.schemeSpecificPart);

1491 

1492         // Hierarchical

1493         if (!equal(this.path, that.path)) return false;

1494         if (!equal(this.query, that.query)) return false;

1495 

1496         // Authorities

1497         if (this.authority == that.authority) return true;

1498         if (this.host != null) {

1499             // Server-based

1500             if (!equal(this.userInfo, that.userInfo)) return false;

1501             if (!equalIgnoringCase(this.host, that.host)) return false;

1502             if (this.port != that.port) return false;

1503         } else if (this.authority != null) {

1504             // Registry-based

1505             if (!equal(this.authority, that.authority)) return false;

1506         } else if (this.authority != that.authority) {

1507             return false;

1508         }

1509 

1510         return true;

1511     }

1512 

1513     /**

1514      * Returns a hash-code value for this URI.  The hash code is based upon all

1515      * of the URI's components, and satisfies the general contract of the

1516      * {@link java.lang.Object#hashCode() Object.hashCode} method.

1517      *

1518      * @return  A hash-code value for this URI

1519      */

1520     public int hashCode() {

1521         int h = hash;

1522         if (h == 0) {

1523             h = hashIgnoringCase(0, scheme);

1524             h = hash(h, fragment);

1525             if (isOpaque()) {

1526                 h = hash(h, schemeSpecificPart);

1527             } else {

1528                 h = hash(h, path);

1529                 h = hash(h, query);

1530                 if (host != null) {

1531                     h = hash(h, userInfo);

1532                     h = hashIgnoringCase(h, host);

1533                     h += 1949 * port;

1534                 } else {

1535                     h = hash(h, authority);

1536                 }

1537             }

1538             if (h != 0) {

1539                 hash = h;

1540             }

1541         }

1542         return h;

1543     }

1544 

1545     /**

1546      * Compares this URI to another object, which must be a URI.

1547      *

1548      * <p> When comparing corresponding components of two URIs, if one

1549      * component is undefined but the other is defined then the first is

1550      * considered to be less than the second.  Unless otherwise noted, string

1551      * components are ordered according to their natural, case-sensitive

1552      * ordering as defined by the {@link java.lang.String#compareTo(Object)

1553      * String.compareTo} method.  String components that are subject to

1554      * encoding are compared by comparing their raw forms rather than their

1555      * encoded forms.

1556      *

1557      * <p> The ordering of URIs is defined as follows: </p>

1558      *

1559      * <ul>

1560      *

1561      *   <li><p> Two URIs with different schemes are ordered according the

1562      *   ordering of their schemes, without regard to case. </p></li>

1563      *

1564      *   <li><p> A hierarchical URI is considered to be less than an opaque URI

1565      *   with an identical scheme. </p></li>

1566      *

1567      *   <li><p> Two opaque URIs with identical schemes are ordered according

1568      *   to the ordering of their scheme-specific parts. </p></li>

1569      *

1570      *   <li><p> Two opaque URIs with identical schemes and scheme-specific

1571      *   parts are ordered according to the ordering of their

1572      *   fragments. </p></li>

1573      *

1574      *   <li><p> Two hierarchical URIs with identical schemes are ordered

1575      *   according to the ordering of their authority components: </p>

1576      *

1577      *   <ul>

1578      *

1579      *     <li><p> If both authority components are server-based then the URIs

1580      *     are ordered according to their user-information components; if these

1581      *     components are identical then the URIs are ordered according to the

1582      *     ordering of their hosts, without regard to case; if the hosts are

1583      *     identical then the URIs are ordered according to the ordering of

1584      *     their ports. </p></li>

1585      *

1586      *     <li><p> If one or both authority components are registry-based then

1587      *     the URIs are ordered according to the ordering of their authority

1588      *     components. </p></li>

1589      *

1590      *   </ul></li>

1591      *

1592      *   <li><p> Finally, two hierarchical URIs with identical schemes and

1593      *   authority components are ordered according to the ordering of their

1594      *   paths; if their paths are identical then they are ordered according to

1595      *   the ordering of their queries; if the queries are identical then they

1596      *   are ordered according to the order of their fragments. </p></li>

1597      *

1598      * </ul>

1599      *

1600      * <p> This method satisfies the general contract of the {@link

1601      * java.lang.Comparable#compareTo(Object) Comparable.compareTo}

1602      * method. </p>

1603      *

1604      * @param   that

1605      *          The object to which this URI is to be compared

1606      *

1607      * @return  A negative integer, zero, or a positive integer as this URI is

1608      *          less than, equal to, or greater than the given URI

1609      *

1610      * @throws  ClassCastException

1611      *          If the given object is not a URI

1612      */

1613     public int compareTo(URI that) {

1614         int c;

1615 

1616         if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)

1617             return c;

1618 

1619         if (this.isOpaque()) {

1620             if (that.isOpaque()) {

1621                 // Both opaque

1622                 if ((c = compare(this.schemeSpecificPart,

1623                                  that.schemeSpecificPart)) != 0)

1624                     return c;

1625                 return compare(this.fragment, that.fragment);

1626             }

1627             return +1;                  // Opaque > hierarchical

1628         } else if (that.isOpaque()) {

1629             return -1;                  // Hierarchical < opaque

1630         }

1631 

1632         // Hierarchical

1633         if ((this.host != null) && (that.host != null)) {

1634             // Both server-based

1635             if ((c = compare(this.userInfo, that.userInfo)) != 0)

1636                 return c;

1637             if ((c = compareIgnoringCase(this.host, that.host)) != 0)

1638                 return c;

1639             if ((c = this.port - that.port) != 0)

1640                 return c;

1641         } else {

1642             // If one or both authorities are registry-based then we simply

1643             // compare them in the usual, case-sensitive way.  If one is

1644             // registry-based and one is server-based then the strings are

1645             // guaranteed to be unequal, hence the comparison will never return

1646             // zero and the compareTo and equals methods will remain

1647             // consistent.

1648             if ((c = compare(this.authority, that.authority)) != 0) return c;

1649         }

1650 

1651         if ((c = compare(this.path, that.path)) != 0) return c;

1652         if ((c = compare(this.query, that.query)) != 0) return c;

1653         return compare(this.fragment, that.fragment);

1654     }

1655 

1656     /**

1657      * Returns the content of this URI as a string.

1658      *

1659      * <p> If this URI was created by invoking one of the constructors in this

1660      * class then a string equivalent to the original input string, or to the

1661      * string computed from the originally-given components, as appropriate, is

1662      * returned.  Otherwise this URI was created by normalization, resolution,

1663      * or relativization, and so a string is constructed from this URI's

1664      * components according to the rules specified in <a

1665      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,

1666      * section&nbsp;5.2, step&nbsp;7. </p>

1667      *

1668      * @return  The string form of this URI

1669      */

1670     public String toString() {

1671         String s = string;

1672         if (s == null) {

1673             s = defineString();

1674         }

1675         return s;

1676     }

1677 

1678     private String defineString() {

1679         String s = string;

1680         if (s != null) {

1681             return s;

1682         }

1683 

1684         StringBuilder sb = new StringBuilder();

1685         if (scheme != null) {

1686             sb.append(scheme);

1687             sb.append(':');

1688         }

1689         if (isOpaque()) {

1690             sb.append(schemeSpecificPart);

1691         } else {

1692             if (host != null) {

1693                 sb.append("//");

1694                 if (userInfo != null) {

1695                     sb.append(userInfo);

1696                     sb.append('@');

1697                 }

1698                 boolean needBrackets = ((host.indexOf(':') >= 0)

1699                         && !host.startsWith("[")

1700                         && !host.endsWith("]"));

1701                 if (needBrackets) sb.append('[');

1702                 sb.append(host);

1703                 if (needBrackets) sb.append(']');

1704                 if (port != -1) {

1705                     sb.append(':');

1706                     sb.append(port);

1707                 }

1708             } else if (authority != null) {

1709                 sb.append("//");

1710                 sb.append(authority);

1711             }

1712             if (path != null)

1713                 sb.append(path);

1714             if (query != null) {

1715                 sb.append('?');

1716                 sb.append(query);

1717             }

1718         }

1719         if (fragment != null) {

1720             sb.append('#');

1721             sb.append(fragment);

1722         }

1723         return string = sb.toString();

1724     }

1725 

1726     /**

1727      * Returns the content of this URI as a US-ASCII string.

1728      *

1729      * <p> If this URI does not contain any characters in the <i>other</i>

1730      * category then an invocation of this method will return the same value as

1731      * an invocation of the {@link #toString() toString} method.  Otherwise

1732      * this method works as if by invoking that method and then <a

1733      * href="#encode">encoding</a> the result.  </p>

1734      *

1735      * @return  The string form of this URI, encoded as needed

1736      *          so that it only contains characters in the US-ASCII

1737      *          charset

1738      */

1739     public String toASCIIString() {

1740         return encode(toString());

1741     }

1742 

1743 

1744     // -- Serialization support --

1745 

1746     /**

1747      * Saves the content of this URI to the given serial stream.

1748      *

1749      * <p> The only serializable field of a URI instance is its {@code string}

1750      * field.  That field is given a value, if it does not have one already,

1751      * and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}

1752      * method of the given object-output stream is invoked. </p>

1753      *

1754      * @param  os  The object-output stream to which this object

1755      *             is to be written

1756      */

1757     private void writeObject(ObjectOutputStream os)

1758         throws IOException

1759     {

1760         defineString();

1761         os.defaultWriteObject();        // Writes the string field only

1762     }

1763 

1764     /**

1765      * Reconstitutes a URI from the given serial stream.

1766      *

1767      * <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is

1768      * invoked to read the value of the {@code string} field.  The result is

1769      * then parsed in the usual way.

1770      *

1771      * @param  is  The object-input stream from which this object

1772      *             is being read

1773      */

1774     private void readObject(ObjectInputStream is)

1775         throws ClassNotFoundException, IOException

1776     {

1777         port = -1;                      // Argh

1778         is.defaultReadObject();

1779         try {

1780             new Parser(string).parse(false);

1781         } catch (URISyntaxException x) {

1782             IOException y = new InvalidObjectException("Invalid URI");

1783             y.initCause(x);

1784             throw y;

1785         }

1786     }

1787 

1788 

1789     // -- End of public methods --

1790 

1791 

1792     // -- Utility methods for string-field comparison and hashing --

1793 

1794     // These methods return appropriate values for null string arguments,

1795     // thereby simplifying the equals, hashCode, and compareTo methods.

1796     //

1797     // The case-ignoring methods should only be applied to strings whose

1798     // characters are all known to be US-ASCII.  Because of this restriction,

1799     // these methods are faster than the similar methods in the String class.

1800 

1801     // US-ASCII only

1802     private static int toLower(char c) {

1803         if ((c >= 'A') && (c <= 'Z'))

1804             return c + ('a' - 'A');

1805         return c;

1806     }

1807 

1808     // US-ASCII only

1809     private static int toUpper(char c) {

1810         if ((c >= 'a') && (c <= 'z'))

1811             return c - ('a' - 'A');

1812         return c;

1813     }

1814 

1815     private static boolean equal(String s, String t) {

1816         if (s == t) return true;

1817         if ((s != null) && (t != null)) {

1818             if (s.length() != t.length())

1819                 return false;

1820             if (s.indexOf('%') < 0)

1821                 return s.equals(t);

1822             int n = s.length();

1823             for (int i = 0; i < n;) {

1824                 char c = s.charAt(i);

1825                 char d = t.charAt(i);

1826                 if (c != '%') {

1827                     if (c != d)

1828                         return false;

1829                     i++;

1830                     continue;

1831                 }

1832                 if (d != '%')

1833                     return false;

1834                 i++;

1835                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))

1836                     return false;

1837                 i++;

1838                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))

1839                     return false;

1840                 i++;

1841             }

1842             return true;

1843         }

1844         return false;

1845     }

1846 

1847     // US-ASCII only

1848     private static boolean equalIgnoringCase(String s, String t) {

1849         if (s == t) return true;

1850         if ((s != null) && (t != null)) {

1851             int n = s.length();

1852             if (t.length() != n)

1853                 return false;

1854             for (int i = 0; i < n; i++) {

1855                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))

1856                     return false;

1857             }

1858             return true;

1859         }

1860         return false;

1861     }

1862 

1863     private static int hash(int hash, String s) {

1864         if (s == null) return hash;

1865         return s.indexOf('%') < 0 ? hash * 127 + s.hashCode()

1866                                   : normalizedHash(hash, s);

1867     }

1868 

1869 

1870     private static int normalizedHash(int hash, String s) {

1871         int h = 0;

1872         for (int index = 0; index < s.length(); index++) {

1873             char ch = s.charAt(index);

1874             h = 31 * h + ch;

1875             if (ch == '%') {

1876                 /*

1877                  * Process the next two encoded characters

1878                  */

1879                 for (int i = index + 1; i < index + 3; i++)

1880                     h = 31 * h + toUpper(s.charAt(i));

1881                 index += 2;

1882             }

1883         }

1884         return hash * 127 + h;

1885     }

1886 

1887     // US-ASCII only

1888     private static int hashIgnoringCase(int hash, String s) {

1889         if (s == null) return hash;

1890         int h = hash;

1891         int n = s.length();

1892         for (int i = 0; i < n; i++)

1893             h = 31 * h + toLower(s.charAt(i));

1894         return h;

1895     }

1896 

1897     private static int compare(String s, String t) {

1898         if (s == t) return 0;

1899         if (s != null) {

1900             if (t != null)

1901                 return s.compareTo(t);

1902             else

1903                 return +1;

1904         } else {

1905             return -1;

1906         }

1907     }

1908 

1909     // US-ASCII only

1910     private static int compareIgnoringCase(String s, String t) {

1911         if (s == t) return 0;

1912         if (s != null) {

1913             if (t != null) {

1914                 int sn = s.length();

1915                 int tn = t.length();

1916                 int n = sn < tn ? sn : tn;

1917                 for (int i = 0; i < n; i++) {

1918                     int c = toLower(s.charAt(i)) - toLower(t.charAt(i));

1919                     if (c != 0)

1920                         return c;

1921                 }

1922                 return sn - tn;

1923             }

1924             return +1;

1925         } else {

1926             return -1;

1927         }

1928     }

1929 

1930 

1931     // -- String construction --

1932 

1933     // If a scheme is given then the path, if given, must be absolute

1934     //

1935     private static void checkPath(String s, String scheme, String path)

1936         throws URISyntaxException

1937     {

1938         if (scheme != null) {

1939             if (path != null && !path.isEmpty() && path.charAt(0) != '/')

1940                 throw new URISyntaxException(s, "Relative path in absolute URI");

1941         }

1942     }

1943 

1944     private void appendAuthority(StringBuilder sb,

1945                                  String authority,

1946                                  String userInfo,

1947                                  String host,

1948                                  int port)

1949     {

1950         if (host != null) {

1951             sb.append("//");

1952             if (userInfo != null) {

1953                 sb.append(quote(userInfo, L_USERINFO, H_USERINFO));

1954                 sb.append('@');

1955             }

1956             boolean needBrackets = ((host.indexOf(':') >= 0)

1957                                     && !host.startsWith("[")

1958                                     && !host.endsWith("]"));

1959             if (needBrackets) sb.append('[');

1960             sb.append(host);

1961             if (needBrackets) sb.append(']');

1962             if (port != -1) {

1963                 sb.append(':');

1964                 sb.append(port);

1965             }

1966         } else if (authority != null) {

1967             sb.append("//");

1968             if (authority.startsWith("[")) {

1969                 // authority should (but may not) contain an embedded IPv6 address

1970                 int end = authority.indexOf(']');

1971                 String doquote = authority, dontquote = "";

1972                 if (end != -1 && authority.indexOf(':') != -1) {

1973                     // the authority contains an IPv6 address

1974                     if (end == authority.length()) {

1975                         dontquote = authority;

1976                         doquote = "";

1977                     } else {

1978                         dontquote = authority.substring(0 , end + 1);

1979                         doquote = authority.substring(end + 1);

1980                     }

1981                 }

1982                 sb.append(dontquote);

1983                 sb.append(quote(doquote,

1984                             L_REG_NAME | L_SERVER,

1985                             H_REG_NAME | H_SERVER));

1986             } else {

1987                 sb.append(quote(authority,

1988                             L_REG_NAME | L_SERVER,

1989                             H_REG_NAME | H_SERVER));

1990             }

1991         }

1992     }

1993 

1994     private void appendSchemeSpecificPart(StringBuilder sb,

1995                                           String opaquePart,

1996                                           String authority,

1997                                           String userInfo,

1998                                           String host,

1999                                           int port,

2000                                           String path,

2001                                           String query)

2002     {

2003         if (opaquePart != null) {

2004             /* check if SSP begins with an IPv6 address

2005              * because we must not quote a literal IPv6 address

2006              */

2007             if (opaquePart.startsWith("//[")) {

2008                 int end =  opaquePart.indexOf(']');

2009                 if (end != -1 && opaquePart.indexOf(':')!=-1) {

2010                     String doquote, dontquote;

2011                     if (end == opaquePart.length()) {

2012                         dontquote = opaquePart;

2013                         doquote = "";

2014                     } else {

2015                         dontquote = opaquePart.substring(0,end+1);

2016                         doquote = opaquePart.substring(end+1);

2017                     }

2018                     sb.append (dontquote);

2019                     sb.append(quote(doquote, L_URIC, H_URIC));

2020                 }

2021             } else {

2022                 sb.append(quote(opaquePart, L_URIC, H_URIC));

2023             }

2024         } else {

2025             appendAuthority(sb, authority, userInfo, host, port);

2026             if (path != null)

2027                 sb.append(quote(path, L_PATH, H_PATH));

2028             if (query != null) {

2029                 sb.append('?');

2030                 sb.append(quote(query, L_URIC, H_URIC));

2031             }

2032         }

2033     }

2034 

2035     private void appendFragment(StringBuilder sb, String fragment) {

2036         if (fragment != null) {

2037             sb.append('#');

2038             sb.append(quote(fragment, L_URIC, H_URIC));

2039         }

2040     }

2041 

2042     private String toString(String scheme,

2043                             String opaquePart,

2044                             String authority,

2045                             String userInfo,

2046                             String host,

2047                             int port,

2048                             String path,

2049                             String query,

2050                             String fragment)

2051     {

2052         StringBuilder sb = new StringBuilder();

2053         if (scheme != null) {

2054             sb.append(scheme);

2055             sb.append(':');

2056         }

2057         appendSchemeSpecificPart(sb, opaquePart,

2058                                  authority, userInfo, host, port,

2059                                  path, query);

2060         appendFragment(sb, fragment);

2061         return sb.toString();

2062     }

2063 

2064     // -- Normalization, resolution, and relativization --

2065 

2066     // RFC2396 5.2 (6)

2067     private static String resolvePath(String base, String child,

2068                                       boolean absolute)

2069     {

2070         int i = base.lastIndexOf('/');

2071         int cn = child.length();

2072         String path = "";

2073 

2074         if (cn == 0) {

2075             // 5.2 (6a)

2076             if (i >= 0)

2077                 path = base.substring(0, i + 1);

2078         } else {

2079             StringBuilder sb = new StringBuilder(base.length() + cn);

2080             // 5.2 (6a)

2081             if (i >= 0)

2082                 sb.append(base, 0, i + 1);

2083             // 5.2 (6b)

2084             sb.append(child);

2085             path = sb.toString();

2086         }

2087 

2088         // 5.2 (6c-f)

2089         String np = normalize(path);

2090 

2091         // 5.2 (6g): If the result is absolute but the path begins with "../",

2092         // then we simply leave the path as-is

2093 

2094         return np;

2095     }

2096 

2097     // RFC2396 5.2

2098     private static URI resolve(URI base, URI child) {

2099         // check if child if opaque first so that NPE is thrown

2100         // if child is null.

2101         if (child.isOpaque() || base.isOpaque())

2102             return child;

2103 

2104         // 5.2 (2): Reference to current document (lone fragment)

2105         if ((child.scheme == null) && (child.authority == null)

2106             && child.path.isEmpty() && (child.fragment != null)

2107             && (child.query == null)) {

2108             if ((base.fragment != null)

2109                 && child.fragment.equals(base.fragment)) {

2110                 return base;

2111             }

2112             URI ru = new URI();

2113             ru.scheme = base.scheme;

2114             ru.authority = base.authority;

2115             ru.userInfo = base.userInfo;

2116             ru.host = base.host;

2117             ru.port = base.port;

2118             ru.path = base.path;

2119             ru.fragment = child.fragment;

2120             ru.query = base.query;

2121             return ru;

2122         }

2123 

2124         // 5.2 (3): Child is absolute

2125         if (child.scheme != null)

2126             return child;

2127 

2128         URI ru = new URI();             // Resolved URI

2129         ru.scheme = base.scheme;

2130         ru.query = child.query;

2131         ru.fragment = child.fragment;

2132 

2133         // 5.2 (4): Authority

2134         if (child.authority == null) {

2135             ru.authority = base.authority;

2136             ru.host = base.host;

2137             ru.userInfo = base.userInfo;

2138             ru.port = base.port;

2139 

2140             String cp = (child.path == null) ? "" : child.path;

2141             if (!cp.isEmpty() && cp.charAt(0) == '/') {

2142                 // 5.2 (5): Child path is absolute

2143                 ru.path = child.path;

2144             } else {

2145                 // 5.2 (6): Resolve relative path

2146                 ru.path = resolvePath(base.path, cp, base.isAbsolute());

2147             }

2148         } else {

2149             ru.authority = child.authority;

2150             ru.host = child.host;

2151             ru.userInfo = child.userInfo;

2152             ru.host = child.host;

2153             ru.port = child.port;

2154             ru.path = child.path;

2155         }

2156 

2157         // 5.2 (7): Recombine (nothing to do here)

2158         return ru;

2159     }

2160 

2161     // If the given URI's path is normal then return the URI;

2162     // otherwise, return a new URI containing the normalized path.

2163     //

2164     private static URI normalize(URI u) {

2165         if (u.isOpaque() || u.path == null || u.path.isEmpty())

2166             return u;

2167 

2168         String np = normalize(u.path);

2169         if (np == u.path)

2170             return u;

2171 

2172         URI v = new URI();

2173         v.scheme = u.scheme;

2174         v.fragment = u.fragment;

2175         v.authority = u.authority;

2176         v.userInfo = u.userInfo;

2177         v.host = u.host;

2178         v.port = u.port;

2179         v.path = np;

2180         v.query = u.query;

2181         return v;

2182     }

2183 

2184     // If both URIs are hierarchical, their scheme and authority components are

2185     // identical, and the base path is a prefix of the child's path, then

2186     // return a relative URI that, when resolved against the base, yields the

2187     // child; otherwise, return the child.

2188     //

2189     private static URI relativize(URI base, URI child) {

2190         // check if child if opaque first so that NPE is thrown

2191         // if child is null.

2192         if (child.isOpaque() || base.isOpaque())

2193             return child;

2194         if (!equalIgnoringCase(base.scheme, child.scheme)

2195             || !equal(base.authority, child.authority))

2196             return child;

2197 

2198         String bp = normalize(base.path);

2199         String cp = normalize(child.path);

2200         if (!bp.equals(cp)) {

2201             if (!bp.endsWith("/"))

2202                 bp = bp + "/";

2203             if (!cp.startsWith(bp))

2204                 return child;

2205         }

2206 

2207         URI v = new URI();

2208         v.path = cp.substring(bp.length());

2209         v.query = child.query;

2210         v.fragment = child.fragment;

2211         return v;

2212     }

2213 

2214 

2215 

2216     // -- Path normalization --

2217 

2218     // The following algorithm for path normalization avoids the creation of a

2219     // string object for each segment, as well as the use of a string buffer to

2220     // compute the final result, by using a single char array and editing it in

2221     // place.  The array is first split into segments, replacing each slash

2222     // with '\0' and creating a segment-index array, each element of which is

2223     // the index of the first char in the corresponding segment.  We then walk

2224     // through both arrays, removing ".", "..", and other segments as necessary

2225     // by setting their entries in the index array to -1.  Finally, the two

2226     // arrays are used to rejoin the segments and compute the final result.

2227     //

2228     // This code is based upon src/solaris/native/java/io/canonicalize_md.c

2229 

2230 

2231     // Check the given path to see if it might need normalization.  A path

2232     // might need normalization if it contains duplicate slashes, a "."

2233     // segment, or a ".." segment.  Return -1 if no further normalization is

2234     // possible, otherwise return the number of segments found.

2235     //

2236     // This method takes a string argument rather than a char array so that

2237     // this test can be performed without invoking path.toCharArray().

2238     //

2239     private static int needsNormalization(String path) {

2240         boolean normal = true;

2241         int ns = 0;                     // Number of segments

2242         int end = path.length() - 1;    // Index of last char in path

2243         int p = 0;                      // Index of next char in path

2244 

2245         // Skip initial slashes

2246         while (p <= end) {

2247             if (path.charAt(p) != '/') break;

2248             p++;

2249         }

2250         if (p > 1) normal = false;

2251 

2252         // Scan segments

2253         while (p <= end) {

2254 

2255             // Looking at "." or ".." ?

2256             if ((path.charAt(p) == '.')

2257                 && ((p == end)

2258                     || ((path.charAt(p + 1) == '/')

2259                         || ((path.charAt(p + 1) == '.')

2260                             && ((p + 1 == end)

2261                                 || (path.charAt(p + 2) == '/')))))) {

2262                 normal = false;

2263             }

2264             ns++;

2265 

2266             // Find beginning of next segment

2267             while (p <= end) {

2268                 if (path.charAt(p++) != '/')

2269                     continue;

2270 

2271                 // Skip redundant slashes

2272                 while (p <= end) {

2273                     if (path.charAt(p) != '/') break;

2274                     normal = false;

2275                     p++;

2276                 }

2277 

2278                 break;

2279             }

2280         }

2281 

2282         return normal ? -1 : ns;

2283     }

2284 

2285 

2286     // Split the given path into segments, replacing slashes with nulls and

2287     // filling in the given segment-index array.

2288     //

2289     // Preconditions:

2290     //   segs.length == Number of segments in path

2291     //

2292     // Postconditions:

2293     //   All slashes in path replaced by '\0'

2294     //   segs[i] == Index of first char in segment i (0 <= i < segs.length)

2295     //

2296     private static void split(char[] path, int[] segs) {

2297         int end = path.length - 1;      // Index of last char in path

2298         int p = 0;                      // Index of next char in path

2299         int i = 0;                      // Index of current segment

2300 

2301         // Skip initial slashes

2302         while (p <= end) {

2303             if (path[p] != '/') break;

2304             path[p] = '\0';

2305             p++;

2306         }

2307 

2308         while (p <= end) {

2309 

2310             // Note start of segment

2311             segs[i++] = p++;

2312 

2313             // Find beginning of next segment

2314             while (p <= end) {

2315                 if (path[p++] != '/')

2316                     continue;

2317                 path[p - 1] = '\0';

2318 

2319                 // Skip redundant slashes

2320                 while (p <= end) {

2321                     if (path[p] != '/') break;

2322                     path[p++] = '\0';

2323                 }

2324                 break;

2325             }

2326         }

2327 

2328         if (i != segs.length)

2329             throw new InternalError();  // ASSERT

2330     }

2331 

2332 

2333     // Join the segments in the given path according to the given segment-index

2334     // array, ignoring those segments whose index entries have been set to -1,

2335     // and inserting slashes as needed.  Return the length of the resulting

2336     // path.

2337     //

2338     // Preconditions:

2339     //   segs[i] == -1 implies segment i is to be ignored

2340     //   path computed by split, as above, with '\0' having replaced '/'

2341     //

2342     // Postconditions:

2343     //   path[0] .. path[return value - 1] == Resulting path

2344     //

2345     private static int join(char[] path, int[] segs) {

2346         int ns = segs.length;           // Number of segments

2347         int end = path.length - 1;      // Index of last char in path

2348         int p = 0;                      // Index of next path char to write

2349 

2350         if (path[p] == '\0') {

2351             // Restore initial slash for absolute paths

2352             path[p++] = '/';

2353         }

2354 

2355         for (int i = 0; i < ns; i++) {

2356             int q = segs[i];            // Current segment

2357             if (q == -1)

2358                 // Ignore this segment

2359                 continue;

2360 

2361             if (p == q) {

2362                 // We're already at this segment, so just skip to its end

2363                 while ((p <= end) && (path[p] != '\0'))

2364                     p++;

2365                 if (p <= end) {

2366                     // Preserve trailing slash

2367                     path[p++] = '/';

2368                 }

2369             } else if (p < q) {

2370                 // Copy q down to p

2371                 while ((q <= end) && (path[q] != '\0'))

2372                     path[p++] = path[q++];

2373                 if (q <= end) {

2374                     // Preserve trailing slash

2375                     path[p++] = '/';

2376                 }

2377             } else

2378                 throw new InternalError(); // ASSERT false

2379         }

2380 

2381         return p;

2382     }

2383 

2384 

2385     // Remove "." segments from the given path, and remove segment pairs

2386     // consisting of a non-".." segment followed by a ".." segment.

2387     //

2388     private static void removeDots(char[] path, int[] segs) {

2389         int ns = segs.length;

2390         int end = path.length - 1;

2391 

2392         for (int i = 0; i < ns; i++) {

2393             int dots = 0;               // Number of dots found (0, 1, or 2)

2394 

2395             // Find next occurrence of "." or ".."

2396             do {

2397                 int p = segs[i];

2398                 if (path[p] == '.') {

2399                     if (p == end) {

2400                         dots = 1;

2401                         break;

2402                     } else if (path[p + 1] == '\0') {

2403                         dots = 1;

2404                         break;

2405                     } else if ((path[p + 1] == '.')

2406                                && ((p + 1 == end)

2407                                    || (path[p + 2] == '\0'))) {

2408                         dots = 2;

2409                         break;

2410                     }

2411                 }

2412                 i++;

2413             } while (i < ns);

2414             if ((i > ns) || (dots == 0))

2415                 break;

2416 

2417             if (dots == 1) {

2418                 // Remove this occurrence of "."

2419                 segs[i] = -1;

2420             } else {

2421                 // If there is a preceding non-".." segment, remove both that

2422                 // segment and this occurrence of ".."; otherwise, leave this

2423                 // ".." segment as-is.

2424                 int j;

2425                 for (j = i - 1; j >= 0; j--) {

2426                     if (segs[j] != -1) break;

2427                 }

2428                 if (j >= 0) {

2429                     int q = segs[j];

2430                     if (!((path[q] == '.')

2431                           && (path[q + 1] == '.')

2432                           && (path[q + 2] == '\0'))) {

2433                         segs[i] = -1;

2434                         segs[j] = -1;

2435                     }

2436                 }

2437             }

2438         }

2439     }

2440 

2441 

2442     // DEVIATION: If the normalized path is relative, and if the first

2443     // segment could be parsed as a scheme name, then prepend a "." segment

2444     //

2445     private static void maybeAddLeadingDot(char[] path, int[] segs) {

2446 

2447         if (path[0] == '\0')

2448             // The path is absolute

2449             return;

2450 

2451         int ns = segs.length;

2452         int f = 0;                      // Index of first segment

2453         while (f < ns) {

2454             if (segs[f] >= 0)

2455                 break;

2456             f++;

2457         }

2458         if ((f >= ns) || (f == 0))

2459             // The path is empty, or else the original first segment survived,

2460             // in which case we already know that no leading "." is needed

2461             return;

2462 

2463         int p = segs[f];

2464         while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;

2465         if (p >= path.length || path[p] == '\0')

2466             // No colon in first segment, so no "." needed

2467             return;

2468 

2469         // At this point we know that the first segment is unused,

2470         // hence we can insert a "." segment at that position

2471         path[0] = '.';

2472         path[1] = '\0';

2473         segs[0] = 0;

2474     }

2475 

2476 

2477     // Normalize the given path string.  A normal path string has no empty

2478     // segments (i.e., occurrences of "//"), no segments equal to ".", and no

2479     // segments equal to ".." that are preceded by a segment not equal to "..".

2480     // In contrast to Unix-style pathname normalization, for URI paths we

2481     // always retain trailing slashes.

2482     //

2483     private static String normalize(String ps) {

2484 

2485         // Does this path need normalization?

2486         int ns = needsNormalization(ps);        // Number of segments

2487         if (ns < 0)

2488             // Nope -- just return it

2489             return ps;

2490 

2491         char[] path = ps.toCharArray();         // Path in char-array form

2492 

2493         // Split path into segments

2494         int[] segs = new int[ns];               // Segment-index array

2495         split(path, segs);

2496 

2497         // Remove dots

2498         removeDots(path, segs);

2499 

2500         // Prevent scheme-name confusion

2501         maybeAddLeadingDot(path, segs);

2502 

2503         // Join the remaining segments and return the result

2504         String s = new String(path, 0, join(path, segs));

2505         if (s.equals(ps)) {

2506             // string was already normalized

2507             return ps;

2508         }

2509         return s;

2510     }

2511 

2512 

2513 

2514     // -- Character classes for parsing --

2515 

2516     // RFC2396 precisely specifies which characters in the US-ASCII charset are

2517     // permissible in the various components of a URI reference.  We here

2518     // define a set of mask pairs to aid in enforcing these restrictions.  Each

2519     // mask pair consists of two longs, a low mask and a high mask.  Taken

2520     // together they represent a 128-bit mask, where bit i is set iff the

2521     // character with value i is permitted.

2522     //

2523     // This approach is more efficient than sequentially searching arrays of

2524     // permitted characters.  It could be made still more efficient by

2525     // precompiling the mask information so that a character's presence in a

2526     // given mask could be determined by a single table lookup.

2527 

2528     // To save startup time, we manually calculate the low-/highMask constants.

2529     // For reference, the following methods were used to calculate the values:

2530 

2531     // Compute the low-order mask for the characters in the given string

2532     //     private static long lowMask(String chars) {

2533     //        int n = chars.length();

2534     //        long m = 0;

2535     //        for (int i = 0; i < n; i++) {

2536     //            char c = chars.charAt(i);

2537     //            if (c < 64)

2538     //                m |= (1L << c);

2539     //        }

2540     //        return m;

2541     //    }

2542 

2543     // Compute the high-order mask for the characters in the given string

2544     //    private static long highMask(String chars) {

2545     //        int n = chars.length();

2546     //        long m = 0;

2547     //        for (int i = 0; i < n; i++) {

2548     //            char c = chars.charAt(i);

2549     //            if ((c >= 64) && (c < 128))

2550     //                m |= (1L << (c - 64));

2551     //        }

2552     //        return m;

2553     //    }

2554 

2555     // Compute a low-order mask for the characters

2556     // between first and last, inclusive

2557     //    private static long lowMask(char first, char last) {

2558     //        long m = 0;

2559     //        int f = Math.max(Math.min(first, 63), 0);

2560     //        int l = Math.max(Math.min(last, 63), 0);

2561     //        for (int i = f; i <= l; i++)

2562     //            m |= 1L << i;

2563     //        return m;

2564     //    }

2565 

2566     // Compute a high-order mask for the characters

2567     // between first and last, inclusive

2568     //    private static long highMask(char first, char last) {

2569     //        long m = 0;

2570     //        int f = Math.max(Math.min(first, 127), 64) - 64;

2571     //        int l = Math.max(Math.min(last, 127), 64) - 64;

2572     //        for (int i = f; i <= l; i++)

2573     //            m |= 1L << i;

2574     //        return m;

2575     //    }

2576 

2577     // Tell whether the given character is permitted by the given mask pair

2578     private static boolean match(char c, long lowMask, long highMask) {

2579         if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.

2580             return false;

2581         if (c < 64)

2582             return ((1L << c) & lowMask) != 0;

2583         if (c < 128)

2584             return ((1L << (c - 64)) & highMask) != 0;

2585         return false;

2586     }

2587 

2588     // Character-class masks, in reverse order from RFC2396 because

2589     // initializers for static fields cannot make forward references.

2590 

2591     // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |

2592     //            "8" | "9"

2593     private static final long L_DIGIT = 0x3FF000000000000L; // lowMask('0', '9');

2594     private static final long H_DIGIT = 0L;

2595 

2596     // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |

2597     //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |

2598     //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"

2599     private static final long L_UPALPHA = 0L;

2600     private static final long H_UPALPHA = 0x7FFFFFEL; // highMask('A', 'Z');

2601 

2602     // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |

2603     //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |

2604     //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"

2605     private static final long L_LOWALPHA = 0L;

2606     private static final long H_LOWALPHA = 0x7FFFFFE00000000L; // highMask('a', 'z');

2607 

2608     // alpha         = lowalpha | upalpha

2609     private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;

2610     private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;

2611 

2612     // alphanum      = alpha | digit

2613     private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;

2614     private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;

2615 

2616     // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |

2617     //                         "a" | "b" | "c" | "d" | "e" | "f"

2618     private static final long L_HEX = L_DIGIT;

2619     private static final long H_HEX = 0x7E0000007EL; // highMask('A', 'F') | highMask('a', 'f');

2620 

2621     // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |

2622     //                 "(" | ")"

2623     private static final long L_MARK = 0x678200000000L; // lowMask("-_.!~*'()");

2624     private static final long H_MARK = 0x4000000080000000L; // highMask("-_.!~*'()");

2625 

2626     // unreserved    = alphanum | mark

2627     private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;

2628     private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;

2629 

2630     // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |

2631     //                 "$" | "," | "[" | "]"

2632     // Added per RFC2732: "[", "]"

2633     private static final long L_RESERVED = 0xAC00985000000000L; // lowMask(";/?:@&=+$,[]");

2634     private static final long H_RESERVED = 0x28000001L; // highMask(";/?:@&=+$,[]");

2635 

2636     // The zero'th bit is used to indicate that escape pairs and non-US-ASCII

2637     // characters are allowed; this is handled by the scanEscape method below.

2638     private static final long L_ESCAPED = 1L;

2639     private static final long H_ESCAPED = 0L;

2640 

2641     // uric          = reserved | unreserved | escaped

2642     private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;

2643     private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;

2644 

2645     // pchar         = unreserved | escaped |

2646     //                 ":" | "@" | "&" | "=" | "+" | "$" | ","

2647     private static final long L_PCHAR

2648         = L_UNRESERVED | L_ESCAPED | 0x2400185000000000L; // lowMask(":@&=+$,");

2649     private static final long H_PCHAR

2650         = H_UNRESERVED | H_ESCAPED | 0x1L; // highMask(":@&=+$,");

2651 

2652     // All valid path characters

2653     private static final long L_PATH = L_PCHAR | 0x800800000000000L; // lowMask(";/");

2654     private static final long H_PATH = H_PCHAR; // highMask(";/") == 0x0L;

2655 

2656     // Dash, for use in domainlabel and toplabel

2657     private static final long L_DASH = 0x200000000000L; // lowMask("-");

2658     private static final long H_DASH = 0x0L; // highMask("-");

2659 

2660     // Dot, for use in hostnames

2661     private static final long L_DOT = 0x400000000000L; // lowMask(".");

2662     private static final long H_DOT = 0x0L; // highMask(".");

2663 

2664     // userinfo      = *( unreserved | escaped |

2665     //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )

2666     private static final long L_USERINFO

2667         = L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask(";:&=+$,");

2668     private static final long H_USERINFO

2669         = H_UNRESERVED | H_ESCAPED; // | highMask(";:&=+$,") == 0L;

2670 

2671     // reg_name      = 1*( unreserved | escaped | "$" | "," |

2672     //                     ";" | ":" | "@" | "&" | "=" | "+" )

2673     private static final long L_REG_NAME

2674         = L_UNRESERVED | L_ESCAPED | 0x2C00185000000000L; // lowMask("$,;:@&=+");

2675     private static final long H_REG_NAME

2676         = H_UNRESERVED | H_ESCAPED | 0x1L; // highMask("$,;:@&=+");

2677 

2678     // All valid characters for server-based authorities

2679     private static final long L_SERVER

2680         = L_USERINFO | L_ALPHANUM | L_DASH | 0x400400000000000L; // lowMask(".:@[]");

2681     private static final long H_SERVER

2682         = H_USERINFO | H_ALPHANUM | H_DASH | 0x28000001L; // highMask(".:@[]");

2683 

2684     // Special case of server authority that represents an IPv6 address

2685     // In this case, a % does not signify an escape sequence

2686     private static final long L_SERVER_PERCENT

2687         = L_SERVER | 0x2000000000L; // lowMask("%");

2688     private static final long H_SERVER_PERCENT

2689         = H_SERVER; // | highMask("%") == 0L;

2690 

2691     // scheme        = alpha *( alpha | digit | "+" | "-" | "." )

2692     private static final long L_SCHEME = L_ALPHA | L_DIGIT | 0x680000000000L; // lowMask("+-.");

2693     private static final long H_SCHEME = H_ALPHA | H_DIGIT; // | highMask("+-.") == 0L

2694 

2695     // scope_id = alpha | digit | "_" | "."

2696     private static final long L_SCOPE_ID

2697         = L_ALPHANUM | 0x400000000000L; // lowMask("_.");

2698     private static final long H_SCOPE_ID

2699         = H_ALPHANUM | 0x80000000L; // highMask("_.");

2700 

2701     // -- Escaping and encoding --

2702 

2703     private static final char[] hexDigits = {

2704         '0', '1', '2', '3', '4', '5', '6', '7',

2705         '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'

2706     };

2707 

2708     private static void appendEscape(StringBuilder sb, byte b) {

2709         sb.append('%');

2710         sb.append(hexDigits[(b >> 4) & 0x0f]);

2711         sb.append(hexDigits[(b >> 0) & 0x0f]);

2712     }

2713 

2714     private static void appendEncoded(StringBuilder sb, char c) {

2715         ByteBuffer bb = null;

2716         try {

2717             bb = ThreadLocalCoders.encoderFor("UTF-8")

2718                 .encode(CharBuffer.wrap("" + c));

2719         } catch (CharacterCodingException x) {

2720             assert false;

2721         }

2722         while (bb.hasRemaining()) {

2723             int b = bb.get() & 0xff;

2724             if (b >= 0x80)

2725                 appendEscape(sb, (byte)b);

2726             else

2727                 sb.append((char)b);

2728         }

2729     }

2730 

2731     // Quote any characters in s that are not permitted

2732     // by the given mask pair

2733     //

2734     private static String quote(String s, long lowMask, long highMask) {

2735         StringBuilder sb = null;

2736         boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);

2737         for (int i = 0; i < s.length(); i++) {

2738             char c = s.charAt(i);

2739             if (c < '\u0080') {

2740                 if (!match(c, lowMask, highMask)) {

2741                     if (sb == null) {

2742                         sb = new StringBuilder();

2743                         sb.append(s, 0, i);

2744                     }

2745                     appendEscape(sb, (byte)c);

2746                 } else {

2747                     if (sb != null)

2748                         sb.append(c);

2749                 }

2750             } else if (allowNonASCII

2751                        && (Character.isSpaceChar(c)

2752                            || Character.isISOControl(c))) {

2753                 if (sb == null) {

2754                     sb = new StringBuilder();

2755                     sb.append(s, 0, i);

2756                 }

2757                 appendEncoded(sb, c);

2758             } else {

2759                 if (sb != null)

2760                     sb.append(c);

2761             }

2762         }

2763         return (sb == null) ? s : sb.toString();

2764     }

2765 

2766     // Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,

2767     // assuming that s is otherwise legal

2768     //

2769     private static String encode(String s) {

2770         int n = s.length();

2771         if (n == 0)

2772             return s;

2773 

2774         // First check whether we actually need to encode

2775         for (int i = 0;;) {

2776             if (s.charAt(i) >= '\u0080')

2777                 break;

2778             if (++i >= n)

2779                 return s;

2780         }

2781 

2782         String ns = Normalizer.normalize(s, Normalizer.Form.NFC);

2783         ByteBuffer bb = null;

2784         try {

2785             bb = ThreadLocalCoders.encoderFor("UTF-8")

2786                 .encode(CharBuffer.wrap(ns));

2787         } catch (CharacterCodingException x) {

2788             assert false;

2789         }

2790 

2791         StringBuilder sb = new StringBuilder();

2792         while (bb.hasRemaining()) {

2793             int b = bb.get() & 0xff;

2794             if (b >= 0x80)

2795                 appendEscape(sb, (byte)b);

2796             else

2797                 sb.append((char)b);

2798         }

2799         return sb.toString();

2800     }

2801 

2802     private static int decode(char c) {

2803         if ((c >= '0') && (c <= '9'))

2804             return c - '0';

2805         if ((c >= 'a') && (c <= 'f'))

2806             return c - 'a' + 10;

2807         if ((c >= 'A') && (c <= 'F'))

2808             return c - 'A' + 10;

2809         assert false;

2810         return -1;

2811     }

2812 

2813     private static byte decode(char c1, char c2) {

2814         return (byte)(  ((decode(c1) & 0xf) << 4)

2815                       | ((decode(c2) & 0xf) << 0));

2816     }

2817 

2818     // Evaluates all escapes in s, applying UTF-8 decoding if needed.  Assumes

2819     // that escapes are well-formed syntactically, i.e., of the form %XX.  If a

2820     // sequence of escaped octets is not valid UTF-8 then the erroneous octets

2821     // are replaced with '\uFFFD'.

2822     // Exception: any "%" found between "[]" is left alone. It is an IPv6 literal

2823     //            with a scope_id

2824     //

2825     private static String decode(String s) {

2826         return decode(s, true);

2827     }

2828 

2829     // This method was introduced as a generalization of URI.decode method

2830     // to provide a fix for JDK-8037396

2831     private static String decode(String s, boolean ignorePercentInBrackets) {

2832         if (s == null)

2833             return s;

2834         int n = s.length();

2835         if (n == 0)

2836             return s;

2837         if (s.indexOf('%') < 0)

2838             return s;

2839 

2840         StringBuilder sb = new StringBuilder(n);

2841         ByteBuffer bb = ByteBuffer.allocate(n);

2842         CharBuffer cb = CharBuffer.allocate(n);

2843         CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")

2844                 .onMalformedInput(CodingErrorAction.REPLACE)

2845                 .onUnmappableCharacter(CodingErrorAction.REPLACE);

2846 

2847         // This is not horribly efficient, but it will do for now

2848         char c = s.charAt(0);

2849         boolean betweenBrackets = false;

2850 

2851         for (int i = 0; i < n;) {

2852             assert c == s.charAt(i);    // Loop invariant

2853             if (c == '[') {

2854                 betweenBrackets = true;

2855             } else if (betweenBrackets && c == ']') {

2856                 betweenBrackets = false;

2857             }

2858             if (c != '%' || (betweenBrackets && ignorePercentInBrackets)) {

2859                 sb.append(c);

2860                 if (++i >= n)

2861                     break;

2862                 c = s.charAt(i);

2863                 continue;

2864             }

2865             bb.clear();

2866             int ui = i;

2867             for (;;) {

2868                 assert (n - i >= 2);

2869                 bb.put(decode(s.charAt(++i), s.charAt(++i)));

2870                 if (++i >= n)

2871                     break;

2872                 c = s.charAt(i);

2873                 if (c != '%')

2874                     break;

2875             }

2876             bb.flip();

2877             cb.clear();

2878             dec.reset();

2879             CoderResult cr = dec.decode(bb, cb, true);

2880             assert cr.isUnderflow();

2881             cr = dec.flush(cb);

2882             assert cr.isUnderflow();

2883             sb.append(cb.flip().toString());

2884         }

2885 

2886         return sb.toString();

2887     }

2888 

2889 

2890     // -- Parsing --

2891 

2892     // For convenience we wrap the input URI string in a new instance of the

2893     // following internal class.  This saves always having to pass the input

2894     // string as an argument to each internal scan/parse method.

2895 

2896     private class Parser {

2897 

2898         private String input;           // URI input string

2899         private boolean requireServerAuthority = false;

2900 

2901         Parser(String s) {

2902             input = s;

2903             string = s;

2904         }

2905 

2906         // -- Methods for throwing URISyntaxException in various ways --

2907 

2908         private void fail(String reason) throws URISyntaxException {

2909             throw new URISyntaxException(input, reason);

2910         }

2911 

2912         private void fail(String reason, int p) throws URISyntaxException {

2913             throw new URISyntaxException(input, reason, p);

2914         }

2915 

2916         private void failExpecting(String expected, int p)

2917             throws URISyntaxException

2918         {

2919             fail("Expected " + expected, p);

2920         }

2921 

2922 

2923         // -- Simple access to the input string --

2924 

2925         // Tells whether start < end and, if so, whether charAt(start) == c

2926         //

2927         private boolean at(int start, int end, char c) {

2928             return (start < end) && (input.charAt(start) == c);

2929         }

2930 

2931         // Tells whether start + s.length() <= end and, if so,

2932         // whether the chars at the start position match s exactly

2933         //

2934         private boolean at(int start, int end, String s) {

2935             int p = start;

2936             int sn = s.length();

2937             if (sn > end - p)

2938                 return false;

2939             int i = 0;

2940             while (i < sn) {

2941                 if (input.charAt(p++) != s.charAt(i)) {

2942                     break;

2943                 }

2944                 i++;

2945             }

2946             return (i == sn);

2947         }

2948 

2949 

2950         // -- Scanning --

2951 

2952         // The various scan and parse methods that follow use a uniform

2953         // convention of taking the current start position and end index as

2954         // their first two arguments.  The start is inclusive while the end is

2955         // exclusive, just as in the String class, i.e., a start/end pair

2956         // denotes the half-open interval [start, end) of the input string.

2957         //

2958         // These methods never proceed past the end position.  They may return

2959         // -1 to indicate outright failure, but more often they simply return

2960         // the position of the first char after the last char scanned.  Thus

2961         // a typical idiom is

2962         //

2963         //     int p = start;

2964         //     int q = scan(p, end, ...);

2965         //     if (q > p)

2966         //         // We scanned something

2967         //         ...;

2968         //     else if (q == p)

2969         //         // We scanned nothing

2970         //         ...;

2971         //     else if (q == -1)

2972         //         // Something went wrong

2973         //         ...;

2974 

2975 

2976         // Scan a specific char: If the char at the given start position is

2977         // equal to c, return the index of the next char; otherwise, return the

2978         // start position.

2979         //

2980         private int scan(int start, int end, char c) {

2981             if ((start < end) && (input.charAt(start) == c))

2982                 return start + 1;

2983             return start;

2984         }

2985 

2986         // Scan forward from the given start position.  Stop at the first char

2987         // in the err string (in which case -1 is returned), or the first char

2988         // in the stop string (in which case the index of that char is

2989         // returned), or the end position (in which case the end position

2990         // is returned).  May return the start position if

2991         // nothing matches.

2992         //

2993         private int scan(int start, int end, String err, String stop) {

2994             int p = start;

2995             while (p < end) {

2996                 char c = input.charAt(p);

2997                 if (err.indexOf(c) >= 0)

2998                     return -1;

2999                 if (stop.indexOf(c) >= 0)

3000                     break;

3001                 p++;

3002             }

3003             return p;

3004         }

3005 

3006         // Scan forward from the given start position.  Stop at the first char

3007         // in the stop string (in which case the index of that char is

3008         // returned), or the end position (in which case the end position

3009         // is returned).  May return the start position if

3010         // nothing matches.

3011         //

3012         private int scan(int start, int end, String stop) {

3013             int p = start;

3014             while (p < end) {

3015                 char c = input.charAt(p);

3016                 if (stop.indexOf(c) >= 0)

3017                     break;

3018                 p++;

3019             }

3020             return p;

3021         }

3022 

3023         // Scan a potential escape sequence, starting at the given position,

3024         // with the given first char (i.e., charAt(start) == c).

3025         //

3026         // This method assumes that if escapes are allowed then visible

3027         // non-US-ASCII chars are also allowed.

3028         //

3029         private int scanEscape(int start, int n, char first)

3030             throws URISyntaxException

3031         {

3032             int p = start;

3033             char c = first;

3034             if (c == '%') {

3035                 // Process escape pair

3036                 if ((p + 3 <= n)

3037                     && match(input.charAt(p + 1), L_HEX, H_HEX)

3038                     && match(input.charAt(p + 2), L_HEX, H_HEX)) {

3039                     return p + 3;

3040                 }

3041                 fail("Malformed escape pair", p);

3042             } else if ((c > 128)

3043                        && !Character.isSpaceChar(c)

3044                        && !Character.isISOControl(c)) {

3045                 // Allow unescaped but visible non-US-ASCII chars

3046                 return p + 1;

3047             }

3048             return p;

3049         }

3050 

3051         // Scan chars that match the given mask pair

3052         //

3053         private int scan(int start, int n, long lowMask, long highMask)

3054             throws URISyntaxException

3055         {

3056             int p = start;

3057             while (p < n) {

3058                 char c = input.charAt(p);

3059                 if (match(c, lowMask, highMask)) {

3060                     p++;

3061                     continue;

3062                 }

3063                 if ((lowMask & L_ESCAPED) != 0) {

3064                     int q = scanEscape(p, n, c);

3065                     if (q > p) {

3066                         p = q;

3067                         continue;

3068                     }

3069                 }

3070                 break;

3071             }

3072             return p;

3073         }

3074 

3075         // Check that each of the chars in [start, end) matches the given mask

3076         //

3077         private void checkChars(int start, int end,

3078                                 long lowMask, long highMask,

3079                                 String what)

3080             throws URISyntaxException

3081         {

3082             int p = scan(start, end, lowMask, highMask);

3083             if (p < end)

3084                 fail("Illegal character in " + what, p);

3085         }

3086 

3087         // Check that the char at position p matches the given mask

3088         //

3089         private void checkChar(int p,

3090                                long lowMask, long highMask,

3091                                String what)

3092             throws URISyntaxException

3093         {

3094             checkChars(p, p + 1, lowMask, highMask, what);

3095         }

3096 

3097 

3098         // -- Parsing --

3099 

3100         // [<scheme>:]<scheme-specific-part>[#<fragment>]

3101         //

3102         void parse(boolean rsa) throws URISyntaxException {

3103             requireServerAuthority = rsa;

3104             int n = input.length();

3105             int p = scan(0, n, "/?#", ":");

3106             if ((p >= 0) && at(p, n, ':')) {

3107                 if (p == 0)

3108                     failExpecting("scheme name", 0);

3109                 checkChar(0, L_ALPHA, H_ALPHA, "scheme name");

3110                 checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");

3111                 scheme = input.substring(0, p);

3112                 p++;                    // Skip ':'

3113                 if (at(p, n, '/')) {

3114                     p = parseHierarchical(p, n);

3115                 } else {

3116                     // opaque; need to create the schemeSpecificPart

3117                     int q = scan(p, n, "#");

3118                     if (q <= p)

3119                         failExpecting("scheme-specific part", p);

3120                     checkChars(p, q, L_URIC, H_URIC, "opaque part");

3121                     schemeSpecificPart = input.substring(p, q);

3122                     p = q;

3123                 }

3124             } else {

3125                 p = parseHierarchical(0, n);

3126             }

3127             if (at(p, n, '#')) {

3128                 checkChars(p + 1, n, L_URIC, H_URIC, "fragment");

3129                 fragment = input.substring(p + 1, n);

3130                 p = n;

3131             }

3132             if (p < n)

3133                 fail("end of URI", p);

3134         }

3135 

3136         // [//authority]<path>[?<query>]

3137         //

3138         // DEVIATION from RFC2396: We allow an empty authority component as

3139         // long as it's followed by a non-empty path, query component, or

3140         // fragment component.  This is so that URIs such as "file:///foo/bar"

3141         // will parse.  This seems to be the intent of RFC2396, though the

3142         // grammar does not permit it.  If the authority is empty then the

3143         // userInfo, host, and port components are undefined.

3144         //

3145         // DEVIATION from RFC2396: We allow empty relative paths.  This seems

3146         // to be the intent of RFC2396, but the grammar does not permit it.

3147         // The primary consequence of this deviation is that "#f" parses as a

3148         // relative URI with an empty path.

3149         //

3150         private int parseHierarchical(int start, int n)

3151             throws URISyntaxException

3152         {

3153             int p = start;

3154             if (at(p, n, '/') && at(p + 1, n, '/')) {

3155                 p += 2;

3156                 int q = scan(p, n, "/?#");

3157                 if (q > p) {

3158                     p = parseAuthority(p, q);

3159                 } else if (q < n) {

3160                     // DEVIATION: Allow empty authority prior to non-empty

3161                     // path, query component or fragment identifier

3162                 } else

3163                     failExpecting("authority", p);

3164             }

3165             int q = scan(p, n, "?#"); // DEVIATION: May be empty

3166             checkChars(p, q, L_PATH, H_PATH, "path");

3167             path = input.substring(p, q);

3168             p = q;

3169             if (at(p, n, '?')) {

3170                 p++;

3171                 q = scan(p, n, "#");

3172                 checkChars(p, q, L_URIC, H_URIC, "query");

3173                 query = input.substring(p, q);

3174                 p = q;

3175             }

3176             return p;

3177         }

3178 

3179         // authority     = server | reg_name

3180         //

3181         // Ambiguity: An authority that is a registry name rather than a server

3182         // might have a prefix that parses as a server.  We use the fact that

3183         // the authority component is always followed by '/' or the end of the

3184         // input string to resolve this: If the complete authority did not

3185         // parse as a server then we try to parse it as a registry name.

3186         //

3187         private int parseAuthority(int start, int n)

3188             throws URISyntaxException

3189         {

3190             int p = start;

3191             int q = p;

3192             URISyntaxException ex = null;

3193 

3194             boolean serverChars;

3195             boolean regChars;

3196 

3197             if (scan(p, n, "]") > p) {

3198                 // contains a literal IPv6 address, therefore % is allowed

3199                 serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);

3200             } else {

3201                 serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);

3202             }

3203             regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);

3204 

3205             if (regChars && !serverChars) {

3206                 // Must be a registry-based authority

3207                 authority = input.substring(p, n);

3208                 return n;

3209             }

3210 

3211             if (serverChars) {

3212                 // Might be (probably is) a server-based authority, so attempt

3213                 // to parse it as such.  If the attempt fails, try to treat it

3214                 // as a registry-based authority.

3215                 try {

3216                     q = parseServer(p, n);

3217                     if (q < n)

3218                         failExpecting("end of authority", q);

3219                     authority = input.substring(p, n);

3220                 } catch (URISyntaxException x) {

3221                     // Undo results of failed parse

3222                     userInfo = null;

3223                     host = null;

3224                     port = -1;

3225                     if (requireServerAuthority) {

3226                         // If we're insisting upon a server-based authority,

3227                         // then just re-throw the exception

3228                         throw x;

3229                     } else {

3230                         // Save the exception in case it doesn't parse as a

3231                         // registry either

3232                         ex = x;

3233                         q = p;

3234                     }

3235                 }

3236             }

3237 

3238             if (q < n) {

3239                 if (regChars) {

3240                     // Registry-based authority

3241                     authority = input.substring(p, n);

3242                 } else if (ex != null) {

3243                     // Re-throw exception; it was probably due to

3244                     // a malformed IPv6 address

3245                     throw ex;

3246                 } else {

3247                     fail("Illegal character in authority", q);

3248                 }

3249             }

3250 

3251             return n;

3252         }

3253 

3254 

3255         // [<userinfo>@]<host>[:<port>]

3256         //

3257         private int parseServer(int start, int n)

3258             throws URISyntaxException

3259         {

3260             int p = start;

3261             int q;

3262 

3263             // userinfo

3264             q = scan(p, n, "/?#", "@");

3265             if ((q >= p) && at(q, n, '@')) {

3266                 checkChars(p, q, L_USERINFO, H_USERINFO, "user info");

3267                 userInfo = input.substring(p, q);

3268                 p = q + 1;              // Skip '@'

3269             }

3270 

3271             // hostname, IPv4 address, or IPv6 address

3272             if (at(p, n, '[')) {

3273                 // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732

3274                 p++;

3275                 q = scan(p, n, "/?#", "]");

3276                 if ((q > p) && at(q, n, ']')) {

3277                     // look for a "%" scope id

3278                     int r = scan (p, q, "%");

3279                     if (r > p) {

3280                         parseIPv6Reference(p, r);

3281                         if (r+1 == q) {

3282                             fail ("scope id expected");

3283                         }

3284                         checkChars (r+1, q, L_SCOPE_ID, H_SCOPE_ID,

3285                                                 "scope id");

3286                     } else {

3287                         parseIPv6Reference(p, q);

3288                     }

3289                     host = input.substring(p-1, q+1);

3290                     p = q + 1;

3291                 } else {

3292                     failExpecting("closing bracket for IPv6 address", q);

3293                 }

3294             } else {

3295                 q = parseIPv4Address(p, n);

3296                 if (q <= p)

3297                     q = parseHostname(p, n);

3298                 p = q;

3299             }

3300 

3301             // port

3302             if (at(p, n, ':')) {

3303                 p++;

3304                 q = scan(p, n, "/");

3305                 if (q > p) {

3306                     checkChars(p, q, L_DIGIT, H_DIGIT, "port number");

3307                     try {

3308                         port = Integer.parseInt(input, p, q, 10);

3309                     } catch (NumberFormatException x) {

3310                         fail("Malformed port number", p);

3311                     }

3312                     p = q;

3313                 }

3314             }

3315             if (p < n)

3316                 failExpecting("port number", p);

3317 

3318             return p;

3319         }

3320 

3321         // Scan a string of decimal digits whose value fits in a byte

3322         //

3323         private int scanByte(int start, int n)

3324             throws URISyntaxException

3325         {

3326             int p = start;

3327             int q = scan(p, n, L_DIGIT, H_DIGIT);

3328             if (q <= p) return q;

3329             if (Integer.parseInt(input, p, q, 10) > 255) return p;

3330             return q;

3331         }

3332 

3333         // Scan an IPv4 address.

3334         //

3335         // If the strict argument is true then we require that the given

3336         // interval contain nothing besides an IPv4 address; if it is false

3337         // then we only require that it start with an IPv4 address.

3338         //

3339         // If the interval does not contain or start with (depending upon the

3340         // strict argument) legal IPv4 address characters then we return -1

3341         // immediately; otherwise we insist that these characters parse as a

3342         // legal IPv4 address and throw an exception on failure.

3343         //

3344         // We assume that any string of decimal digits and dots must be an IPv4

3345         // address.  It won't parse as a hostname anyway, so making that

3346         // assumption here allows more meaningful exceptions to be thrown.

3347         //

3348         private int scanIPv4Address(int start, int n, boolean strict)

3349             throws URISyntaxException

3350         {

3351             int p = start;

3352             int q;

3353             int m = scan(p, n, L_DIGIT | L_DOT, H_DIGIT | H_DOT);

3354             if ((m <= p) || (strict && (m != n)))

3355                 return -1;

3356             for (;;) {

3357                 // Per RFC2732: At most three digits per byte

3358                 // Further constraint: Each element fits in a byte

3359                 if ((q = scanByte(p, m)) <= p) break;   p = q;

3360                 if ((q = scan(p, m, '.')) <= p) break;  p = q;

3361                 if ((q = scanByte(p, m)) <= p) break;   p = q;

3362                 if ((q = scan(p, m, '.')) <= p) break;  p = q;

3363                 if ((q = scanByte(p, m)) <= p) break;   p = q;

3364                 if ((q = scan(p, m, '.')) <= p) break;  p = q;

3365                 if ((q = scanByte(p, m)) <= p) break;   p = q;

3366                 if (q < m) break;

3367                 return q;

3368             }

3369             fail("Malformed IPv4 address", q);

3370             return -1;

3371         }

3372 

3373         // Take an IPv4 address: Throw an exception if the given interval

3374         // contains anything except an IPv4 address

3375         //

3376         private int takeIPv4Address(int start, int n, String expected)

3377             throws URISyntaxException

3378         {

3379             int p = scanIPv4Address(start, n, true);

3380             if (p <= start)

3381                 failExpecting(expected, start);

3382             return p;

3383         }

3384 

3385         // Attempt to parse an IPv4 address, returning -1 on failure but

3386         // allowing the given interval to contain [:<characters>] after

3387         // the IPv4 address.

3388         //

3389         private int parseIPv4Address(int start, int n) {

3390             int p;

3391 

3392             try {

3393                 p = scanIPv4Address(start, n, false);

3394             } catch (URISyntaxException x) {

3395                 return -1;

3396             } catch (NumberFormatException nfe) {

3397                 return -1;

3398             }

3399 

3400             if (p > start && p < n) {

3401                 // IPv4 address is followed by something - check that

3402                 // it's a ":" as this is the only valid character to

3403                 // follow an address.

3404                 if (input.charAt(p) != ':') {

3405                     p = -1;

3406                 }

3407             }

3408 

3409             if (p > start)

3410                 host = input.substring(start, p);

3411 

3412             return p;

3413         }

3414 

3415         // hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]

3416         // domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum

3417         // toplabel      = alpha | alpha *( alphanum | "-" ) alphanum

3418         //

3419         private int parseHostname(int start, int n)

3420             throws URISyntaxException

3421         {

3422             int p = start;

3423             int q;

3424             int l = -1;                 // Start of last parsed label

3425 

3426             do {

3427                 // domainlabel = alphanum [ *( alphanum | "-" ) alphanum ]

3428                 q = scan(p, n, L_ALPHANUM, H_ALPHANUM);

3429                 if (q <= p)

3430                     break;

3431                 l = p;

3432                 if (q > p) {

3433                     p = q;

3434                     q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM | H_DASH);

3435                     if (q > p) {

3436                         if (input.charAt(q - 1) == '-')

3437                             fail("Illegal character in hostname", q - 1);

3438                         p = q;

3439                     }

3440                 }

3441                 q = scan(p, n, '.');

3442                 if (q <= p)

3443                     break;

3444                 p = q;

3445             } while (p < n);

3446 

3447             if ((p < n) && !at(p, n, ':'))

3448                 fail("Illegal character in hostname", p);

3449 

3450             if (l < 0)

3451                 failExpecting("hostname", start);

3452 

3453             // for a fully qualified hostname check that the rightmost

3454             // label starts with an alpha character.

3455             if (l > start && !match(input.charAt(l), L_ALPHA, H_ALPHA)) {

3456                 fail("Illegal character in hostname", l);

3457             }

3458 

3459             host = input.substring(start, p);

3460             return p;

3461         }

3462 

3463 

3464         // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture

3465         //

3466         // Bug: The grammar in RFC2373 Appendix B does not allow addresses of

3467         // the form ::12.34.56.78, which are clearly shown in the examples

3468         // earlier in the document.  Here is the original grammar:

3469         //

3470         //   IPv6address = hexpart [ ":" IPv4address ]

3471         //   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]

3472         //   hexseq      = hex4 *( ":" hex4)

3473         //   hex4        = 1*4HEXDIG

3474         //

3475         // We therefore use the following revised grammar:

3476         //

3477         //   IPv6address = hexseq [ ":" IPv4address ]

3478         //                 | hexseq [ "::" [ hexpost ] ]

3479         //                 | "::" [ hexpost ]

3480         //   hexpost     = hexseq | hexseq ":" IPv4address | IPv4address

3481         //   hexseq      = hex4 *( ":" hex4)

3482         //   hex4        = 1*4HEXDIG

3483         //

3484         // This covers all and only the following cases:

3485         //

3486         //   hexseq

3487         //   hexseq : IPv4address

3488         //   hexseq ::

3489         //   hexseq :: hexseq

3490         //   hexseq :: hexseq : IPv4address

3491         //   hexseq :: IPv4address

3492         //   :: hexseq

3493         //   :: hexseq : IPv4address

3494         //   :: IPv4address

3495         //   ::

3496         //

3497         // Additionally we constrain the IPv6 address as follows :-

3498         //

3499         //  i.  IPv6 addresses without compressed zeros should contain

3500         //      exactly 16 bytes.

3501         //

3502         //  ii. IPv6 addresses with compressed zeros should contain

3503         //      less than 16 bytes.

3504 

3505         private int ipv6byteCount = 0;

3506 

3507         private int parseIPv6Reference(int start, int n)

3508             throws URISyntaxException

3509         {

3510             int p = start;

3511             int q;

3512             boolean compressedZeros = false;

3513 

3514             q = scanHexSeq(p, n);

3515 

3516             if (q > p) {

3517                 p = q;

3518                 if (at(p, n, "::")) {

3519                     compressedZeros = true;

3520                     p = scanHexPost(p + 2, n);

3521                 } else if (at(p, n, ':')) {

3522                     p = takeIPv4Address(p + 1,  n, "IPv4 address");

3523                     ipv6byteCount += 4;

3524                 }

3525             } else if (at(p, n, "::")) {

3526                 compressedZeros = true;

3527                 p = scanHexPost(p + 2, n);

3528             }

3529             if (p < n)

3530                 fail("Malformed IPv6 address", start);

3531             if (ipv6byteCount > 16)

3532                 fail("IPv6 address too long", start);

3533             if (!compressedZeros && ipv6byteCount < 16)

3534                 fail("IPv6 address too short", start);

3535             if (compressedZeros && ipv6byteCount == 16)

3536                 fail("Malformed IPv6 address", start);

3537 

3538             return p;

3539         }

3540 

3541         private int scanHexPost(int start, int n)

3542             throws URISyntaxException

3543         {

3544             int p = start;

3545             int q;

3546 

3547             if (p == n)

3548                 return p;

3549 

3550             q = scanHexSeq(p, n);

3551             if (q > p) {

3552                 p = q;

3553                 if (at(p, n, ':')) {

3554                     p++;

3555                     p = takeIPv4Address(p, n, "hex digits or IPv4 address");

3556                     ipv6byteCount += 4;

3557                 }

3558             } else {

3559                 p = takeIPv4Address(p, n, "hex digits or IPv4 address");

3560                 ipv6byteCount += 4;

3561             }

3562             return p;

3563         }

3564 

3565         // Scan a hex sequence; return -1 if one could not be scanned

3566         //

3567         private int scanHexSeq(int start, int n)

3568             throws URISyntaxException

3569         {

3570             int p = start;

3571             int q;

3572 

3573             q = scan(p, n, L_HEX, H_HEX);

3574             if (q <= p)

3575                 return -1;

3576             if (at(q, n, '.'))          // Beginning of IPv4 address

3577                 return -1;

3578             if (q > p + 4)

3579                 fail("IPv6 hexadecimal digit sequence too long", p);

3580             ipv6byteCount += 2;

3581             p = q;

3582             while (p < n) {

3583                 if (!at(p, n, ':'))

3584                     break;

3585                 if (at(p + 1, n, ':'))

3586                     break;              // "::"

3587                 p++;

3588                 q = scan(p, n, L_HEX, H_HEX);

3589                 if (q <= p)

3590                     failExpecting("digits for an IPv6 address", p);

3591                 if (at(q, n, '.')) {    // Beginning of IPv4 address

3592                     p--;

3593                     break;

3594                 }

3595                 if (q > p + 4)

3596                     fail("IPv6 hexadecimal digit sequence too long", p);

3597                 ipv6byteCount += 2;

3598                 p = q;

3599             }

3600 

3601             return p;

3602         }

3603 

3604     }

3605     static {

3606         SharedSecrets.setJavaNetUriAccess(

3607             new JavaNetUriAccess() {

3608                 public URI create(String scheme, String path) {

3609                     return new URI(scheme, path);

3610                 }

3611             }

3612         );

3613     }

3614 }

3615