/*
 * Copyright (C) 2014 The Android Open Source Project
 * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
|
||
|
|
||
|
package java.net;
|
||
|
|
||
|
import java.io.*;
|
||
|
import java.nio.charset.Charset;
|
||
|
import java.nio.charset.IllegalCharsetNameException;
|
||
|
import java.nio.charset.UnsupportedCharsetException;
|
||
|
import java.util.Objects;
|
||
|
|
||
|
/**
|
||
|
* Utility class for HTML form decoding. This class contains static methods
|
||
|
* for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
|
||
|
* MIME format.
|
||
|
* <p>
|
||
|
* The conversion process is the reverse of that used by the URLEncoder class. It is assumed
|
||
|
* that all characters in the encoded string are one of the following:
|
||
|
* "{@code a}" through "{@code z}",
|
||
|
* "{@code A}" through "{@code Z}",
|
||
|
* "{@code 0}" through "{@code 9}", and
|
||
|
* "{@code -}", "{@code _}",
|
||
|
* "{@code .}", and "{@code *}". The
|
||
|
* character "{@code %}" is allowed but is interpreted
|
||
|
* as the start of a special escaped sequence.
|
||
|
* <p>
|
||
|
* The following rules are applied in the conversion:
|
||
|
*
|
||
|
* <ul>
|
||
|
* <li>The alphanumeric characters "{@code a}" through
|
||
|
* "{@code z}", "{@code A}" through
|
||
|
* "{@code Z}" and "{@code 0}"
|
||
|
* through "{@code 9}" remain the same.
|
||
|
* <li>The special characters "{@code .}",
|
||
|
* "{@code -}", "{@code *}", and
|
||
|
* "{@code _}" remain the same.
|
||
|
* <li>The plus sign "{@code +}" is converted into a
|
||
|
* space character " " .
|
||
|
* <li>A sequence of the form "<i>{@code %xy}</i>" will be
|
||
|
* treated as representing a byte where <i>xy</i> is the two-digit
|
||
|
* hexadecimal representation of the 8 bits. Then, all substrings
|
||
|
* that contain one or more of these byte sequences consecutively
|
||
|
* will be replaced by the character(s) whose encoding would result
|
||
|
* in those consecutive bytes.
|
||
|
* The encoding scheme used to decode these characters may be specified,
|
||
|
* or if unspecified, the default encoding of the platform will be used.
|
||
|
* </ul>
|
||
|
* <p>
|
||
|
* There are two possible ways in which this decoder could deal with
|
||
|
* illegal strings. It could either leave illegal characters alone or
|
||
|
* it could throw an {@link java.lang.IllegalArgumentException}.
|
||
|
* Which approach the decoder takes is left to the
|
||
|
* implementation.
|
||
|
*
|
||
|
* @author Mark Chamness
|
||
|
* @author Michael McCloskey
|
||
|
* @since 1.2
|
||
|
*/
|
||
|
|
||
|
public class URLDecoder {
|
||
|
|
||
|
// The platform default encoding
|
||
|
static String dfltEncName = URLEncoder.dfltEncName;
|
||
|
|
||
|
/**
|
||
|
* Decodes a {@code x-www-form-urlencoded} string.
|
||
|
* The platform's default encoding is used to determine what characters
|
||
|
* are represented by any consecutive sequences of the form
|
||
|
* "<i>{@code %xy}</i>".
|
||
|
* @param s the {@code String} to decode
|
||
|
* @deprecated The resulting string may vary depending on the platform's
|
||
|
* default encoding. Instead, use the decode(String,String) method
|
||
|
* to specify the encoding.
|
||
|
* @return the newly decoded {@code String}
|
||
|
*/
|
||
|
@Deprecated
|
||
|
public static String decode(String s) {
|
||
|
|
||
|
String str = null;
|
||
|
|
||
|
try {
|
||
|
str = decode(s, dfltEncName);
|
||
|
} catch (UnsupportedEncodingException e) {
|
||
|
// The system should always have the platform default
|
||
|
}
|
||
|
|
||
|
return str;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Decodes an {@code application/x-www-form-urlencoded} string using
|
||
|
* a specific encoding scheme.
|
||
|
*
|
||
|
* <p>
|
||
|
* This method behaves the same as {@linkplain String decode(String s, Charset charset)}
|
||
|
* except that it will {@linkplain java.nio.charset.Charset#forName look up the charset}
|
||
|
* using the given encoding name.
|
||
|
*
|
||
|
* @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
|
||
|
* when illegal strings are encountered.
|
||
|
*
|
||
|
* @param s the {@code String} to decode
|
||
|
* @param enc The name of a supported
|
||
|
* <a href="../lang/package-summary.html#charenc">character
|
||
|
* encoding</a>.
|
||
|
* @return the newly decoded {@code String}
|
||
|
* @throws UnsupportedEncodingException
|
||
|
* If character encoding needs to be consulted, but
|
||
|
* named character encoding is not supported
|
||
|
* @see URLEncoder#encode(java.lang.String, java.lang.String)
|
||
|
* @since 1.4
|
||
|
*/
|
||
|
public static String decode(String s, String enc) throws UnsupportedEncodingException {
|
||
|
if (enc.isEmpty()) {
|
||
|
throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
|
||
|
}
|
||
|
|
||
|
try {
|
||
|
Charset charset = Charset.forName(enc);
|
||
|
return decode(s, charset);
|
||
|
} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
|
||
|
throw new UnsupportedEncodingException(enc);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Decodes an {@code application/x-www-form-urlencoded} string using
|
||
|
* a specific {@linkplain java.nio.charset.Charset Charset}.
|
||
|
* The supplied charset is used to determine
|
||
|
* what characters are represented by any consecutive sequences of the
|
||
|
* form "<i>{@code %xy}</i>".
|
||
|
* <p>
|
||
|
* <em><strong>Note:</strong> The <a href=
|
||
|
* "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
|
||
|
* World Wide Web Consortium Recommendation</a> states that
|
||
|
* UTF-8 should be used. Not doing so may introduce
|
||
|
* incompatibilities.</em>
|
||
|
*
|
||
|
* @implNote This implementation will throw an {@link java.lang.IllegalArgumentException}
|
||
|
* when illegal strings are encountered.
|
||
|
*
|
||
|
* @param s the {@code String} to decode
|
||
|
* @param charset the given charset
|
||
|
* @return the newly decoded {@code String}
|
||
|
* @throws NullPointerException if {@code s} or {@code charset} is {@code null}
|
||
|
* @throws IllegalArgumentException if the implementation encounters illegal
|
||
|
* characters
|
||
|
* @see URLEncoder#encode(java.lang.String, java.nio.charset.Charset)
|
||
|
* @since 10
|
||
|
*/
|
||
|
public static String decode(String s, Charset charset) {
|
||
|
Objects.requireNonNull(charset, "Charset");
|
||
|
boolean needToChange = false;
|
||
|
int numChars = s.length();
|
||
|
StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
|
||
|
int i = 0;
|
||
|
|
||
|
char c;
|
||
|
byte[] bytes = null;
|
||
|
while (i < numChars) {
|
||
|
c = s.charAt(i);
|
||
|
switch (c) {
|
||
|
case '+':
|
||
|
sb.append(' ');
|
||
|
i++;
|
||
|
needToChange = true;
|
||
|
break;
|
||
|
case '%':
|
||
|
/*
|
||
|
* Starting with this instance of %, process all
|
||
|
* consecutive substrings of the form %xy. Each
|
||
|
* substring %xy will yield a byte. Convert all
|
||
|
* consecutive bytes obtained this way to whatever
|
||
|
* character(s) they represent in the provided
|
||
|
* encoding.
|
||
|
*/
|
||
|
|
||
|
try {
|
||
|
|
||
|
// (numChars-i)/3 is an upper bound for the number
|
||
|
// of remaining bytes
|
||
|
if (bytes == null)
|
||
|
bytes = new byte[(numChars-i)/3];
|
||
|
int pos = 0;
|
||
|
|
||
|
while ( ((i+2) < numChars) &&
|
||
|
(c=='%')) {
|
||
|
// BEGIN Android-changed: App compat. Forbid non-hex chars after '%'.
|
||
|
if (!isValidHexChar(s.charAt(i+1)) || !isValidHexChar(s.charAt(i+2))) {
|
||
|
throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern : "
|
||
|
+ s.substring(i, i + 3));
|
||
|
}
|
||
|
// END Android-changed: App compat. Forbid non-hex chars after '%'.
|
||
|
int v = Integer.parseInt(s.substring(i+1,i+3),16);
|
||
|
if (v < 0)
|
||
|
// Android-changed: Improve error message by printing the string value.
|
||
|
throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value : "
|
||
|
+ s.substring(i, i + 3));
|
||
|
bytes[pos++] = (byte) v;
|
||
|
i+= 3;
|
||
|
if (i < numChars)
|
||
|
c = s.charAt(i);
|
||
|
}
|
||
|
|
||
|
// A trailing, incomplete byte encoding such as
|
||
|
// "%x" will cause an exception to be thrown
|
||
|
|
||
|
if ((i < numChars) && (c=='%'))
|
||
|
throw new IllegalArgumentException(
|
||
|
"URLDecoder: Incomplete trailing escape (%) pattern");
|
||
|
|
||
|
sb.append(new String(bytes, 0, pos, charset));
|
||
|
} catch (NumberFormatException e) {
|
||
|
throw new IllegalArgumentException(
|
||
|
"URLDecoder: Illegal hex characters in escape (%) pattern - "
|
||
|
+ e.getMessage());
|
||
|
}
|
||
|
needToChange = true;
|
||
|
break;
|
||
|
default:
|
||
|
sb.append(c);
|
||
|
i++;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return (needToChange? sb.toString() : s);
|
||
|
}
|
||
|
|
||
|
// BEGIN Android-added: App compat. Forbid non-hex chars after '%'.
|
||
|
private static boolean isValidHexChar(char c) {
|
||
|
return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
|
||
|
}
|
||
|
// END Android-added: App compat. Forbid non-hex chars after '%'.
|
||
|
}
|