script-astra/Android/Sdk/sources/android-35/android/webkit/URLUtil.java

722 lines
27 KiB
Java
Raw Normal View History

2025-01-20 15:15:20 +00:00
/*
* Copyright (C) 2006 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package android.webkit;
import android.annotation.NonNull;
import android.annotation.Nullable;
import android.compat.Compatibility;
import android.compat.annotation.ChangeId;
import android.compat.annotation.EnabledSince;
import android.compat.annotation.UnsupportedAppUsage;
import android.net.ParseException;
import android.net.Uri;
import android.net.WebAddress;
import android.os.Build;
import android.util.Log;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public final class URLUtil {
/**
* This feature enables parsing of Content-Disposition headers that conform to RFC 6266. In
* particular, this enables parsing of {@code filename*} values which can use a different
* character encoding.
*
* @hide
*/
@ChangeId
@EnabledSince(targetSdkVersion = Build.VERSION_CODES.VANILLA_ICE_CREAM)
static final long PARSE_CONTENT_DISPOSITION_USING_RFC_6266 = 319400769L;
private static final String LOGTAG = "webkit";
private static final boolean TRACE = false;
// to refer to bar.png under your package's asset/foo/ directory, use
// "file:///android_asset/foo/bar.png".
static final String ASSET_BASE = "file:///android_asset/";
// to refer to bar.png under your package's res/drawable/ directory, use
// "file:///android_res/drawable/bar.png". Use "drawable" to refer to
// "drawable-hdpi" directory as well.
static final String RESOURCE_BASE = "file:///android_res/";
static final String FILE_BASE = "file:";
static final String PROXY_BASE = "file:///cookieless_proxy/";
static final String CONTENT_BASE = "content:";
/** Cleans up (if possible) user-entered web addresses */
public static String guessUrl(String inUrl) {
String retVal = inUrl;
WebAddress webAddress;
if (TRACE) Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
if (inUrl.length() == 0) return inUrl;
if (inUrl.startsWith("about:")) return inUrl;
// Do not try to interpret data scheme URLs
if (inUrl.startsWith("data:")) return inUrl;
// Do not try to interpret file scheme URLs
if (inUrl.startsWith("file:")) return inUrl;
// Do not try to interpret javascript scheme URLs
if (inUrl.startsWith("javascript:")) return inUrl;
// bug 762454: strip period off end of url
if (inUrl.endsWith(".") == true) {
inUrl = inUrl.substring(0, inUrl.length() - 1);
}
try {
webAddress = new WebAddress(inUrl);
} catch (ParseException ex) {
if (TRACE) {
Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
}
return retVal;
}
// Check host
if (webAddress.getHost().indexOf('.') == -1) {
// no dot: user probably entered a bare domain. try .com
webAddress.setHost("www." + webAddress.getHost() + ".com");
}
return webAddress.toString();
}
/**
* Inserts the {@code inQuery} in the {@code template} after URL-encoding it. The encoded query
* will replace the {@code queryPlaceHolder}.
*/
public static String composeSearchUrl(
String inQuery, String template, String queryPlaceHolder) {
int placeHolderIndex = template.indexOf(queryPlaceHolder);
if (placeHolderIndex < 0) {
return null;
}
String query;
StringBuilder buffer = new StringBuilder();
buffer.append(template.substring(0, placeHolderIndex));
try {
query = java.net.URLEncoder.encode(inQuery, "utf-8");
buffer.append(query);
} catch (UnsupportedEncodingException ex) {
return null;
}
buffer.append(template.substring(placeHolderIndex + queryPlaceHolder.length()));
return buffer.toString();
}
public static byte[] decode(byte[] url) throws IllegalArgumentException {
if (url.length == 0) {
return new byte[0];
}
// Create a new byte array with the same length to ensure capacity
byte[] tempData = new byte[url.length];
int tempCount = 0;
for (int i = 0; i < url.length; i++) {
byte b = url[i];
if (b == '%') {
if (url.length - i > 2) {
b = (byte) (parseHex(url[i + 1]) * 16 + parseHex(url[i + 2]));
i += 2;
} else {
throw new IllegalArgumentException("Invalid format");
}
}
tempData[tempCount++] = b;
}
byte[] retData = new byte[tempCount];
System.arraycopy(tempData, 0, retData, 0, tempCount);
return retData;
}
/**
* @return {@code true} if the url is correctly URL encoded
*/
@UnsupportedAppUsage
static boolean verifyURLEncoding(String url) {
int count = url.length();
if (count == 0) {
return false;
}
int index = url.indexOf('%');
while (index >= 0 && index < count) {
if (index < count - 2) {
try {
parseHex((byte) url.charAt(++index));
parseHex((byte) url.charAt(++index));
} catch (IllegalArgumentException e) {
return false;
}
} else {
return false;
}
index = url.indexOf('%', index + 1);
}
return true;
}
private static int parseHex(byte b) {
if (b >= '0' && b <= '9') return (b - '0');
if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
throw new IllegalArgumentException("Invalid hex char '" + b + "'");
}
/**
* @return {@code true} if the url is an asset file.
*/
public static boolean isAssetUrl(String url) {
return (null != url) && url.startsWith(ASSET_BASE);
}
/**
* @return {@code true} if the url is a resource file.
* @hide
*/
@UnsupportedAppUsage
public static boolean isResourceUrl(String url) {
return (null != url) && url.startsWith(RESOURCE_BASE);
}
/**
* @return {@code true} if the url is a proxy url to allow cookieless network requests from a
* file url.
* @deprecated Cookieless proxy is no longer supported.
*/
@Deprecated
public static boolean isCookielessProxyUrl(String url) {
return (null != url) && url.startsWith(PROXY_BASE);
}
/**
* @return {@code true} if the url is a local file.
*/
public static boolean isFileUrl(String url) {
return (null != url)
&& (url.startsWith(FILE_BASE)
&& !url.startsWith(ASSET_BASE)
&& !url.startsWith(PROXY_BASE));
}
/**
* @return {@code true} if the url is an about: url.
*/
public static boolean isAboutUrl(String url) {
return (null != url) && url.startsWith("about:");
}
/**
* @return {@code true} if the url is a data: url.
*/
public static boolean isDataUrl(String url) {
return (null != url) && url.startsWith("data:");
}
/**
* @return {@code true} if the url is a javascript: url.
*/
public static boolean isJavaScriptUrl(String url) {
return (null != url) && url.startsWith("javascript:");
}
/**
* @return {@code true} if the url is an http: url.
*/
public static boolean isHttpUrl(String url) {
return (null != url)
&& (url.length() > 6)
&& url.substring(0, 7).equalsIgnoreCase("http://");
}
/**
* @return {@code true} if the url is an https: url.
*/
public static boolean isHttpsUrl(String url) {
return (null != url)
&& (url.length() > 7)
&& url.substring(0, 8).equalsIgnoreCase("https://");
}
/**
* @return {@code true} if the url is a network url.
*/
public static boolean isNetworkUrl(String url) {
if (url == null || url.length() == 0) {
return false;
}
return isHttpUrl(url) || isHttpsUrl(url);
}
/**
* @return {@code true} if the url is a content: url.
*/
public static boolean isContentUrl(String url) {
return (null != url) && url.startsWith(CONTENT_BASE);
}
/**
* @return {@code true} if the url is valid.
*/
public static boolean isValidUrl(String url) {
if (url == null || url.length() == 0) {
return false;
}
return (isAssetUrl(url)
|| isResourceUrl(url)
|| isFileUrl(url)
|| isAboutUrl(url)
|| isHttpUrl(url)
|| isHttpsUrl(url)
|| isJavaScriptUrl(url)
|| isContentUrl(url));
}
/** Strips the url of the anchor. */
public static String stripAnchor(String url) {
int anchorIndex = url.indexOf('#');
if (anchorIndex != -1) {
return url.substring(0, anchorIndex);
}
return url;
}
/**
* Guesses canonical filename that a download would have, using the URL and contentDisposition.
*
* <p>File extension, if not defined, is added based on the mimetype.
*
* <p>The {@code contentDisposition} argument will be treated differently depending on
* targetSdkVersion.
*
* <ul>
* <li>For targetSDK versions &lt; {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
* 2616.
* <li>For targetSDK versions &gt;= {@code VANILLA_ICE_CREAM} it will be parsed based on RFC
* 6266.
* </ul>
*
* In practice, this means that from {@code VANILLA_ICE_CREAM}, this method will be able to
* parse {@code filename*} directives in the {@code contentDisposition} string.
*
* <p>The function also changed in the following ways in {@code VANILLA_ICE_CREAM}:
*
* <ul>
* <li>If the suggested file type extension doesn't match the passed {@code mimeType}, the
* method will append the appropriate extension instead of replacing the current
* extension.
* <li>If the suggested file name contains a path separator ({@code "/"}), the method will
* replace this with the underscore character ({@code "_"}) instead of splitting the
* result and only using the last part.
* </ul>
*
* @param url Url to the content
* @param contentDisposition Content-Disposition HTTP header or {@code null}
* @param mimeType Mime-type of the content or {@code null}
* @return suggested filename
*/
public static String guessFileName(
String url, @Nullable String contentDisposition, @Nullable String mimeType) {
if (android.os.Flags.androidOsBuildVanillaIceCream()) {
if (Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266)) {
return guessFileNameRfc6266(url, contentDisposition, mimeType);
}
}
return guessFileNameRfc2616(url, contentDisposition, mimeType);
}
/** Legacy implementation of guessFileName, based on RFC 2616. */
private static String guessFileNameRfc2616(
String url, @Nullable String contentDisposition, @Nullable String mimeType) {
String filename = null;
String extension = null;
// If we couldn't do anything with the hint, move toward the content disposition
if (contentDisposition != null) {
filename = parseContentDispositionRfc2616(contentDisposition);
if (filename != null) {
int index = filename.lastIndexOf('/') + 1;
if (index > 0) {
filename = filename.substring(index);
}
}
}
// If all the other http-related approaches failed, use the plain uri
if (filename == null) {
String decodedUrl = Uri.decode(url);
if (decodedUrl != null) {
int queryIndex = decodedUrl.indexOf('?');
// If there is a query string strip it, same as desktop browsers
if (queryIndex > 0) {
decodedUrl = decodedUrl.substring(0, queryIndex);
}
if (!decodedUrl.endsWith("/")) {
int index = decodedUrl.lastIndexOf('/') + 1;
if (index > 0) {
filename = decodedUrl.substring(index);
}
}
}
}
// Finally, if couldn't get filename from URI, get a generic filename
if (filename == null) {
filename = "downloadfile";
}
// Split filename between base and extension
// Add an extension if filename does not have one
int dotIndex = filename.indexOf('.');
if (dotIndex < 0) {
if (mimeType != null) {
extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
if (extension != null) {
extension = "." + extension;
}
}
if (extension == null) {
if (mimeType != null && mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
if (mimeType.equalsIgnoreCase("text/html")) {
extension = ".html";
} else {
extension = ".txt";
}
} else {
extension = ".bin";
}
}
} else {
if (mimeType != null) {
// Compare the last segment of the extension against the mime type.
// If there's a mismatch, discard the entire extension.
int lastDotIndex = filename.lastIndexOf('.');
String typeFromExt =
MimeTypeMap.getSingleton()
.getMimeTypeFromExtension(filename.substring(lastDotIndex + 1));
if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
if (extension != null) {
extension = "." + extension;
}
}
}
if (extension == null) {
extension = filename.substring(dotIndex);
}
filename = filename.substring(0, dotIndex);
}
return filename + extension;
}
/**
* Guesses canonical filename that a download would have, using the URL and contentDisposition.
* Uses RFC 6266 for parsing the contentDisposition header value.
*/
@NonNull
private static String guessFileNameRfc6266(
@NonNull String url, @Nullable String contentDisposition, @Nullable String mimeType) {
String filename = getFilenameSuggestion(url, contentDisposition);
// Split filename between base and extension
// Add an extension if filename does not have one
String extensionFromMimeType = suggestExtensionFromMimeType(mimeType);
if (filename.indexOf('.') < 0) {
// Filename does not have an extension, use the suggested one.
return filename + extensionFromMimeType;
}
// Filename already contains at least one dot.
// Compare the last segment of the extension against the mime type.
// If there's a mismatch, add the suggested extension instead.
if (mimeType != null && extensionDifferentFromMimeType(filename, mimeType)) {
return filename + extensionFromMimeType;
}
return filename;
}
/**
* Get the suggested file name from the {@code contentDisposition} or {@code url}. Will ensure
* that the filename contains no path separators by replacing them with the {@code "_"}
* character.
*/
@NonNull
private static String getFilenameSuggestion(String url, @Nullable String contentDisposition) {
// First attempt to parse the Content-Disposition header if available
if (contentDisposition != null) {
String filename = getFilenameFromContentDispositionRfc6266(contentDisposition);
if (filename != null) {
return replacePathSeparators(filename);
}
}
// Try to generate a filename based on the URL.
if (url != null) {
Uri parsedUri = Uri.parse(url);
String lastPathSegment = parsedUri.getLastPathSegment();
if (lastPathSegment != null) {
return replacePathSeparators(lastPathSegment);
}
}
// Finally, if couldn't get filename from URI, get a generic filename.
return "downloadfile";
}
/**
* Replace all instances of {@code "/"} with {@code "_"} to avoid filenames that navigate the
* path.
*/
@NonNull
private static String replacePathSeparators(@NonNull String raw) {
return raw.replaceAll("/", "_");
}
/**
* Check if the {@code filename} has an extension that is different from the expected one based
* on the {@code mimeType}.
*/
private static boolean extensionDifferentFromMimeType(
@NonNull String filename, @NonNull String mimeType) {
int lastDotIndex = filename.lastIndexOf('.');
String typeFromExt =
MimeTypeMap.getSingleton()
.getMimeTypeFromExtension(filename.substring(lastDotIndex + 1));
return typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType);
}
/**
* Get a candidate file extension (including the {@code .}) for the given mimeType. will return
* {@code ".bin"} if {@code mimeType} is {@code null}
*
* @param mimeType Reported mimetype
* @return A file extension, including the {@code .}
*/
@NonNull
private static String suggestExtensionFromMimeType(@Nullable String mimeType) {
if (mimeType == null) {
return ".bin";
}
String extensionFromMimeType =
MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
if (extensionFromMimeType != null) {
return "." + extensionFromMimeType;
}
if (mimeType.equalsIgnoreCase("text/html")) {
return ".html";
} else if (mimeType.toLowerCase(Locale.ROOT).startsWith("text/")) {
return ".txt";
} else {
return ".bin";
}
}
/**
* Parse the Content-Disposition HTTP Header.
*
* <p>Behavior depends on targetSdkVersion.
*
* <ul>
* <li>For targetSDK versions &lt; {@code VANILLA_ICE_CREAM} it will parse based on RFC 2616.
* <li>For targetSDK versions &gt;= {@code VANILLA_ICE_CREAM} it will parse based on RFC 6266.
* </ul>
*/
@UnsupportedAppUsage
static String parseContentDisposition(String contentDisposition) {
if (android.os.Flags.androidOsBuildVanillaIceCream()) {
if (Compatibility.isChangeEnabled(PARSE_CONTENT_DISPOSITION_USING_RFC_6266)) {
return getFilenameFromContentDispositionRfc6266(contentDisposition);
}
}
return parseContentDispositionRfc2616(contentDisposition);
}
/** Regex used to parse content-disposition headers */
private static final Pattern CONTENT_DISPOSITION_PATTERN =
Pattern.compile(
"attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
Pattern.CASE_INSENSITIVE);
/**
* Parse the Content-Disposition HTTP Header. The format of the header is defined here: <a
* href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html">rfc2616 Section 19</a>. This
* header provides a filename for content that is going to be downloaded to the file system. We
* only support the attachment type. Note that RFC 2616 specifies the filename value must be
* double-quoted. Unfortunately some servers do not quote the value so to maintain consistent
* behaviour with other browsers, we allow unquoted values too.
*/
private static String parseContentDispositionRfc2616(String contentDisposition) {
try {
Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
if (m.find()) {
return m.group(2);
}
} catch (IllegalStateException ex) {
// This function is defined as returning null when it can't parse the header
}
return null;
}
/**
* Pattern for parsing individual content disposition key-value pairs.
*
* <p>The pattern will attempt to parse the value as either single-, double-, or unquoted. For
* the single- and double-quoted options, the pattern allows escaped quotes as part of the
* value, as per <a href="https://datatracker.ietf.org/doc/html/rfc2616#section-2.2">rfc2616
* section-2.2</a>
*/
@SuppressWarnings("RegExpRepeatedSpace") // Spaces are only for readability.
private static final Pattern DISPOSITION_PATTERN =
Pattern.compile(
"""
\\s*(\\S+?) # Group 1: parameter name
\\s*=\\s* # Match equals sign
(?: # non-capturing group of options
'( (?: [^'\\\\] | \\\\. )* )' # Group 2: single-quoted
| "( (?: [^"\\\\] | \\\\. )* )" # Group 3: double-quoted
| ( [^'"][^;\\s]* ) # Group 4: un-quoted parameter
)\\s*;? # Optional end semicolon""",
Pattern.COMMENTS);
/**
* Extract filename from a {@code Content-Disposition} header value.
*
* <p>This method implements the parsing defined in <a
* href="https://datatracker.ietf.org/doc/html/rfc6266">RFC 6266</a>, supporting both the {@code
* filename} and {@code filename*} disposition parameters. If the passed header value has the
* {@code "inline"} disposition type, this method will return {@code null} to indicate that a
* download was not intended.
*
* <p>If both {@code filename*} and {@code filename} is present, the former will be returned, as
* per the RFC. Invalid encoded values will be ignored.
*
* @param contentDisposition Value of {@code Content-Disposition} header.
* @return The filename suggested by the header or {@code null} if no filename could be parsed
* from the header value.
*/
@Nullable
private static String getFilenameFromContentDispositionRfc6266(
@NonNull String contentDisposition) {
String[] parts = contentDisposition.trim().split(";", 2);
if (parts.length < 2) {
// Need at least 2 parts, the `disposition-type` and at least one `disposition-parm`.
return null;
}
String dispositionType = parts[0].trim();
if ("inline".equalsIgnoreCase(dispositionType)) {
// "inline" should not result in a download.
// Unknown disposition types should be handles as "attachment"
// https://datatracker.ietf.org/doc/html/rfc6266#section-4.2
return null;
}
String dispositionParameters = parts[1];
Matcher matcher = DISPOSITION_PATTERN.matcher(dispositionParameters);
String filename = null;
String filenameExt = null;
while (matcher.find()) {
String parameter = matcher.group(1);
String value;
if (matcher.group(2) != null) {
value = removeSlashEscapes(matcher.group(2)); // Value was single-quoted
} else if (matcher.group(3) != null) {
value = removeSlashEscapes(matcher.group(3)); // Value was double-quoted
} else {
value = matcher.group(4); // Value was un-quoted
}
if (parameter == null || value == null) {
continue;
}
if ("filename*".equalsIgnoreCase(parameter)) {
filenameExt = parseExtValueString(value);
} else if ("filename".equalsIgnoreCase(parameter)) {
filename = value;
}
}
// RFC 6266 dictates the filenameExt should be preferred if present.
if (filenameExt != null) {
return filenameExt;
}
return filename;
}
/** Replace escapes of the \X form with X. */
private static String removeSlashEscapes(String raw) {
if (raw == null) {
return null;
}
return raw.replaceAll("\\\\(.)", "$1");
}
/**
* Parse an extended value string which can be percent-encoded. Return {@code} null if unable to
* parse the string.
*/
private static String parseExtValueString(String raw) {
String[] parts = raw.split("'", 3);
if (parts.length < 3) {
return null;
}
String encoding = parts[0];
// Intentionally ignore parts[1] (language).
String valueChars = parts[2];
try {
// The URLDecoder force-decodes + as " "
// so preemptively replace all values with the encoded value to preserve them.
Charset charset = Charset.forName(encoding);
String valueWithEncodedPlus = encodePlusCharacters(valueChars, charset);
return URLDecoder.decode(valueWithEncodedPlus, charset);
} catch (RuntimeException ignored) {
return null; // Ignoring an un-parsable value is within spec.
}
}
/**
* Replace all instances of {@code "+"} with the percent-encoded equivalent for the given {@code
* charset}.
*/
@NonNull
private static String encodePlusCharacters(@NonNull String valueChars, Charset charset) {
StringBuilder sb = new StringBuilder();
for (byte b : charset.encode("+").array()) {
// Formatting a byte is not possible with TextUtils.formatSimple
sb.append(String.format("%02x", b));
}
return valueChars.replaceAll("\\+", sb.toString());
}
}