257 lines
9.9 KiB
Java
257 lines
9.9 KiB
Java
![]() |
// Copyright 2018 The Chromium Authors
|
||
|
// Use of this source code is governed by a BSD-style license that can be
|
||
|
// found in the LICENSE file.
|
||
|
|
||
|
package org.chromium.base;
|
||
|
|
||
|
import android.text.TextUtils;
|
||
|
import android.util.Patterns;
|
||
|
|
||
|
import java.util.regex.Matcher;
|
||
|
import java.util.regex.Pattern;
|
||
|
|
||
|
/** Provides public methods for detecting and eliding sensitive PII. */
|
||
|
public class PiiElider {
|
||
|
private static final String EMAIL_ELISION = "XXX@EMAIL.ELIDED";
|
||
|
|
||
|
private static final String URL_ELISION = "HTTP://WEBADDRESS.ELIDED";
|
||
|
|
||
|
private static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
|
||
|
|
||
|
private static final String IP_ADDRESS =
|
||
|
"((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
|
||
|
+ "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
|
||
|
+ "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
|
||
|
+ "|[1-9][0-9]|[0-9]))";
|
||
|
|
||
|
private static final String IRI =
|
||
|
"[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";
|
||
|
|
||
|
private static final String GOOD_GTLD_CHAR = "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
|
||
|
private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}";
|
||
|
private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD;
|
||
|
|
||
|
private static final String URI_ENCODED_CHAR = "(%[a-fA-F0-9]{2})";
|
||
|
|
||
|
private static final String URI_CHAR = "([a-zA-Z0-9$_.+!*'(),;?&=-]|" + URI_ENCODED_CHAR + ")";
|
||
|
|
||
|
private static final String PATH_CHAR =
|
||
|
// Either a single valid path component character or a URI-encoded character.
|
||
|
"(([" + GOOD_IRI_CHAR + ";/?:@&=#~.+!*'(),_-])|" + URI_ENCODED_CHAR + ")";
|
||
|
|
||
|
private static final String URI_SCHEME =
|
||
|
"((http|https|Http|Https|rtsp|Rtsp)://"
|
||
|
+ "("
|
||
|
+ URI_CHAR
|
||
|
+ "{1,64}(:"
|
||
|
+ URI_CHAR
|
||
|
+ "{1,25})?@)?)";
|
||
|
|
||
|
private static final String DOMAIN_NAME = "(" + HOST_NAME + "|" + IP_ADDRESS + ")";
|
||
|
|
||
|
private static final String PORT = "(:\\d{1,5})";
|
||
|
|
||
|
private static final String URL_WITH_OPTIONAL_SCHEME_AND_PORT =
|
||
|
"(" + URI_SCHEME + "?" + DOMAIN_NAME + PORT + "?)";
|
||
|
|
||
|
private static final String PATH_COMPONENT = "(" + PATH_CHAR + "+)";
|
||
|
|
||
|
// Based on: http://www.faqs.org/rfcs/rfc2396.html#:~:text=Scheme%20Component
|
||
|
private static final String INTENT_SCHEME = "[a-zA-Z][a-zA-Z0-9+.-]+://";
|
||
|
|
||
|
private static final String INTENT = "(" + INTENT_SCHEME + PATH_COMPONENT + ")";
|
||
|
|
||
|
private static final String URL_OR_INTENT =
|
||
|
"(" + URL_WITH_OPTIONAL_SCHEME_AND_PORT + "|" + INTENT + ")";
|
||
|
|
||
|
private static final Pattern WEB_URL =
|
||
|
Pattern.compile(
|
||
|
"(\\b|^)" // Always start on a word boundary or start of string.
|
||
|
+ "("
|
||
|
+ URL_OR_INTENT
|
||
|
+ ")" // Main URL or Intent scheme/domain/root path.
|
||
|
+ "(/"
|
||
|
+ PATH_CHAR
|
||
|
+ "*)?" // Rest of the URI path.
|
||
|
+ "(\\b|$)"); // Always end on a word boundary or end of string.
|
||
|
|
||
|
// Example variant info chromium-TrichromeChromeGoogle6432.aab
|
||
|
private static final String CHROME_VARIANT_INFO = "chromium-[^\\.]+\\.aab";
|
||
|
private static final Pattern LIKELY_EXCEPTION_LOG =
|
||
|
Pattern.compile(
|
||
|
"\\sat\\s"
|
||
|
// These are all package prefixes of classes that are likely to
|
||
|
// exist on a stacktrace and are very unlikely to be a PII url.
|
||
|
+ "(org\\.chromium|com\\.google|java|android|com\\.android)\\.[^ ]+.|"
|
||
|
// if a line has what looks like line number info, it's probably an
|
||
|
// exception log.
|
||
|
+ "\\("
|
||
|
+ CHROME_VARIANT_INFO
|
||
|
+ "[^:]+:\\d+\\)|"
|
||
|
// When a class is not found it can fail to satisfy our isClass
|
||
|
// check but is still worth noting what it was.
|
||
|
+ "Caused by: java\\.lang\\."
|
||
|
+ "(ClassNotFoundException|NoClassDefFoundError):");
|
||
|
|
||
|
private static final String IP_ELISION = "1.2.3.4";
|
||
|
private static final String MAC_ELISION = "01:23:45:67:89:AB";
|
||
|
private static final String CONSOLE_ELISION = "[ELIDED:CONSOLE(0)] ELIDED CONSOLE MESSAGE";
|
||
|
|
||
|
private static final Pattern MAC_ADDRESS =
|
||
|
Pattern.compile("([0-9a-fA-F]{2}[-:]+){5}[0-9a-fA-F]{2}");
|
||
|
|
||
|
private static final Pattern CONSOLE_MSG = Pattern.compile("\\[\\w*:CONSOLE.*\\].*");
|
||
|
|
||
|
private static final String[] APP_NAMESPACE =
|
||
|
new String[] {"org.chromium.", "com.google.", "com.chrome."};
|
||
|
|
||
|
private static final String[] SYSTEM_NAMESPACE =
|
||
|
new String[] {
|
||
|
"android.",
|
||
|
"com.android.",
|
||
|
"dalvik.",
|
||
|
"java.",
|
||
|
"javax.",
|
||
|
"org.apache.",
|
||
|
"org.json.",
|
||
|
"org.w3c.dom.",
|
||
|
"org.xml.",
|
||
|
"org.xmlpull.",
|
||
|
"System."
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* Elides any emails in the specified {@link String} with
|
||
|
* {@link #EMAIL_ELISION}.
|
||
|
*
|
||
|
* @param original String potentially containing emails.
|
||
|
* @return String with elided emails.
|
||
|
*/
|
||
|
public static String elideEmail(String original) {
|
||
|
return Patterns.EMAIL_ADDRESS.matcher(original).replaceAll(EMAIL_ELISION);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Elides any URLs in the specified {@link String} with
|
||
|
* {@link #URL_ELISION}.
|
||
|
*
|
||
|
* @param original String potentially containing URLs.
|
||
|
* @return String with elided URLs.
|
||
|
*/
|
||
|
public static String elideUrl(String original) {
|
||
|
// Url-matching is fussy. If something looks like an exception message, just return.
|
||
|
if (LIKELY_EXCEPTION_LOG.matcher(original).find()) return original;
|
||
|
StringBuilder buffer = new StringBuilder(original);
|
||
|
Matcher matcher = WEB_URL.matcher(buffer);
|
||
|
int start = 0;
|
||
|
while (matcher.find(start)) {
|
||
|
start = matcher.start();
|
||
|
int end = matcher.end();
|
||
|
String url = buffer.substring(start, end);
|
||
|
if (!likelyToBeAppNamespace(url)
|
||
|
&& !likelyToBeSystemNamespace(url)
|
||
|
&& !likelyToBeClassOrMethodName(url)) {
|
||
|
buffer.replace(start, end, URL_ELISION);
|
||
|
end = start + URL_ELISION.length();
|
||
|
matcher = WEB_URL.matcher(buffer);
|
||
|
}
|
||
|
start = end;
|
||
|
}
|
||
|
return buffer.toString();
|
||
|
}
|
||
|
|
||
|
private static boolean likelyToBeClassOrMethodName(String url) {
|
||
|
if (isClassName(url)) return true;
|
||
|
|
||
|
// Since the suspected URL could actually be a method name, check if the portion preceding
|
||
|
// the last subdomain is a class name.
|
||
|
int indexOfLastPeriod = url.lastIndexOf(".");
|
||
|
if (indexOfLastPeriod == -1) return false;
|
||
|
return isClassName(url.substring(0, indexOfLastPeriod));
|
||
|
}
|
||
|
|
||
|
private static boolean isClassName(String url) {
|
||
|
try {
|
||
|
Class.forName(url, false, ContextUtils.getApplicationContext().getClassLoader());
|
||
|
return true;
|
||
|
} catch (Throwable e) {
|
||
|
// Some examples: ClassNotFoundException, NoClassDefFoundException, VerifyError.
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
private static boolean likelyToBeAppNamespace(String url) {
|
||
|
for (String ns : APP_NAMESPACE) {
|
||
|
if (url.startsWith(ns)) {
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
private static boolean likelyToBeSystemNamespace(String url) {
|
||
|
for (String ns : SYSTEM_NAMESPACE) {
|
||
|
if (url.startsWith(ns)) {
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Elides any IP addresses in the specified {@link String} with
|
||
|
* {@link #IP_ELISION}.
|
||
|
*
|
||
|
* @param original String potentially containing IPs.
|
||
|
* @return String with elided IPs.
|
||
|
*/
|
||
|
public static String elideIp(String original) {
|
||
|
return Patterns.IP_ADDRESS.matcher(original).replaceAll(IP_ELISION);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Elides any MAC addresses in the specified {@link String} with
|
||
|
* {@link #MAC_ELISION}.
|
||
|
*
|
||
|
* @param original String potentially containing MACs.
|
||
|
* @return String with elided MACs.
|
||
|
*/
|
||
|
public static String elideMac(String original) {
|
||
|
return MAC_ADDRESS.matcher(original).replaceAll(MAC_ELISION);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Elides any console messages in the specified {@link String} with
|
||
|
* {@link #CONSOLE_ELISION}.
|
||
|
*
|
||
|
* @param original String potentially containing console messages.
|
||
|
* @return String with elided console messages.
|
||
|
*/
|
||
|
public static String elideConsole(String original) {
|
||
|
return CONSOLE_MSG.matcher(original).replaceAll(CONSOLE_ELISION);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Elides any URL in the exception messages contained inside a stacktrace with
|
||
|
* {@link #URL_ELISION}.
|
||
|
*
|
||
|
* @param stacktrace Multiline stacktrace as a string.
|
||
|
* @return Stacktrace with elided URLs.
|
||
|
*/
|
||
|
public static String sanitizeStacktrace(String stacktrace) {
|
||
|
if (TextUtils.isEmpty(stacktrace)) {
|
||
|
return "";
|
||
|
}
|
||
|
String[] frames = stacktrace.split("\\n");
|
||
|
// Sanitize first stacktrace line which contains the exception message.
|
||
|
frames[0] = elideUrl(frames[0]);
|
||
|
for (int i = 1; i < frames.length; i++) {
|
||
|
// Nested exceptions should also have their message sanitized.
|
||
|
if (frames[i].startsWith("Caused by:")) {
|
||
|
frames[i] = elideUrl(frames[i]);
|
||
|
}
|
||
|
}
|
||
|
return TextUtils.join("\n", frames);
|
||
|
}
|
||
|
}
|