// Copyright 2018 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. package org.chromium.base; import android.text.TextUtils; import android.util.Patterns; import java.util.regex.Matcher; import java.util.regex.Pattern; /** Provides public methods for detecting and eliding sensitive PII. */ public class PiiElider { private static final String EMAIL_ELISION = "XXX@EMAIL.ELIDED"; private static final String URL_ELISION = "HTTP://WEBADDRESS.ELIDED"; private static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"; private static final String IP_ADDRESS = "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]" + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]" + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}" + "|[1-9][0-9]|[0-9]))"; private static final String IRI = "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}"; private static final String GOOD_GTLD_CHAR = "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"; private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}"; private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD; private static final String URI_ENCODED_CHAR = "(%[a-fA-F0-9]{2})"; private static final String URI_CHAR = "([a-zA-Z0-9$_.+!*'(),;?&=-]|" + URI_ENCODED_CHAR + ")"; private static final String PATH_CHAR = // Either a single valid path component character or a URI-encoded character. "(([" + GOOD_IRI_CHAR + ";/?:@&=#~.+!*'(),_-])|" + URI_ENCODED_CHAR + ")"; private static final String URI_SCHEME = "((http|https|Http|Https|rtsp|Rtsp)://" + "(" + URI_CHAR + "{1,64}(:" + URI_CHAR + "{1,25})?@)?)"; private static final String DOMAIN_NAME = "(" + HOST_NAME + "|" + IP_ADDRESS + ")"; private static final String PORT = "(:\\d{1,5})"; private static final String URL_WITH_OPTIONAL_SCHEME_AND_PORT = "(" + URI_SCHEME + "?" + DOMAIN_NAME + PORT + "?)"; private static final String PATH_COMPONENT = "(" + PATH_CHAR + "+)"; // Based on: http://www.faqs.org/rfcs/rfc2396.html#:~:text=Scheme%20Component private static final String INTENT_SCHEME = "[a-zA-Z][a-zA-Z0-9+.-]+://"; private static final String INTENT = "(" + INTENT_SCHEME + PATH_COMPONENT + ")"; private static final String URL_OR_INTENT = "(" + URL_WITH_OPTIONAL_SCHEME_AND_PORT + "|" + INTENT + ")"; private static final Pattern WEB_URL = Pattern.compile( "(\\b|^)" // Always start on a word boundary or start of string. + "(" + URL_OR_INTENT + ")" // Main URL or Intent scheme/domain/root path. + "(/" + PATH_CHAR + "*)?" // Rest of the URI path. + "(\\b|$)"); // Always end on a word boundary or end of string. // Example variant info chromium-TrichromeChromeGoogle6432.aab private static final String CHROME_VARIANT_INFO = "chromium-[^\\.]+\\.aab"; private static final Pattern LIKELY_EXCEPTION_LOG = Pattern.compile( "\\sat\\s" // These are all package prefixes of classes that are likely to // exist on a stacktrace and are very unlikely to be a PII url. + "(org\\.chromium|com\\.google|java|android|com\\.android)\\.[^ ]+.|" // if a line has what looks like line number info, it's probably an // exception log. + "\\(" + CHROME_VARIANT_INFO + "[^:]+:\\d+\\)|" // When a class is not found it can fail to satisfy our isClass // check but is still worth noting what it was. + "Caused by: java\\.lang\\." + "(ClassNotFoundException|NoClassDefFoundError):"); private static final String IP_ELISION = "1.2.3.4"; private static final String MAC_ELISION = "01:23:45:67:89:AB"; private static final String CONSOLE_ELISION = "[ELIDED:CONSOLE(0)] ELIDED CONSOLE MESSAGE"; private static final Pattern MAC_ADDRESS = Pattern.compile("([0-9a-fA-F]{2}[-:]+){5}[0-9a-fA-F]{2}"); private static final Pattern CONSOLE_MSG = Pattern.compile("\\[\\w*:CONSOLE.*\\].*"); private static final String[] APP_NAMESPACE = new String[] {"org.chromium.", "com.google.", "com.chrome."}; private static final String[] SYSTEM_NAMESPACE = new String[] { "android.", "com.android.", "dalvik.", "java.", "javax.", "org.apache.", "org.json.", "org.w3c.dom.", "org.xml.", "org.xmlpull.", "System." }; /** * Elides any emails in the specified {@link String} with * {@link #EMAIL_ELISION}. * * @param original String potentially containing emails. * @return String with elided emails. */ public static String elideEmail(String original) { return Patterns.EMAIL_ADDRESS.matcher(original).replaceAll(EMAIL_ELISION); } /** * Elides any URLs in the specified {@link String} with * {@link #URL_ELISION}. * * @param original String potentially containing URLs. * @return String with elided URLs. */ public static String elideUrl(String original) { // Url-matching is fussy. If something looks like an exception message, just return. if (LIKELY_EXCEPTION_LOG.matcher(original).find()) return original; StringBuilder buffer = new StringBuilder(original); Matcher matcher = WEB_URL.matcher(buffer); int start = 0; while (matcher.find(start)) { start = matcher.start(); int end = matcher.end(); String url = buffer.substring(start, end); if (!likelyToBeAppNamespace(url) && !likelyToBeSystemNamespace(url) && !likelyToBeClassOrMethodName(url)) { buffer.replace(start, end, URL_ELISION); end = start + URL_ELISION.length(); matcher = WEB_URL.matcher(buffer); } start = end; } return buffer.toString(); } private static boolean likelyToBeClassOrMethodName(String url) { if (isClassName(url)) return true; // Since the suspected URL could actually be a method name, check if the portion preceding // the last subdomain is a class name. int indexOfLastPeriod = url.lastIndexOf("."); if (indexOfLastPeriod == -1) return false; return isClassName(url.substring(0, indexOfLastPeriod)); } private static boolean isClassName(String url) { try { Class.forName(url, false, ContextUtils.getApplicationContext().getClassLoader()); return true; } catch (Throwable e) { // Some examples: ClassNotFoundException, NoClassDefFoundException, VerifyError. } return false; } private static boolean likelyToBeAppNamespace(String url) { for (String ns : APP_NAMESPACE) { if (url.startsWith(ns)) { return true; } } return false; } private static boolean likelyToBeSystemNamespace(String url) { for (String ns : SYSTEM_NAMESPACE) { if (url.startsWith(ns)) { return true; } } return false; } /** * Elides any IP addresses in the specified {@link String} with * {@link #IP_ELISION}. * * @param original String potentially containing IPs. * @return String with elided IPs. */ public static String elideIp(String original) { return Patterns.IP_ADDRESS.matcher(original).replaceAll(IP_ELISION); } /** * Elides any MAC addresses in the specified {@link String} with * {@link #MAC_ELISION}. * * @param original String potentially containing MACs. * @return String with elided MACs. */ public static String elideMac(String original) { return MAC_ADDRESS.matcher(original).replaceAll(MAC_ELISION); } /** * Elides any console messages in the specified {@link String} with * {@link #CONSOLE_ELISION}. * * @param original String potentially containing console messages. * @return String with elided console messages. */ public static String elideConsole(String original) { return CONSOLE_MSG.matcher(original).replaceAll(CONSOLE_ELISION); } /** * Elides any URL in the exception messages contained inside a stacktrace with * {@link #URL_ELISION}. * * @param stacktrace Multiline stacktrace as a string. * @return Stacktrace with elided URLs. */ public static String sanitizeStacktrace(String stacktrace) { if (TextUtils.isEmpty(stacktrace)) { return ""; } String[] frames = stacktrace.split("\\n"); // Sanitize first stacktrace line which contains the exception message. frames[0] = elideUrl(frames[0]); for (int i = 1; i < frames.length; i++) { // Nested exceptions should also have their message sanitized. if (frames[i].startsWith("Caused by:")) { frames[i] = elideUrl(frames[i]); } } return TextUtils.join("\n", frames); } }