// Protocol Buffers - Google's data interchange format // Copyright 2008 Google Inc. All rights reserved. // https://developers.google.com/protocol-buffers/ // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. package com.google.protobuf; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InvalidObjectException; import java.io.ObjectInputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; /** * Class to represent {@code ByteStrings} formed by concatenation of other ByteStrings, without * copying the data in the pieces. The concatenation is represented as a tree whose leaf nodes are * each a {@link com.google.protobuf.ByteString.LeafByteString}. * *
Most of the operation here is inspired by the now-famous paper * BAP95 Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and michael plass * *
The algorithms described in the paper have been implemented for character strings in {@code * com.google.common.string.Rope} and in the c++ class {@code cord.cc}. * *
Fundamentally the Rope algorithm represents the collection of pieces as a binary tree. BAP95 * uses a Fibonacci bound relating depth to a minimum sequence length, sequences that are too short * relative to their depth cause a tree rebalance. More precisely, a tree of depth d is "balanced" * in the terminology of BAP95 if its length is at least F(d+2), where F(n) is the n-th Fibonacci * number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum lengths 1, 2, 3, 5, 8, 13,... * * @author carlanton@google.com (Carl Haverl) */ final class RopeByteString extends ByteString { /** * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of depth n is "balanced", * i.e flat enough, if its length is at least Fn+2, e.g. a "balanced" {@link RopeByteString} of * depth 1 must have length at least 2, of depth 4 must have length >= 8, etc. * *
There's nothing special about using the Fibonacci numbers for this, but they are a * reasonable sequence for encapsulating the idea that we are OK with longer strings being encoded * in deeper binary trees. * *
For 32-bit integers, this array has length 46. * *
The correctness of this constant array is validated in tests. */ static final int[] minLengthByDepth = { 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, 9227465, 14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296, 433494437, 701408733, 1134903170, 1836311903, Integer.MAX_VALUE }; private final int totalLength; private final ByteString left; private final ByteString right; private final int leftLength; private final int treeDepth; /** * Create a new RopeByteString, which can be thought of as a new tree node, by recording * references to the two given strings. * * @param left string on the left of this node, should have {@code size() > 0} * @param right string on the right of this node, should have {@code size() > 0} */ private RopeByteString(ByteString left, ByteString right) { this.left = left; this.right = right; leftLength = left.size(); totalLength = leftLength + right.size(); treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; } /** * Concatenate the given strings while performing various optimizations to slow the growth rate of * tree depth and tree node count. The result is either a {@link * com.google.protobuf.ByteString.LeafByteString} or a {@link RopeByteString} depending on which * optimizations, if any, were applied. * *
Small pieces of length less than {@link ByteString#CONCATENATE_BY_COPY_SIZE} may be copied * by value here, as in BAP95. Large pieces are referenced without copy. * * @param left string on the left * @param right string on the right * @return concatenation representing the same sequence as the given strings */ static ByteString concatenate(ByteString left, ByteString right) { if (right.size() == 0) { return left; } if (left.size() == 0) { return right; } final int newLength = left.size() + right.size(); if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) { // Optimization from BAP95: For short (leaves in paper, but just short // here) total length, do a copy of data to a new leaf. return concatenateBytes(left, right); } if (left instanceof RopeByteString) { final RopeByteString leftRope = (RopeByteString) left; if (leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) { // Optimization from BAP95: As an optimization of the case where the // ByteString is constructed by repeated concatenate, recognize the case // where a short string is concatenated to a left-hand node whose // right-hand branch is short. In the paper this applies to leaves, but // we just look at the length here. This has the advantage of shedding // references to unneeded data when substrings have been taken. // // When we recognize this case, we do a copy of the data and create a // new parent node so that the depth of the result is the same as the // given left tree. ByteString newRight = concatenateBytes(leftRope.right, right); return new RopeByteString(leftRope.left, newRight); } if (leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth() && leftRope.getTreeDepth() > right.getTreeDepth()) { // Typically for concatenate-built strings the left-side is deeper than // the right. This is our final attempt to concatenate without // increasing the tree depth. We'll redo the node on the RHS. This // is yet another optimization for building the string by repeatedly // concatenating on the right. ByteString newRight = new RopeByteString(leftRope.right, right); return new RopeByteString(leftRope.left, newRight); } } // Fine, we'll add a node and increase the tree depth--unless we rebalance ;^) int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1; if (newLength >= minLength(newDepth)) { // The tree is shallow enough, so don't rebalance return new RopeByteString(left, right); } return new Balancer().balance(left, right); } /** * Concatenates two strings by copying data values. This is called in a few cases in order to * reduce the growth of the number of tree nodes. * * @param left string on the left * @param right string on the right * @return string formed by copying data bytes */ private static ByteString concatenateBytes(ByteString left, ByteString right) { int leftSize = left.size(); int rightSize = right.size(); byte[] bytes = new byte[leftSize + rightSize]; left.copyTo(bytes, 0, 0, leftSize); right.copyTo(bytes, 0, leftSize, rightSize); return ByteString.wrap(bytes); // Constructor wraps bytes } /** * Create a new RopeByteString for testing only while bypassing all the defenses of {@link * #concatenate(ByteString, ByteString)}. This allows testing trees of specific structure. We are * also able to insert empty leaves, though these are dis-allowed, so that we can make sure the * implementation can withstand their presence. * * @param left string on the left of this node * @param right string on the right of this node * @return an unsafe instance for testing only */ static RopeByteString newInstanceForTest(ByteString left, ByteString right) { return new RopeByteString(left, right); } /** * Returns the minimum length for which a tree of the given depth is considered balanced according * to BAP95, which means the tree is flat-enough with respect to the bounds. Defaults to {@code * Integer.MAX_VALUE} if {@code depth >= minLengthByDepth.length} in order to avoid an {@code * ArrayIndexOutOfBoundsException}. * * @param depth tree depth * @return minimum balanced length */ static int minLength(int depth) { if (depth >= minLengthByDepth.length) { return Integer.MAX_VALUE; } return minLengthByDepth[depth]; } /** * Gets the byte at the given index. Throws {@link ArrayIndexOutOfBoundsException} for * backwards-compatibility reasons although it would more properly be {@link * IndexOutOfBoundsException}. * * @param index index of byte * @return the value * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size */ @Override public byte byteAt(int index) { checkIndex(index, totalLength); return internalByteAt(index); } @Override byte internalByteAt(int index) { // Find the relevant piece by recursive descent if (index < leftLength) { return left.internalByteAt(index); } return right.internalByteAt(index - leftLength); } @Override public int size() { return totalLength; } @Override public ByteIterator iterator() { return new AbstractByteIterator() { final PieceIterator pieces = new PieceIterator(RopeByteString.this); ByteIterator current = nextPiece(); private ByteIterator nextPiece() { // NOTE: PieceIterator is guaranteed to return non-empty pieces, so this method will always // return non-empty iterators (or null) return pieces.hasNext() ? pieces.next().iterator() : null; } @Override public boolean hasNext() { return current != null; } @Override public byte nextByte() { if (current == null) { throw new NoSuchElementException(); } byte b = current.nextByte(); if (!current.hasNext()) { current = nextPiece(); } return b; } }; } // ================================================================= // Pieces @Override protected int getTreeDepth() { return treeDepth; } /** * Determines if the tree is balanced according to BAP95, which means the tree is flat-enough with * respect to the bounds. Note that this definition of balanced is one where sub-trees of balanced * trees are not necessarily balanced. * * @return true if the tree is balanced */ @Override protected boolean isBalanced() { return totalLength >= minLength(treeDepth); } /** * Takes a substring of this one. This involves recursive descent along the left and right edges * of the substring, and referencing any wholly contained segments in between. Any leaf nodes * entirely uninvolved in the substring will not be referenced by the substring. * *
Substrings of {@code length < 2} should result in at most a single recursive call chain,
* terminating at a leaf node. Thus the result will be a {@link
* com.google.protobuf.ByteString.LeafByteString}.
*
* @param beginIndex start at this index
* @param endIndex the last character is the one before this index
* @return substring leaf node or tree
*/
@Override
public ByteString substring(int beginIndex, int endIndex) {
final int length = checkRange(beginIndex, endIndex, totalLength);
if (length == 0) {
// Empty substring
return ByteString.EMPTY;
}
if (length == totalLength) {
// The whole string
return this;
}
// Proper substring
if (endIndex <= leftLength) {
// Substring on the left
return left.substring(beginIndex, endIndex);
}
if (beginIndex >= leftLength) {
// Substring on the right
return right.substring(beginIndex - leftLength, endIndex - leftLength);
}
// Split substring
ByteString leftSub = left.substring(beginIndex);
ByteString rightSub = right.substring(0, endIndex - leftLength);
// Intentionally not rebalancing, since in many cases these two
// substrings will already be less deep than the top-level
// RopeByteString we're taking a substring of.
return new RopeByteString(leftSub, rightSub);
}
// =================================================================
// ByteString -> byte[]
@Override
protected void copyToInternal(
byte[] target, int sourceOffset, int targetOffset, int numberToCopy) {
if (sourceOffset + numberToCopy <= leftLength) {
left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
} else if (sourceOffset >= leftLength) {
right.copyToInternal(target, sourceOffset - leftLength, targetOffset, numberToCopy);
} else {
int leftLength = this.leftLength - sourceOffset;
left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
right.copyToInternal(target, 0, targetOffset + leftLength, numberToCopy - leftLength);
}
}
@Override
public void copyTo(ByteBuffer target) {
left.copyTo(target);
right.copyTo(target);
}
@Override
public ByteBuffer asReadOnlyByteBuffer() {
ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
return byteBuffer.asReadOnlyBuffer();
}
@Override
public List One surprising aspect of the algorithm is the result of balancing is not necessarily
* balanced, though it is nearly balanced. For details, see BAP95.
*/
private static class Balancer {
// Stack containing the part of the string, starting from the left, that
// we've already traversed. The final string should be the equivalent of
// concatenating the strings on the stack from bottom to top.
private final ArrayDeque If the length bin for our string, and all shorter length bins, are empty, we just push it
* on the stack. Otherwise, we need to start concatenating, putting the given string in the
* "middle" and continuing until we land in an empty length bin that matches the length of our
* concatenation.
*
* @param byteString string to place on the balance stack
*/
private void insert(ByteString byteString) {
int depthBin = getDepthBinForLength(byteString.size());
int binEnd = minLength(depthBin + 1);
// BAP95: Concatenate all trees occupying bins representing the length of
// our new piece or of shorter pieces, to the extent that is possible.
// The goal is to clear the bin which our piece belongs in, but that may
// not be entirely possible if there aren't enough longer bins occupied.
if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
prefixesStack.push(byteString);
} else {
int binStart = minLength(depthBin);
// Concatenate the subtrees of shorter length
ByteString newTree = prefixesStack.pop();
while (!prefixesStack.isEmpty() && prefixesStack.peek().size() < binStart) {
ByteString left = prefixesStack.pop();
newTree = new RopeByteString(left, newTree);
}
// Concatenate the given string
newTree = new RopeByteString(newTree, byteString);
// Continue concatenating until we land in an empty bin
while (!prefixesStack.isEmpty()) {
depthBin = getDepthBinForLength(newTree.size());
binEnd = minLength(depthBin + 1);
if (prefixesStack.peek().size() < binEnd) {
ByteString left = prefixesStack.pop();
newTree = new RopeByteString(left, newTree);
} else {
break;
}
}
prefixesStack.push(newTree);
}
}
private int getDepthBinForLength(int length) {
int depth = Arrays.binarySearch(minLengthByDepth, length);
if (depth < 0) {
// It wasn't an exact match, so convert to the index of the containing
// fragment, which is one less even than the insertion point.
int insertionPoint = -(depth + 1);
depth = insertionPoint - 1;
}
return depth;
}
}
/**
* This class is a continuable tree traversal, which keeps the state information which would exist
* on the stack in a recursive traversal instead on a stack of "Bread Crumbs". The maximum depth
* of the stack in this iterator is the same as the depth of the tree being traversed.
*
* This iterator is used to implement {@link RopeByteString#equalsFragments(ByteString)}.
*/
private static final class PieceIterator implements Iterator Note that {@link InputStream#read(byte[], int, int)} and {@link
* ByteArrayInputStream#read(byte[], int, int)} behave inconsistently when reading 0 bytes at
* EOF; the interface defines the return value to be 0 and the latter returns -1. We use the
* latter behavior so that all ByteString streams are consistent.
*
* @return -1 if at EOF, otherwise the actual number of bytes read.
*/
@Override
public int read(byte[] b, int offset, int length) {
if (b == null) {
throw new NullPointerException();
} else if (offset < 0 || length < 0 || length > b.length - offset) {
throw new IndexOutOfBoundsException();
}
int bytesRead = readSkipInternal(b, offset, length);
if (bytesRead == 0 && (length > 0 || availableInternal() == 0)) {
// Modeling ByteArrayInputStream.read(byte[], int, int) behavior noted above:
// It's ok to read 0 bytes on purpose (length == 0) from a stream that isn't at EOF.
// It's not ok to try to read bytes (even 0 bytes) from a stream that is at EOF.
return -1;
} else {
return bytesRead;
}
}
@Override
public long skip(long length) {
if (length < 0) {
throw new IndexOutOfBoundsException();
} else if (length > Integer.MAX_VALUE) {
length = Integer.MAX_VALUE;
}
return readSkipInternal(null, 0, (int) length);
}
/**
* Internal implementation of read and skip. If b != null, then read the next {@code length}
* bytes into the buffer {@code b} at offset {@code offset}. If b == null, then skip the next
* {@code length} bytes.
*
* This method assumes that all error checking has already happened.
*
* Returns the actual number of bytes read or skipped.
*/
private int readSkipInternal(byte[] b, int offset, int length) {
int bytesRemaining = length;
while (bytesRemaining > 0) {
advanceIfCurrentPieceFullyRead();
if (currentPiece == null) {
break;
} else {
// Copy the bytes from this piece.
int currentPieceRemaining = currentPieceSize - currentPieceIndex;
int count = Math.min(currentPieceRemaining, bytesRemaining);
if (b != null) {
currentPiece.copyTo(b, currentPieceIndex, offset, count);
offset += count;
}
currentPieceIndex += count;
bytesRemaining -= count;
}
}
// Return the number of bytes read.
return length - bytesRemaining;
}
@Override
public int read() throws IOException {
advanceIfCurrentPieceFullyRead();
if (currentPiece == null) {
return -1;
} else {
return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
}
}
@Override
public int available() throws IOException {
return availableInternal();
}
@Override
public boolean markSupported() {
return true;
}
@Override
public void mark(int readAheadLimit) {
// Set the mark to our position in the byte string
mark = currentPieceOffsetInRope + currentPieceIndex;
}
@Override
public synchronized void reset() {
// Just reinitialize and skip the specified number of bytes.
initialize();
readSkipInternal(null, 0, mark);
}
/** Common initialization code used by both the constructor and reset() */
private void initialize() {
pieceIterator = new PieceIterator(RopeByteString.this);
currentPiece = pieceIterator.next();
currentPieceSize = currentPiece.size();
currentPieceIndex = 0;
currentPieceOffsetInRope = 0;
}
/**
* Skips to the next piece if we have read all the data in the current piece. Sets currentPiece
* to null if we have reached the end of the input.
*/
private void advanceIfCurrentPieceFullyRead() {
if (currentPiece != null && currentPieceIndex == currentPieceSize) {
// Generally, we can only go through this loop at most once, since
// empty strings can't end up in a rope. But better to test.
currentPieceOffsetInRope += currentPieceSize;
currentPieceIndex = 0;
if (pieceIterator.hasNext()) {
currentPiece = pieceIterator.next();
currentPieceSize = currentPiece.size();
} else {
currentPiece = null;
currentPieceSize = 0;
}
}
}
/** Computes the number of bytes still available to read. */
private int availableInternal() {
int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
return RopeByteString.this.size() - bytesRead;
}
}
}