Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/
package org.apache.parquet.column.values.deltastrings;

import java.util.Arrays;
import org.apache.parquet.bytes.ByteBufferAllocator;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.Encoding;
Expand Down Expand Up @@ -88,14 +89,19 @@ public String memUsageString(String prefix) {

/**
 * Writes one value as a (shared prefix length, suffix) pair relative to the
 * previously written value.
 *
 * <p>The number of leading bytes that {@code v} has in common with
 * {@code previous} is handed to {@code prefixLengthWriter}; the remaining
 * bytes of {@code v} are handed to {@code suffixWriter}. Afterwards
 * {@code previous} is replaced by the bytes of {@code v} so the next call
 * compares against this value.
 *
 * @param v the value to write
 */
@Override
public void writeBytes(Binary v) {
  // May be the Binary's own backing array, so it is only read here.
  byte[] vb = v.getBytesUnsafe();
  int length = Math.min(previous.length, vb.length);

  // Length of the common prefix of this value and the previous one.
  // Arrays.mismatch returns the first differing index within the compared
  // ranges, or -1 when the ranges are equal; the JVM may vectorize it.
  int prefixLength = Arrays.mismatch(previous, 0, length, vb, 0, length);
  if (prefixLength < 0) {
    prefixLength = length; // all bytes in the common range matched
  }

  prefixLengthWriter.writeInteger(prefixLength);
  suffixWriter.writeBytes(v.slice(prefixLength, vb.length - prefixLength));

  // Retain bytes for prefix comparison with the next value. When the caller
  // may reuse the Binary's backing bytes, aliasing vb would let a later
  // overwrite change `previous` behind our back, so take a copy via getBytes().
  previous = v.isBackingBytesReused() ? v.getBytes() : vb;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.parquet.column.values.deltastrings;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.parquet.bytes.ByteBufferInputStream;
import org.apache.parquet.bytes.DirectByteBufferAllocator;
import org.apache.parquet.column.values.Utils;
Expand Down Expand Up @@ -128,4 +129,24 @@ public void testWriterReset() throws Exception {

assertReadWrite(writer, new DeltaByteArrayReader(), values);
}

/**
 * Regression test: writes three values through Binaries that all wrap the same
 * byte array, overwriting that array between writes, and checks that each value
 * decodes to what was in the array at the time it was written.
 */
@Test
public void testReusedBackingArrayRegression() throws Exception {
  final String[] expected = {"parquet-000", "parquet-111", "parquet-222"};

  DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
  DeltaByteArrayReader reader = new DeltaByteArrayReader();

  // One array shared by every Binary handed to the writer.
  byte[] shared = expected[0].getBytes(StandardCharsets.UTF_8);
  for (String value : expected) {
    byte[] encoded = value.getBytes(StandardCharsets.UTF_8);
    // Overwrite the shared array in place (a no-op for the first value).
    System.arraycopy(encoded, 0, shared, 0, shared.length);
    writer.writeBytes(Binary.fromReusedByteArray(shared));
  }

  Binary[] decoded = Utils.readData(reader, writer.getBytes().toInputStream(), expected.length);
  for (int idx = 0; idx < expected.length; idx++) {
    Assert.assertEquals(Binary.fromString(expected[idx]), decoded[idx]);
  }
}
}