mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-13 09:26:37 +00:00
Addresses issues with duckdb use of delta encodings (#77)
* Addresses issues with duckdb use of delta encodings * Shrunk size of test data
This commit is contained in:
parent
7f31c1e5b6
commit
dbf3065f8e
@ -48,6 +48,13 @@ export function readDataPage(bytes, daph, { type, element, schemaPath }) {
|
||||
}
|
||||
} else if (daph.encoding === 'BYTE_STREAM_SPLIT') {
|
||||
dataPage = byteStreamSplit(reader, nValues, type, element.type_length)
|
||||
} else if (daph.encoding === 'DELTA_BINARY_PACKED') {
|
||||
const int32 = type === 'INT32'
|
||||
dataPage = int32 ? new Int32Array(nValues) : new BigInt64Array(nValues)
|
||||
deltaBinaryUnpack(reader, nValues, dataPage)
|
||||
} else if (daph.encoding === 'DELTA_LENGTH_BYTE_ARRAY') {
|
||||
dataPage = new Array(nValues)
|
||||
deltaLengthByteArray(reader, nValues, dataPage)
|
||||
} else {
|
||||
throw new Error(`parquet unsupported encoding: ${daph.encoding}`)
|
||||
}
|
||||
|
||||
1252
test/files/duckdb_delta_binary_packed.json
Normal file
1252
test/files/duckdb_delta_binary_packed.json
Normal file
File diff suppressed because it is too large
Load Diff
48
test/files/duckdb_delta_binary_packed.metadata.json
Normal file
48
test/files/duckdb_delta_binary_packed.metadata.json
Normal file
@ -0,0 +1,48 @@
|
||||
{
|
||||
"version": 1,
|
||||
"schema": [
|
||||
{
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "duckdb_schema",
|
||||
"num_children": 1
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "range",
|
||||
"converted_type": "INT_64"
|
||||
}
|
||||
],
|
||||
"num_rows": 1250,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": ["DELTA_BINARY_PACKED"],
|
||||
"path_in_schema": ["range"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 1250,
|
||||
"total_uncompressed_size": 40,
|
||||
"total_compressed_size": 42,
|
||||
"data_page_offset": 4,
|
||||
"statistics": {
|
||||
"max": 1249,
|
||||
"min": 0,
|
||||
"null_count": 0,
|
||||
"max_value": 1249,
|
||||
"min_value": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"total_byte_size": 40,
|
||||
"num_rows": 1250,
|
||||
"file_offset": 4
|
||||
}
|
||||
],
|
||||
"created_by": "DuckDB version v1.2.1 (build 8e52ec4395)",
|
||||
"metadata_length": 169
|
||||
}
|
||||
BIN
test/files/duckdb_delta_binary_packed.parquet
Normal file
BIN
test/files/duckdb_delta_binary_packed.parquet
Normal file
Binary file not shown.
1252
test/files/duckdb_delta_length_byte_array.json
Normal file
1252
test/files/duckdb_delta_length_byte_array.json
Normal file
File diff suppressed because it is too large
Load Diff
48
test/files/duckdb_delta_length_byte_array.metadata.json
Normal file
48
test/files/duckdb_delta_length_byte_array.metadata.json
Normal file
@ -0,0 +1,48 @@
|
||||
{
|
||||
"version": 1,
|
||||
"schema": [
|
||||
{
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "duckdb_schema",
|
||||
"num_children": 1
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "range_varchar",
|
||||
"converted_type": "UTF8"
|
||||
}
|
||||
],
|
||||
"num_rows": 1250,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": ["DELTA_LENGTH_BYTE_ARRAY"],
|
||||
"path_in_schema": ["range_varchar"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 1250,
|
||||
"total_uncompressed_size": 3996,
|
||||
"total_compressed_size": 3390,
|
||||
"data_page_offset": 4,
|
||||
"statistics": {
|
||||
"max": "999",
|
||||
"min": "0",
|
||||
"null_count": 0,
|
||||
"max_value": "999",
|
||||
"min_value": "0"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"total_byte_size": 3996,
|
||||
"num_rows": 1250,
|
||||
"file_offset": 4
|
||||
}
|
||||
],
|
||||
"created_by": "DuckDB version v1.2.1 (build 8e52ec4395)",
|
||||
"metadata_length": 164
|
||||
}
|
||||
BIN
test/files/duckdb_delta_length_byte_array.parquet
Normal file
BIN
test/files/duckdb_delta_length_byte_array.parquet
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user