mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Delta length byte array encoding
This commit is contained in:
parent
da72c06ac2
commit
1689d7473a
@ -182,7 +182,7 @@ Parquet encodings:
|
||||
- [X] BIT_PACKED
|
||||
- [X] DELTA_BINARY_PACKED
|
||||
- [X] DELTA_BYTE_ARRAY
|
||||
- [ ] DELTA_LENGTH_BYTE_ARRAY
|
||||
- [X] DELTA_LENGTH_BYTE_ARRAY
|
||||
- [ ] BYTE_STREAM_SPLIT
|
||||
|
||||
## Hysnappy
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { decompressPage } from './column.js'
|
||||
import { deltaBinaryUnpack, deltaByteArray } from './delta.js'
|
||||
import { deltaBinaryUnpack, deltaByteArray, deltaLengthByteArray } from './delta.js'
|
||||
import { readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js'
|
||||
import { readPlain } from './plain.js'
|
||||
import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
|
||||
@ -67,6 +67,9 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata,
|
||||
const int32 = columnMetadata.type === 'INT32'
|
||||
dataPage = int32 ? new Int32Array(nValues) : new BigInt64Array(nValues)
|
||||
deltaBinaryUnpack(pageReader, nValues, dataPage)
|
||||
} else if (daph2.encoding === 'DELTA_LENGTH_BYTE_ARRAY') {
|
||||
dataPage = new Array(nValues)
|
||||
deltaLengthByteArray(pageReader, nValues, dataPage)
|
||||
} else if (daph2.encoding === 'DELTA_BYTE_ARRAY') {
|
||||
dataPage = new Array(nValues)
|
||||
deltaByteArray(pageReader, nValues, dataPage)
|
||||
|
||||
14
src/delta.js
14
src/delta.js
@ -61,6 +61,20 @@ export function deltaBinaryUnpack(reader, nValues, output) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Read DELTA_LENGTH_BYTE_ARRAY encoded values from a page.
 *
 * The nValues byte-array lengths are stored up front as a
 * DELTA_BINARY_PACKED block; the concatenated value bytes follow
 * immediately after. Each output entry is a zero-copy Uint8Array view
 * into the underlying page buffer (no bytes are copied).
 *
 * @param {DataReader} reader
 * @param {number} nValues
 * @param {Uint8Array[]} output
 */
export function deltaLengthByteArray(reader, nValues, output) {
  // decode the length of every value first
  const lengths = new Int32Array(nValues)
  deltaBinaryUnpack(reader, nValues, lengths)
  // then slice each value out of the remaining bytes
  const { buffer, byteOffset } = reader.view
  let index = 0
  for (const length of lengths) {
    output[index++] = new Uint8Array(buffer, byteOffset + reader.offset, length)
    reader.offset += length
  }
}
|
||||
|
||||
/**
|
||||
* @param {DataReader} reader
|
||||
* @param {number} nValues
|
||||
|
||||
1002
test/files/delta_length_byte_array.json
Normal file
1002
test/files/delta_length_byte_array.json
Normal file
File diff suppressed because it is too large
Load Diff
59
test/files/delta_length_byte_array.metadata.json
Normal file
59
test/files/delta_length_byte_array.metadata.json
Normal file
@ -0,0 +1,59 @@
|
||||
{
|
||||
"version": 2,
|
||||
"schema": [
|
||||
{
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "schema",
|
||||
"num_children": 1,
|
||||
"field_id": -1
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "FRUIT",
|
||||
"converted_type": "UTF8",
|
||||
"field_id": 1,
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
}
|
||||
],
|
||||
"num_rows": 1000,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"file_offset": 2629,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"RLE",
|
||||
"DELTA_LENGTH_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"FRUIT"
|
||||
],
|
||||
"codec": "ZSTD",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 23747,
|
||||
"total_compressed_size": 2625,
|
||||
"data_page_offset": 4,
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_LENGTH_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"total_byte_size": 23747,
|
||||
"num_rows": 1000,
|
||||
"file_offset": 0,
|
||||
"total_compressed_size": 2625,
|
||||
"ordinal": 0
|
||||
}
|
||||
],
|
||||
"metadata_length": 105
|
||||
}
|
||||
BIN
test/files/delta_length_byte_array.parquet
Normal file
BIN
test/files/delta_length_byte_array.parquet
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user