mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Use prepended length for bit-packed hybrid bool columns (#62)
This commit is contained in:
parent
cad751e546
commit
bf268e141c
@ -38,7 +38,8 @@ export function readDataPage(bytes, daph, schemaPath, { type }) {
|
||||
const bitWidth = type === 'BOOLEAN' ? 1 : view.getUint8(reader.offset++)
|
||||
if (bitWidth) {
|
||||
dataPage = new Array(nValues)
|
||||
readRleBitPackedHybrid(reader, bitWidth, view.byteLength - reader.offset, dataPage)
|
||||
const encodedLength = type === 'BOOLEAN' ? 0 : view.byteLength - reader.offset
|
||||
readRleBitPackedHybrid(reader, bitWidth, encodedLength, dataPage)
|
||||
} else {
|
||||
dataPage = new Uint8Array(nValues) // nValue zeroes
|
||||
}
|
||||
|
||||
@ -22,7 +22,7 @@ export function bitWidth(value) {
|
||||
*/
|
||||
export function readRleBitPackedHybrid(reader, width, length, output) {
|
||||
if (!length) {
|
||||
// length = reader.view.getUint32(reader.offset, true)
|
||||
length = reader.view.getUint32(reader.offset, true)
|
||||
reader.offset += 4
|
||||
}
|
||||
let seen = 0
|
||||
|
||||
17
test/files/boolean_rle.json
Normal file
17
test/files/boolean_rle.json
Normal file
@ -0,0 +1,17 @@
|
||||
[
|
||||
[1],
|
||||
[1],
|
||||
[1],
|
||||
[1],
|
||||
[1],
|
||||
[null],
|
||||
[null],
|
||||
[null],
|
||||
[null],
|
||||
[null],
|
||||
[0],
|
||||
[0],
|
||||
[0],
|
||||
[0],
|
||||
[0]
|
||||
]
|
||||
61
test/files/boolean_rle.metadata.json
Normal file
61
test/files/boolean_rle.metadata.json
Normal file
@ -0,0 +1,61 @@
|
||||
{
|
||||
"created_by": "Polars",
|
||||
"key_value_metadata": [
|
||||
{
|
||||
"key": "ARROW:schema",
|
||||
"value": "/////3YAAAAEAAAA8v///xQAAAAEAAEAAAAKAAsACAAKAAQA+P///wwAAAAIAAgAAAAEAAEAAAAEAAAA7P///ywAAAAgAAAAGAAAAAEGAAAQABIABAAQABEACAAAAAwAAAAAAPz///8EAAQADQAAAEJvb2xlYW5Db2x1bW4A"
|
||||
}
|
||||
],
|
||||
"metadata_length": 308,
|
||||
"num_rows": 15,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"column_index_length": 17,
|
||||
"column_index_offset": 89,
|
||||
"crypto_metadata": 17,
|
||||
"file_offset": 47,
|
||||
"meta_data": {
|
||||
"codec": "SNAPPY",
|
||||
"data_page_offset": 4,
|
||||
"encodings": [
|
||||
"RLE"
|
||||
],
|
||||
"num_values": 15,
|
||||
"path_in_schema": [
|
||||
"BooleanColumn"
|
||||
],
|
||||
"statistics": {
|
||||
"max_value": true,
|
||||
"min_value": false,
|
||||
"null_count": 5
|
||||
},
|
||||
"total_compressed_size": 43,
|
||||
"total_uncompressed_size": 41,
|
||||
"type": "BOOLEAN"
|
||||
},
|
||||
"offset_index_length": 10,
|
||||
"offset_index_offset": 106
|
||||
}
|
||||
],
|
||||
"file_offset": 4,
|
||||
"num_rows": 15,
|
||||
"ordinal": 0,
|
||||
"total_byte_size": 41,
|
||||
"total_compressed_size": 43
|
||||
}
|
||||
],
|
||||
"schema": [
|
||||
{
|
||||
"name": "root",
|
||||
"num_children": 1
|
||||
},
|
||||
{
|
||||
"name": "BooleanColumn",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"type": "BOOLEAN"
|
||||
}
|
||||
],
|
||||
"version": 1
|
||||
}
|
||||
BIN
test/files/boolean_rle.parquet
Normal file
BIN
test/files/boolean_rle.parquet
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user