Use prepended length for bit-packed hybrid bool columns (#62)

This commit is contained in:
Johan Levin 2025-02-19 20:07:49 +01:00 committed by GitHub
parent cad751e546
commit bf268e141c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 81 additions and 2 deletions

@ -38,7 +38,8 @@ export function readDataPage(bytes, daph, schemaPath, { type }) {
const bitWidth = type === 'BOOLEAN' ? 1 : view.getUint8(reader.offset++)
if (bitWidth) {
dataPage = new Array(nValues)
readRleBitPackedHybrid(reader, bitWidth, view.byteLength - reader.offset, dataPage)
const encodedLength = type === 'BOOLEAN' ? 0 : view.byteLength - reader.offset
readRleBitPackedHybrid(reader, bitWidth, encodedLength, dataPage)
} else {
dataPage = new Uint8Array(nValues) // nValue zeroes
}

@ -22,7 +22,7 @@ export function bitWidth(value) {
*/
export function readRleBitPackedHybrid(reader, width, length, output) {
if (!length) {
// length = reader.view.getUint32(reader.offset, true)
length = reader.view.getUint32(reader.offset, true)
reader.offset += 4
}
let seen = 0

@ -0,0 +1,17 @@
[
[1],
[1],
[1],
[1],
[1],
[null],
[null],
[null],
[null],
[null],
[0],
[0],
[0],
[0],
[0]
]

@ -0,0 +1,61 @@
{
"created_by": "Polars",
"key_value_metadata": [
{
"key": "ARROW:schema",
"value": "/////3YAAAAEAAAA8v///xQAAAAEAAEAAAAKAAsACAAKAAQA+P///wwAAAAIAAgAAAAEAAEAAAAEAAAA7P///ywAAAAgAAAAGAAAAAEGAAAQABIABAAQABEACAAAAAwAAAAAAPz///8EAAQADQAAAEJvb2xlYW5Db2x1bW4A"
}
],
"metadata_length": 308,
"num_rows": 15,
"row_groups": [
{
"columns": [
{
"column_index_length": 17,
"column_index_offset": 89,
"crypto_metadata": 17,
"file_offset": 47,
"meta_data": {
"codec": "SNAPPY",
"data_page_offset": 4,
"encodings": [
"RLE"
],
"num_values": 15,
"path_in_schema": [
"BooleanColumn"
],
"statistics": {
"max_value": true,
"min_value": false,
"null_count": 5
},
"total_compressed_size": 43,
"total_uncompressed_size": 41,
"type": "BOOLEAN"
},
"offset_index_length": 10,
"offset_index_offset": 106
}
],
"file_offset": 4,
"num_rows": 15,
"ordinal": 0,
"total_byte_size": 41,
"total_compressed_size": 43
}
],
"schema": [
{
"name": "root",
"num_children": 1
},
{
"name": "BooleanColumn",
"repetition_type": "OPTIONAL",
"type": "BOOLEAN"
}
],
"version": 1
}

Binary file not shown.