diff --git a/src/datapage.js b/src/datapage.js index 69c79af..5d0b07c 100644 --- a/src/datapage.js +++ b/src/datapage.js @@ -38,7 +38,8 @@ export function readDataPage(bytes, daph, schemaPath, { type }) { const bitWidth = type === 'BOOLEAN' ? 1 : view.getUint8(reader.offset++) if (bitWidth) { dataPage = new Array(nValues) - readRleBitPackedHybrid(reader, bitWidth, view.byteLength - reader.offset, dataPage) + const encodedLength = type === 'BOOLEAN' ? 0 : view.byteLength - reader.offset + readRleBitPackedHybrid(reader, bitWidth, encodedLength, dataPage) } else { dataPage = new Uint8Array(nValues) // nValue zeroes } diff --git a/src/encoding.js b/src/encoding.js index 5d315a3..6f1f4ab 100644 --- a/src/encoding.js +++ b/src/encoding.js @@ -22,7 +22,7 @@ export function bitWidth(value) { */ export function readRleBitPackedHybrid(reader, width, length, output) { if (!length) { - // length = reader.view.getUint32(reader.offset, true) + length = reader.view.getUint32(reader.offset, true) reader.offset += 4 } let seen = 0 diff --git a/test/files/boolean_rle.json b/test/files/boolean_rle.json new file mode 100644 index 0000000..7c29a74 --- /dev/null +++ b/test/files/boolean_rle.json @@ -0,0 +1,17 @@ +[ + [1], + [1], + [1], + [1], + [1], + [null], + [null], + [null], + [null], + [null], + [0], + [0], + [0], + [0], + [0] +] diff --git a/test/files/boolean_rle.metadata.json b/test/files/boolean_rle.metadata.json new file mode 100644 index 0000000..43c937a --- /dev/null +++ b/test/files/boolean_rle.metadata.json @@ -0,0 +1,61 @@ +{ + "created_by": "Polars", + "key_value_metadata": [ + { + "key": "ARROW:schema", + "value": "/////3YAAAAEAAAA8v///xQAAAAEAAEAAAAKAAsACAAKAAQA+P///wwAAAAIAAgAAAAEAAEAAAAEAAAA7P///ywAAAAgAAAAGAAAAAEGAAAQABIABAAQABEACAAAAAwAAAAAAPz///8EAAQADQAAAEJvb2xlYW5Db2x1bW4A" + } + ], + "metadata_length": 308, + "num_rows": 15, + "row_groups": [ + { + "columns": [ + { + "column_index_length": 17, + "column_index_offset": 89, + "crypto_metadata": 17, + "file_offset": 47, + "meta_data": { + "codec": "SNAPPY", + "data_page_offset": 4, + "encodings": [ + "RLE" + ], + "num_values": 15, + "path_in_schema": [ + "BooleanColumn" + ], + "statistics": { + "max_value": true, + "min_value": false, + "null_count": 5 + }, + "total_compressed_size": 43, + "total_uncompressed_size": 41, + "type": "BOOLEAN" + }, + "offset_index_length": 10, + "offset_index_offset": 106 + } + ], + "file_offset": 4, + "num_rows": 15, + "ordinal": 0, + "total_byte_size": 41, + "total_compressed_size": 43 + } + ], + "schema": [ + { + "name": "root", + "num_children": 1 + }, + { + "name": "BooleanColumn", + "repetition_type": "OPTIONAL", + "type": "BOOLEAN" + } + ], + "version": 1 +} diff --git a/test/files/boolean_rle.parquet b/test/files/boolean_rle.parquet new file mode 100644 index 0000000..0041319 Binary files /dev/null and b/test/files/boolean_rle.parquet differ