rle_boolean_encoding.parquet

This commit is contained in:
Kenny Daniel 2024-05-22 18:23:13 -07:00
parent 9369faad46
commit 1f8289b4b2
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
5 changed files with 126 additions and 0 deletions

@@ -3,6 +3,7 @@ import { deltaBinaryUnpack, deltaByteArray, deltaLengthByteArray } from './delta
import { bitWidth, byteStreamSplit, readRleBitPackedHybrid } from './encoding.js'
import { readPlain } from './plain.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
import { readVarInt } from './thrift.js'
/**
* Read a data page from the given Uint8Array.
@@ -31,6 +32,11 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata,
// assert(reader.offset === daph2.repetition_levels_byte_length)
// definition levels
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
if (columnMetadata.type === 'BOOLEAN' && maxDefinitionLevel) {
// special case for boolean data page v2
readVarInt(reader) // assert(=== num_values)
}
const definitionLevels = readDefinitionLevelsV2(reader, daph2, schemaPath)
// assert(reader.offset === daph2.repetition_levels_byte_length + daph2.definition_levels_byte_length)

@@ -57,6 +57,7 @@ function readRle(reader, count, bitWidth, output, seen) {
let value = 0
if (width === 1) {
value = reader.view.getUint8(reader.offset)
// assert(value < 1 << bitWidth)
} else if (width === 2) {
value = reader.view.getUint16(reader.offset, true)
} else if (width === 4) {

@@ -0,0 +1,70 @@
[
[true],
[false],
[null],
[true],
[true],
[false],
[false],
[true],
[true],
[true],
[false],
[false],
[true],
[true],
[false],
[null],
[true],
[true],
[false],
[false],
[true],
[true],
[false],
[null],
[true],
[true],
[false],
[false],
[true],
[true],
[true],
[false],
[false],
[false],
[false],
[true],
[true],
[false],
[null],
[true],
[true],
[false],
[false],
[true],
[true],
[true],
[false],
[false],
[null],
[true],
[true],
[false],
[false],
[true],
[true],
[true],
[false],
[true],
[true],
[false],
[null],
[true],
[true],
[false],
[false],
[true],
[true],
[true]
]

@@ -0,0 +1,49 @@
{
"version": 1,
"schema": [
{
"name": "table",
"num_children": 1
},
{
"type": "BOOLEAN",
"repetition_type": "OPTIONAL",
"name": "datatype_boolean",
"field_id": 1
}
],
"num_rows": 68,
"row_groups": [
{
"columns": [
{
"file_offset": 0,
"meta_data": {
"type": "BOOLEAN",
"encodings": [
"RLE"
],
"path_in_schema": [
"datatype_boolean"
],
"codec": "GZIP",
"num_values": 68,
"total_uncompressed_size": 49,
"total_compressed_size": 69,
"data_page_offset": 4,
"statistics": {
"max": true,
"min": false,
"null_count": 6,
"max_value": true,
"min_value": false
}
}
}
],
"total_byte_size": 69,
"num_rows": 68
}
],
"metadata_length": 111
}

Binary file not shown.