mirror of https://github.com/asadbek064/hyparquet.git (synced 2025-12-29 16:36:37 +00:00)
rle_boolean_encoding.parquet

commit 1f8289b4b2
parent 9369faad46
@@ -3,6 +3,7 @@ import { deltaBinaryUnpack, deltaByteArray, deltaLengthByteArray } from './delta
import { bitWidth, byteStreamSplit, readRleBitPackedHybrid } from './encoding.js'
import { readPlain } from './plain.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
import { readVarInt } from './thrift.js'

/**
 * Read a data page from the given Uint8Array.
@@ -31,6 +32,11 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata,
  // assert(reader.offset === daph2.repetition_levels_byte_length)

  // definition levels
  const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
  if (columnMetadata.type === 'BOOLEAN' && maxDefinitionLevel) {
    // special case for boolean data page v2
    readVarInt(reader) // assert(=== num_values)
  }
  const definitionLevels = readDefinitionLevelsV2(reader, daph2, schemaPath)
  // assert(reader.offset === daph2.repetition_levels_byte_length + daph2.definition_levels_byte_length)
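The newly imported readVarInt is assumed here to consume an unsigned LEB128 varint from the page reader before the definition levels are decoded; the trailing comment suggests the value read is expected to equal num_values. A minimal, illustrative sketch of such a varint read, assuming a reader of shape { view: DataView, offset: number }; the name is hypothetical and this is not hyparquet's thrift.js implementation:

// Sketch only: read an unsigned LEB128 varint and advance reader.offset.
// Assumes reader = { view: DataView, offset: number }.
function readVarIntSketch(reader) {
  let result = 0
  let shift = 0
  while (true) {
    const byte = reader.view.getUint8(reader.offset++)
    result |= (byte & 0x7f) << shift
    if ((byte & 0x80) === 0) return result
    shift += 7
  }
}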
@@ -57,6 +57,7 @@ function readRle(reader, count, bitWidth, output, seen) {
  let value = 0
  if (width === 1) {
    value = reader.view.getUint8(reader.offset)
    // assert(value < 1 << bitWidth)
  } else if (width === 2) {
    value = reader.view.getUint16(reader.offset, true)
  } else if (width === 4) {
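In this hunk, readRle reads the repeated value of an RLE run using ceil(bitWidth / 8) little-endian bytes. For context, a self-contained sketch of decoding one RLE run, assuming the RLE/bit-packed hybrid layout from the Parquet format spec (ULEB128 header, LSB 0 means an RLE run of header >> 1 values, then the repeated value); this is an illustration under those assumptions, not hyparquet's code:

// Sketch only: decode one RLE run from Parquet's RLE/bit-packed hybrid encoding.
// Assumes: ULEB128 header, (header & 1) === 0 => RLE run of (header >> 1) values,
// followed by the repeated value in ceil(bitWidth / 8) little-endian bytes.
function readRleRunSketch(view, offset, bitWidth) {
  let header = 0, shift = 0, byte
  do {
    byte = view.getUint8(offset++)
    header |= (byte & 0x7f) << shift
    shift += 7
  } while (byte & 0x80)
  if (header & 1) throw new Error('bit-packed run, not an RLE run')
  const count = header >>> 1
  const width = (bitWidth + 7) >> 3 // bytes per repeated value
  let value = 0
  if (width === 1) value = view.getUint8(offset)
  else if (width === 2) value = view.getUint16(offset, true)
  else if (width === 4) value = view.getUint32(offset, true)
  return { count, value, nextOffset: offset + width }
}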
test/files/rle_boolean_encoding.json (new file, 70 lines)
@@ -0,0 +1,70 @@
[
  [true],
  [false],
  [null],
  [true],
  [true],
  [false],
  [false],
  [true],
  [true],
  [true],
  [false],
  [false],
  [true],
  [true],
  [false],
  [null],
  [true],
  [true],
  [false],
  [false],
  [true],
  [true],
  [false],
  [null],
  [true],
  [true],
  [false],
  [false],
  [true],
  [true],
  [true],
  [false],
  [false],
  [false],
  [false],
  [true],
  [true],
  [false],
  [null],
  [true],
  [true],
  [false],
  [false],
  [true],
  [true],
  [true],
  [false],
  [false],
  [null],
  [true],
  [true],
  [false],
  [false],
  [true],
  [true],
  [true],
  [false],
  [true],
  [true],
  [false],
  [null],
  [true],
  [true],
  [false],
  [false],
  [true],
  [true],
  [true]
]
test/files/rle_boolean_encoding.metadata.json (new file, 49 lines)
@@ -0,0 +1,49 @@
{
  "version": 1,
  "schema": [
    {
      "name": "table",
      "num_children": 1
    },
    {
      "type": "BOOLEAN",
      "repetition_type": "OPTIONAL",
      "name": "datatype_boolean",
      "field_id": 1
    }
  ],
  "num_rows": 68,
  "row_groups": [
    {
      "columns": [
        {
          "file_offset": 0,
          "meta_data": {
            "type": "BOOLEAN",
            "encodings": [
              "RLE"
            ],
            "path_in_schema": [
              "datatype_boolean"
            ],
            "codec": "GZIP",
            "num_values": 68,
            "total_uncompressed_size": 49,
            "total_compressed_size": 69,
            "data_page_offset": 4,
            "statistics": {
              "max": true,
              "min": false,
              "null_count": 6,
              "max_value": true,
              "min_value": false
            }
          }
        }
      ],
      "total_byte_size": 69,
      "num_rows": 68
    }
  ],
  "metadata_length": 111
}
test/files/rle_boolean_encoding.parquet (new binary file, not shown)
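The fixture pair (expected JSON plus the binary parquet file) is presumably exercised by a file-based test that decodes the parquet file and compares the rows against the expected output. A hedged sketch of such a check, assuming hyparquet's parquetRead and asyncBufferFromFile helpers behave as in its README; since the column metadata reports codec GZIP, a compressors option (for example from the hyparquet-compressors package) is assumed to be needed as well:

// Sketch only: decode the new fixture and compare against the expected JSON.
// The compressors import and the exact option shape are assumptions, not taken
// from this commit.
import { asyncBufferFromFile, parquetRead } from 'hyparquet'
import { compressors } from 'hyparquet-compressors'
import { readFileSync } from 'fs'

const expected = JSON.parse(readFileSync('test/files/rle_boolean_encoding.json', 'utf8'))
const file = await asyncBufferFromFile('test/files/rle_boolean_encoding.parquet')
await parquetRead({
  file,
  compressors,
  onComplete(rows) {
    // each row should be a one-element array like [true], [false], or [null]
    console.assert(JSON.stringify(rows) === JSON.stringify(expected), 'rows should match expected JSON')
  },
})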