mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-01-09 04:26:38 +00:00
dict-page-offset-zero.parquet
This commit is contained in:
parent
4f7791354c
commit
5eeb05da40
@ -28,14 +28,14 @@ import { concat } from './utils.js'
|
||||
export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schemaPath, compressors) {
|
||||
/** @type {ArrayLike<any> | undefined} */
|
||||
let dictionary = undefined
|
||||
let valuesSeen = 0
|
||||
let seen = 0
|
||||
/** @type {any[]} */
|
||||
const rowData = []
|
||||
const { element } = schemaPath[schemaPath.length - 1]
|
||||
// column reader:
|
||||
const reader = { view: new DataView(arrayBuffer, columnOffset), offset: 0 }
|
||||
|
||||
while (valuesSeen < rowGroup.num_rows) {
|
||||
while (seen < rowGroup.num_rows) {
|
||||
// parse column header
|
||||
const header = parquetHeader(reader)
|
||||
if (header.compressed_page_size === undefined) {
|
||||
@ -58,7 +58,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
compressedBytes, Number(header.uncompressed_page_size), columnMetadata.codec, compressors
|
||||
)
|
||||
const { definitionLevels, repetitionLevels, dataPage } = readDataPage(page, daph, schemaPath, columnMetadata)
|
||||
valuesSeen += daph.num_values
|
||||
seen += daph.num_values
|
||||
// assert(!daph.statistics || daph.statistics.null_count === BigInt(daph.num_values - dataPage.length))
|
||||
|
||||
// construct output values: skip nulls and construct lists
|
||||
@ -89,7 +89,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
const { definitionLevels, repetitionLevels, dataPage } = readDataPageV2(
|
||||
compressedBytes, header, schemaPath, columnMetadata, compressors
|
||||
)
|
||||
valuesSeen += daph2.num_values
|
||||
seen += daph2.num_values
|
||||
|
||||
dereferenceDictionary(dictionary, dataPage)
|
||||
values = convert(dataPage, element)
|
||||
@ -145,7 +145,7 @@ function dereferenceDictionary(dictionary, dataPage) {
|
||||
*/
|
||||
export function getColumnOffset({ dictionary_page_offset, data_page_offset }) {
|
||||
let columnOffset = dictionary_page_offset
|
||||
if (dictionary_page_offset === undefined || data_page_offset < dictionary_page_offset) {
|
||||
if (!dictionary_page_offset || data_page_offset < dictionary_page_offset) {
|
||||
columnOffset = data_page_offset
|
||||
}
|
||||
return Number(columnOffset)
|
||||
|
||||
41
test/files/dict-page-offset-zero.json
Normal file
41
test/files/dict-page-offset-zero.json
Normal file
@ -0,0 +1,41 @@
|
||||
[
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552],
|
||||
[1552]
|
||||
]
|
||||
94
test/files/dict-page-offset-zero.metadata.json
Normal file
94
test/files/dict-page-offset-zero.metadata.json
Normal file
@ -0,0 +1,94 @@
|
||||
{
|
||||
"version": 1,
|
||||
"schema": [
|
||||
{
|
||||
"name": "root",
|
||||
"num_children": 1
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "l_partkey"
|
||||
}
|
||||
],
|
||||
"num_rows": 39,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"file_offset": 4,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"PLAIN",
|
||||
"BIT_PACKED",
|
||||
"RLE"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"l_partkey"
|
||||
],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 39,
|
||||
"total_uncompressed_size": 180,
|
||||
"total_compressed_size": 40,
|
||||
"data_page_offset": 4,
|
||||
"dictionary_page_offset": 0,
|
||||
"statistics": {
|
||||
"max": 1552,
|
||||
"min": 1552,
|
||||
"null_count": 0,
|
||||
"max_value": 1552,
|
||||
"min_value": 1552
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 0,
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"bloom_filter_length": [
|
||||
{
|
||||
"field_1": {
|
||||
"field_1": 0,
|
||||
"field_2": 162,
|
||||
"field_3": 22,
|
||||
"field_5": {
|
||||
"field_1": 39,
|
||||
"field_2": 0,
|
||||
"field_3": 3,
|
||||
"field_4": 4
|
||||
}
|
||||
},
|
||||
"field_2": 22
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 67,
|
||||
"offset_index_length": 10,
|
||||
"column_index_offset": 44,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
}
|
||||
],
|
||||
"total_byte_size": 180,
|
||||
"num_rows": 39
|
||||
}
|
||||
],
|
||||
"key_value_metadata": [
|
||||
{
|
||||
"key": "is.date.correct",
|
||||
"value": "true"
|
||||
},
|
||||
{
|
||||
"key": "dremio.arrow.schema.2.1",
|
||||
"value": "{\n \"fields\" : [ {\n \"name\" : \"l_partkey\",\n \"nullable\" : true,\n \"type\" : {\n \"name\" : \"int\",\n \"bitWidth\" : 32,\n \"isSigned\" : true\n },\n \"children\" : [ ]\n } ]\n}"
|
||||
},
|
||||
{
|
||||
"key": "dremio.version",
|
||||
"value": "3.2.0-201905102005330382-0598733"
|
||||
}
|
||||
],
|
||||
"created_by": "parquet-mr version 1.12.0-201812210311360288-a86293f (build cec1a483e9dcd545e09170ae787d3dcb13744433)",
|
||||
"metadata_length": 550
|
||||
}
|
||||
BIN
test/files/dict-page-offset-zero.parquet
Normal file
BIN
test/files/dict-page-offset-zero.parquet
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user