mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-01-07 03:26:38 +00:00
Parse additional metadata
This commit is contained in:
parent
eabf62f5a1
commit
3d5d423694
@ -137,13 +137,30 @@ export function parquetMetadata(arrayBuffer) {
|
||||
min: decode(column.field_3.field_12.field_2),
|
||||
null_count: column.field_3.field_12.field_3,
|
||||
distinct_count: column.field_3.field_12.field_4,
|
||||
max_value: decode(column.field_3.field_12.field_5),
|
||||
min_value: decode(column.field_3.field_12.field_6),
|
||||
is_max_value_exact: column.field_3.field_12.field_7,
|
||||
is_min_value_exact: column.field_3.field_12.field_8,
|
||||
},
|
||||
encoding_stats: column.field_3.field_13?.map((/** @type {any} */ encodingStat) => ({
|
||||
page_type: encodingStat.field_1,
|
||||
encoding: Encoding[encodingStat.field_2],
|
||||
count: encodingStat.field_3,
|
||||
})),
|
||||
bloom_filter_offset: column.field_3.field_14,
|
||||
bloom_filter_length: column.field_3.field_15,
|
||||
size_statistics: column.field_3.field_16 && {
|
||||
unencoded_byte_array_data_bytes: column.field_3.field_16.field_1,
|
||||
repetition_level_histogram: column.field_3.field_16.field_2,
|
||||
definition_level_histogram: column.field_3.field_16.field_3,
|
||||
},
|
||||
},
|
||||
offset_index_offset: column.field_4,
|
||||
offset_index_length: column.field_5,
|
||||
column_index_offset: column.field_6,
|
||||
column_index_length: column.field_7,
|
||||
crypto_metadata: column.field_8,
|
||||
encrypted_column_metadata: column.field_9,
|
||||
})),
|
||||
total_byte_size: rowGroup.field_2,
|
||||
num_rows: rowGroup.field_3,
|
||||
@ -152,6 +169,9 @@ export function parquetMetadata(arrayBuffer) {
|
||||
descending: sortingColumn.field_2,
|
||||
nulls_first: sortingColumn.field_3,
|
||||
})),
|
||||
file_offset: rowGroup.field_5,
|
||||
total_compressed_size: rowGroup.field_6,
|
||||
ordinal: rowGroup.field_7,
|
||||
}))
|
||||
const key_value_metadata = metadata.field_5?.map((/** @type {any} */ keyValue) => ({
|
||||
key: decode(keyValue.field_1),
|
||||
|
||||
@ -16,7 +16,9 @@
|
||||
"path_in_schema": ["int_map", "key_value", "key"],
|
||||
"statistics": {
|
||||
"max": "k3",
|
||||
"min": "k1"
|
||||
"min": "k1",
|
||||
"max_value": "k3",
|
||||
"min_value": "k1"
|
||||
},
|
||||
"total_compressed_size": 64,
|
||||
"total_uncompressed_size": 82,
|
||||
@ -33,7 +35,9 @@
|
||||
"path_in_schema": ["int_map", "key_value", "value"],
|
||||
"statistics": {
|
||||
"max": "d\u0000\u0000\u0000",
|
||||
"min": "\u0001\u0000\u0000\u0000"
|
||||
"min": "\u0001\u0000\u0000\u0000",
|
||||
"max_value": "d\u0000\u0000\u0000",
|
||||
"min_value": "\u0001\u0000\u0000\u0000"
|
||||
},
|
||||
"total_compressed_size": 60,
|
||||
"total_uncompressed_size": 59,
|
||||
@ -41,6 +45,7 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"file_offset": 4,
|
||||
"num_rows": 7,
|
||||
"total_byte_size": 74752
|
||||
}
|
||||
|
||||
@ -34,12 +34,15 @@
|
||||
"statistics": {
|
||||
"max": "Intersection",
|
||||
"min": "Block",
|
||||
"max_value": "Intersection",
|
||||
"min_value": "Block",
|
||||
"null_count": 1,
|
||||
"distinct_count": 2
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"file_offset": 4,
|
||||
"total_byte_size": 33024,
|
||||
"num_rows": 10
|
||||
}
|
||||
|
||||
@ -18,7 +18,10 @@
|
||||
"path_in_schema": [
|
||||
"long_col"
|
||||
],
|
||||
"statistics": {},
|
||||
"statistics": {
|
||||
"max_value": "\u0001\u0002\u0000\u0000\u0000\u0000\u0000\u0000",
|
||||
"min_value": "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"
|
||||
},
|
||||
"total_compressed_size": 1467,
|
||||
"total_uncompressed_size": 4155,
|
||||
"type": "INT64"
|
||||
@ -26,7 +29,9 @@
|
||||
}
|
||||
],
|
||||
"num_rows": 513,
|
||||
"total_byte_size": 4155
|
||||
"ordinal": 0,
|
||||
"total_byte_size": 4155,
|
||||
"total_compressed_size": 1467
|
||||
}
|
||||
],
|
||||
"schema": [
|
||||
|
||||
@ -34,6 +34,8 @@
|
||||
"statistics": {
|
||||
"max": "\n\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
|
||||
"min": "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
|
||||
"max_value": "\n\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
|
||||
"min_value": "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
|
||||
"null_count": 0
|
||||
},
|
||||
"total_compressed_size": 146,
|
||||
@ -42,8 +44,11 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"file_offset": 4,
|
||||
"num_rows": 10,
|
||||
"ordinal": 0,
|
||||
"total_byte_size": 172,
|
||||
"num_rows": 10
|
||||
"total_compressed_size": 146
|
||||
},
|
||||
{
|
||||
"columns": [
|
||||
@ -63,6 +68,8 @@
|
||||
"statistics": {
|
||||
"max": "\u000f\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
|
||||
"min": "\u000b\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
|
||||
"max_value": "\u000f\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
|
||||
"min_value": "\u000b\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
|
||||
"null_count": 0
|
||||
},
|
||||
"total_compressed_size": 120,
|
||||
@ -71,8 +78,11 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"file_offset": 248,
|
||||
"num_rows": 5,
|
||||
"ordinal": 1,
|
||||
"total_byte_size": 126,
|
||||
"num_rows": 5
|
||||
"total_compressed_size": 120
|
||||
}
|
||||
],
|
||||
"key_value_metadata": [
|
||||
|
||||
Loading…
Reference in New Issue
Block a user