Parse additional metadata

This commit is contained in:
Kenny Daniel 2024-05-04 00:28:22 -07:00
parent eabf62f5a1
commit 3d5d423694
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
5 changed files with 49 additions and 6 deletions

@ -137,13 +137,30 @@ export function parquetMetadata(arrayBuffer) {
min: decode(column.field_3.field_12.field_2),
null_count: column.field_3.field_12.field_3,
distinct_count: column.field_3.field_12.field_4,
max_value: decode(column.field_3.field_12.field_5),
min_value: decode(column.field_3.field_12.field_6),
is_max_value_exact: column.field_3.field_12.field_7,
is_min_value_exact: column.field_3.field_12.field_8,
},
encoding_stats: column.field_3.field_13?.map((/** @type {any} */ encodingStat) => ({
page_type: encodingStat.field_1,
encoding: Encoding[encodingStat.field_2],
count: encodingStat.field_3,
})),
bloom_filter_offset: column.field_3.field_14,
bloom_filter_length: column.field_3.field_15,
size_statistics: column.field_3.field_16 && {
unencoded_byte_array_data_bytes: column.field_3.field_16.field_1,
repetition_level_histogram: column.field_3.field_16.field_2,
definition_level_histogram: column.field_3.field_16.field_3,
},
},
offset_index_offset: column.field_4,
offset_index_length: column.field_5,
column_index_offset: column.field_6,
column_index_length: column.field_7,
crypto_metadata: column.field_8,
encrypted_column_metadata: column.field_9,
})),
total_byte_size: rowGroup.field_2,
num_rows: rowGroup.field_3,
@ -152,6 +169,9 @@ export function parquetMetadata(arrayBuffer) {
descending: sortingColumn.field_2,
nulls_first: sortingColumn.field_3,
})),
file_offset: rowGroup.field_5,
total_compressed_size: rowGroup.field_6,
ordinal: rowGroup.field_7,
}))
const key_value_metadata = metadata.field_5?.map((/** @type {any} */ keyValue) => ({
key: decode(keyValue.field_1),

@ -16,7 +16,9 @@
"path_in_schema": ["int_map", "key_value", "key"],
"statistics": {
"max": "k3",
"min": "k1"
"min": "k1",
"max_value": "k3",
"min_value": "k1"
},
"total_compressed_size": 64,
"total_uncompressed_size": 82,
@ -33,7 +35,9 @@
"path_in_schema": ["int_map", "key_value", "value"],
"statistics": {
"max": "d\u0000\u0000\u0000",
"min": "\u0001\u0000\u0000\u0000"
"min": "\u0001\u0000\u0000\u0000",
"max_value": "d\u0000\u0000\u0000",
"min_value": "\u0001\u0000\u0000\u0000"
},
"total_compressed_size": 60,
"total_uncompressed_size": 59,
@ -41,6 +45,7 @@
}
}
],
"file_offset": 4,
"num_rows": 7,
"total_byte_size": 74752
}

@ -34,12 +34,15 @@
"statistics": {
"max": "Intersection",
"min": "Block",
"max_value": "Intersection",
"min_value": "Block",
"null_count": 1,
"distinct_count": 2
}
}
}
],
"file_offset": 4,
"total_byte_size": 33024,
"num_rows": 10
}

@ -18,7 +18,10 @@
"path_in_schema": [
"long_col"
],
"statistics": {},
"statistics": {
"max_value": "\u0001\u0002\u0000\u0000\u0000\u0000\u0000\u0000",
"min_value": "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000"
},
"total_compressed_size": 1467,
"total_uncompressed_size": 4155,
"type": "INT64"
@ -26,7 +29,9 @@
}
],
"num_rows": 513,
"total_byte_size": 4155
"ordinal": 0,
"total_byte_size": 4155,
"total_compressed_size": 1467
}
],
"schema": [

@ -34,6 +34,8 @@
"statistics": {
"max": "\n\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
"min": "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
"max_value": "\n\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
"min_value": "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
"null_count": 0
},
"total_compressed_size": 146,
@ -42,8 +44,11 @@
}
}
],
"file_offset": 4,
"num_rows": 10,
"ordinal": 0,
"total_byte_size": 172,
"num_rows": 10
"total_compressed_size": 146
},
{
"columns": [
@ -63,6 +68,8 @@
"statistics": {
"max": "\u000f\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
"min": "\u000b\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
"max_value": "\u000f\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
"min_value": "\u000b\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
"null_count": 0
},
"total_compressed_size": 120,
@ -71,8 +78,11 @@
}
}
],
"file_offset": 248,
"num_rows": 5,
"ordinal": 1,
"total_byte_size": 126,
"num_rows": 5
"total_compressed_size": 120
}
],
"key_value_metadata": [