diff --git a/src/metadata.js b/src/metadata.js index 3e4c11d..ccc6271 100644 --- a/src/metadata.js +++ b/src/metadata.js @@ -137,13 +137,30 @@ export function parquetMetadata(arrayBuffer) { min: decode(column.field_3.field_12.field_2), null_count: column.field_3.field_12.field_3, distinct_count: column.field_3.field_12.field_4, + max_value: decode(column.field_3.field_12.field_5), + min_value: decode(column.field_3.field_12.field_6), + is_max_value_exact: column.field_3.field_12.field_7, + is_min_value_exact: column.field_3.field_12.field_8, }, encoding_stats: column.field_3.field_13?.map((/** @type {any} */ encodingStat) => ({ page_type: encodingStat.field_1, encoding: Encoding[encodingStat.field_2], count: encodingStat.field_3, })), + bloom_filter_offset: column.field_3.field_14, + bloom_filter_length: column.field_3.field_15, + size_statistics: column.field_3.field_16 && { + unencoded_byte_array_data_bytes: column.field_3.field_16.field_1, + repetition_level_histogram: column.field_3.field_16.field_2, + definition_level_histogram: column.field_3.field_16.field_3, + }, }, + offset_index_offset: column.field_4, + offset_index_length: column.field_5, + column_index_offset: column.field_6, + column_index_length: column.field_7, + crypto_metadata: column.field_8, + encrypted_column_metadata: column.field_9, })), total_byte_size: rowGroup.field_2, num_rows: rowGroup.field_3, @@ -152,6 +169,9 @@ export function parquetMetadata(arrayBuffer) { descending: sortingColumn.field_2, nulls_first: sortingColumn.field_3, })), + file_offset: rowGroup.field_5, + total_compressed_size: rowGroup.field_6, + ordinal: rowGroup.field_7, })) const key_value_metadata = metadata.field_5?.map((/** @type {any} */ keyValue) => ({ key: decode(keyValue.field_1), diff --git a/test/files/Int_Map.metadata.json b/test/files/Int_Map.metadata.json index 4bf84aa..dc6a681 100644 --- a/test/files/Int_Map.metadata.json +++ b/test/files/Int_Map.metadata.json @@ -16,7 +16,9 @@ "path_in_schema": ["int_map", 
"key_value", "key"], "statistics": { "max": "k3", - "min": "k1" + "min": "k1", + "max_value": "k3", + "min_value": "k1" }, "total_compressed_size": 64, "total_uncompressed_size": 82, @@ -33,7 +35,9 @@ "path_in_schema": ["int_map", "key_value", "value"], "statistics": { "max": "d\u0000\u0000\u0000", - "min": "\u0001\u0000\u0000\u0000" + "min": "\u0001\u0000\u0000\u0000", + "max_value": "d\u0000\u0000\u0000", + "min_value": "\u0001\u0000\u0000\u0000" }, "total_compressed_size": 60, "total_uncompressed_size": 59, @@ -41,6 +45,7 @@ } } ], + "file_offset": 4, "num_rows": 7, "total_byte_size": 74752 } diff --git a/test/files/addrtype-missing-value.metadata.json b/test/files/addrtype-missing-value.metadata.json index 15cd13c..5c72310 100644 --- a/test/files/addrtype-missing-value.metadata.json +++ b/test/files/addrtype-missing-value.metadata.json @@ -34,12 +34,15 @@ "statistics": { "max": "Intersection", "min": "Block", + "max_value": "Intersection", + "min_value": "Block", "null_count": 1, "distinct_count": 2 } } } ], + "file_offset": 4, "total_byte_size": 33024, "num_rows": 10 } diff --git a/test/files/concatenated_gzip_members.metadata.json b/test/files/concatenated_gzip_members.metadata.json index cee5ddf..7405607 100644 --- a/test/files/concatenated_gzip_members.metadata.json +++ b/test/files/concatenated_gzip_members.metadata.json @@ -18,7 +18,10 @@ "path_in_schema": [ "long_col" ], - "statistics": {}, + "statistics": { + "max_value": "\u0001\u0002\u0000\u0000\u0000\u0000\u0000\u0000", + "min_value": "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000" + }, "total_compressed_size": 1467, "total_uncompressed_size": 4155, "type": "INT64" @@ -26,7 +29,9 @@ } ], "num_rows": 513, - "total_byte_size": 4155 + "ordinal": 0, + "total_byte_size": 4155, + "total_compressed_size": 1467 } ], "schema": [ diff --git a/test/files/rowgroups.metadata.json b/test/files/rowgroups.metadata.json index 12dea07..58516b2 100644 --- a/test/files/rowgroups.metadata.json +++ 
b/test/files/rowgroups.metadata.json @@ -34,6 +34,8 @@ "statistics": { "max": "\n\u0000\u0000\u0000\u0000\u0000\u0000\u0000", "min": "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000", + "max_value": "\n\u0000\u0000\u0000\u0000\u0000\u0000\u0000", + "min_value": "\u0001\u0000\u0000\u0000\u0000\u0000\u0000\u0000", "null_count": 0 }, "total_compressed_size": 146, @@ -42,8 +44,11 @@ } } ], + "file_offset": 4, + "num_rows": 10, + "ordinal": 0, "total_byte_size": 172, - "num_rows": 10 + "total_compressed_size": 146 }, { "columns": [ @@ -63,6 +68,8 @@ "statistics": { "max": "\u000f\u0000\u0000\u0000\u0000\u0000\u0000\u0000", "min": "\u000b\u0000\u0000\u0000\u0000\u0000\u0000\u0000", + "max_value": "\u000f\u0000\u0000\u0000\u0000\u0000\u0000\u0000", + "min_value": "\u000b\u0000\u0000\u0000\u0000\u0000\u0000\u0000", "null_count": 0 }, "total_compressed_size": 120, @@ -71,8 +78,11 @@ } } ], + "file_offset": 248, + "num_rows": 5, + "ordinal": 1, "total_byte_size": 126, - "num_rows": 5 + "total_compressed_size": 120 } ], "key_value_metadata": [