diff --git a/src/convert.js b/src/convert.js index 78ba055..9ebcac7 100644 --- a/src/convert.js +++ b/src/convert.js @@ -120,7 +120,7 @@ export function convert(data, schemaElement, utf8 = true) { * @param {Uint8Array} bytes * @returns {number} */ -function parseDecimal(bytes) { +export function parseDecimal(bytes) { // TODO: handle signed let value = 0 for (const byte of bytes) { diff --git a/src/metadata.js b/src/metadata.js index fa26488..f108758 100644 --- a/src/metadata.js +++ b/src/metadata.js @@ -1,5 +1,5 @@ import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, PageType, ParquetType } from './constants.js' -import { parseFloat16 } from './convert.js' +import { parseDecimal, parseFloat16 } from './convert.js' import { getSchemaPath } from './schema.js' import { deserializeTCompactProtocol } from './thrift.js' @@ -252,30 +252,22 @@ function timeUnit(unit) { * @returns {import("./types.d.ts").Statistics} */ function columnStats(stats, schema) { - const { type, logical_type } = schema + const { type, converted_type, logical_type } = schema function convert(/** @type {Uint8Array} */ value) { if (value === undefined) return value if (type === 'BOOLEAN') return value[0] === 1 if (type === 'BYTE_ARRAY') return new TextDecoder().decode(value) - if (type === 'INT32') { - const view = new DataView(value.buffer, value.byteOffset, value.byteLength) - return view.getInt32(0, true) - } - if (type === 'INT64') { - const view = new DataView(value.buffer, value.byteOffset, value.byteLength) - return view.getBigInt64(0, true) - } - if (type === 'FLOAT') { - const view = new DataView(value.buffer, value.byteOffset, value.byteLength) - return view.getFloat32(0, true) - } - if (type === 'DOUBLE') { - const view = new DataView(value.buffer, value.byteOffset, value.byteLength) - return view.getFloat64(0, true) - } - if (logical_type?.type === 'FLOAT16') { - return parseFloat16(value) - } + const view = new DataView(value.buffer, value.byteOffset, value.byteLength) + if (type === 'FLOAT') return view.getFloat32(0, true) + if (type === 'DOUBLE') return view.getFloat64(0, true) + if (type === 'INT32' && converted_type === 'DATE') return new Date(view.getInt32(0, true) * 86400000) + if (type === 'INT64' && converted_type === 'TIMESTAMP_MICROS') return new Date(Number(view.getBigInt64(0, true) / 1000n)) + if (type === 'INT64' && converted_type === 'TIMESTAMP_MILLIS') return new Date(Number(view.getBigInt64(0, true))) + if (type === 'INT64' && logical_type?.type === 'TIMESTAMP') return new Date(Number(view.getBigInt64(0, true))) + if (type === 'INT32') return view.getInt32(0, true) + if (type === 'INT64') return view.getBigInt64(0, true) + if (converted_type === 'DECIMAL') return parseDecimal(value) * Math.pow(10, -(schema.scale || 0)) + if (logical_type?.type === 'FLOAT16') return parseFloat16(value) return value } return stats && { diff --git a/test/files/byte_stream_split_extended.gzip.metadata.json b/test/files/byte_stream_split_extended.gzip.metadata.json index 0660f2d..70239a7 100644 --- a/test/files/byte_stream_split_extended.gzip.metadata.json +++ b/test/files/byte_stream_split_extended.gzip.metadata.json @@ -530,31 +530,11 @@ "total_compressed_size": 736, "data_page_offset": 11065, "statistics": { - "max": [ - 0, - 19, - 139, - 153 - ], - "min": [ - 0, - 9, - 177, - 23 - ], + "max": 1280.921, + "min": 635.159, "null_count": 0, - "max_value": [ - 0, - 19, - 139, - 153 - ], - "min_value": [ - 0, - 9, - 177, - 23 - ] + "max_value": 1280.921, + "min_value": 635.159 }, "encoding_stats": [ { @@ -582,31 +562,11 @@ "total_compressed_size": 643, "data_page_offset": 11882, "statistics": { - "max": [ - 0, - 19, - 139, - 153 - ], - "min": [ - 0, - 9, - 177, - 23 - ], + "max": 1280.921, + "min": 635.159, "null_count": 0, - "max_value": [ - 0, - 19, - 139, - 153 - ], - "min_value": [ - 0, - 9, - 177, - 23 - ] + "max_value": 1280.921, + "min_value": 635.159 }, "encoding_stats": [ { diff --git a/test/files/duckdb5533.metadata.json b/test/files/duckdb5533.metadata.json index 0a3afbd..024120e 100644 --- a/test/files/duckdb5533.metadata.json +++ b/test/files/duckdb5533.metadata.json @@ -76,8 +76,8 @@ "data_page_offset": 4, "statistics": { "null_count": 0, - "max_value": 1669570964280, - "min_value": 1669570963514 + "max_value": "2022-11-27T17:42:44.280Z", + "min_value": "2022-11-27T17:42:43.514Z" } }, "offset_index_offset": 656, diff --git a/test/files/fixed_length_decimal.metadata.json b/test/files/fixed_length_decimal.metadata.json index 1c37cbd..566a936 100644 --- a/test/files/fixed_length_decimal.metadata.json +++ b/test/files/fixed_length_decimal.metadata.json @@ -37,32 +37,8 @@ "total_compressed_size": 319, "data_page_offset": 4, "statistics": { - "max": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 9, - 96 - ], - "min": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 200 - ], + "max": 24, + "min": 2, "null_count": 0 }, "encoding_stats": [ diff --git a/test/files/nested_structs.rust.metadata.json b/test/files/nested_structs.rust.metadata.json index 442e45a..1605a91 100644 --- a/test/files/nested_structs.rust.metadata.json +++ b/test/files/nested_structs.rust.metadata.json @@ -6511,8 +6511,8 @@ "data_page_offset": 31605, "dictionary_page_offset": 31574, "statistics": { - "max_value": 1608822900000000000, - "min_value": 1608822900000000000 + "max_value": "+052951-07-27T10:00:00.000Z", + "min_value": "+052951-07-27T10:00:00.000Z" } } }, @@ -6536,8 +6536,8 @@ "data_page_offset": 31767, "dictionary_page_offset": 31736, "statistics": { - "max_value": 1608822900000000000, - "min_value": 1608822900000000000 + "max_value": "+052951-07-27T10:00:00.000Z", + "min_value": "+052951-07-27T10:00:00.000Z" } } }, @@ -6561,8 +6561,8 @@ "data_page_offset": 31929, "dictionary_page_offset": 31898, "statistics": { - "max_value": 0, - "min_value": 0 + "max_value": "1970-01-01T00:00:00.000Z", + "min_value": "1970-01-01T00:00:00.000Z" } } }, @@ -6611,8 +6611,8 @@ "data_page_offset": 32256, "dictionary_page_offset": 32225, "statistics": { - "max_value": 0, - "min_value": 0 + "max_value": "1970-01-01T00:00:00.000Z", + "min_value": "1970-01-01T00:00:00.000Z" } } }, @@ -6636,8 +6636,8 @@ "data_page_offset": 32418, "dictionary_page_offset": 32387, "statistics": { - "max_value": 0, - "min_value": 0 + "max_value": "1970-01-01T00:00:00.000Z", + "min_value": "1970-01-01T00:00:00.000Z" } } },