Convert date and decimal stats

This commit is contained in:
Kenny Daniel 2024-05-24 15:22:59 -07:00
parent a56420de2f
commit efdbf459a5
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
6 changed files with 36 additions and 108 deletions

@ -120,7 +120,7 @@ export function convert(data, schemaElement, utf8 = true) {
* @param {Uint8Array} bytes
* @returns {number}
*/
function parseDecimal(bytes) {
export function parseDecimal(bytes) {
// TODO: handle signed
let value = 0
for (const byte of bytes) {

@ -1,5 +1,5 @@
import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, PageType, ParquetType } from './constants.js'
import { parseFloat16 } from './convert.js'
import { parseDecimal, parseFloat16 } from './convert.js'
import { getSchemaPath } from './schema.js'
import { deserializeTCompactProtocol } from './thrift.js'
@ -252,30 +252,22 @@ function timeUnit(unit) {
* @returns {import("./types.d.ts").Statistics}
*/
function columnStats(stats, schema) {
const { type, logical_type } = schema
const { type, converted_type, logical_type } = schema
/**
 * Convert one raw statistics value (min/max bytes) into a typed javascript value,
 * based on the physical `type`, `converted_type` and `logical_type` of the
 * enclosing column schema.
 *
 * Branch order matters: the DATE and TIMESTAMP checks must run before the
 * generic INT32 / INT64 checks, otherwise those annotated columns would be
 * returned as plain integers and never reach the Date conversion.
 *
 * @param {Uint8Array | undefined} value raw statistic bytes
 * @returns {any} boolean, string, number, bigint or Date depending on the
 *   column type; the input is returned unchanged when no conversion applies
 */
function convert(/** @type {Uint8Array} */ value) {
  if (value === undefined) return value
  if (type === 'BOOLEAN') return value[0] === 1
  if (type === 'BYTE_ARRAY') return new TextDecoder().decode(value)

  // One little-endian view shared by every fixed-width numeric branch below.
  const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
  if (type === 'FLOAT') return view.getFloat32(0, true)
  if (type === 'DOUBLE') return view.getFloat64(0, true)

  // Annotated integers first. DATE is scaled by 86400000 (milliseconds per day).
  if (type === 'INT32' && converted_type === 'DATE') return new Date(view.getInt32(0, true) * 86400000)
  // Divide in bigint space before narrowing to Number for the Date constructor.
  if (type === 'INT64' && converted_type === 'TIMESTAMP_MICROS') return new Date(Number(view.getBigInt64(0, true) / 1000n))
  if (type === 'INT64' && converted_type === 'TIMESTAMP_MILLIS') return new Date(Number(view.getBigInt64(0, true)))
  // NOTE(review): logical TIMESTAMP is read as milliseconds here and the unit
  // of the logical type is not consulted - TODO confirm this is intended.
  if (type === 'INT64' && logical_type?.type === 'TIMESTAMP') return new Date(Number(view.getBigInt64(0, true)))

  // Plain integers without a date/time annotation.
  if (type === 'INT32') return view.getInt32(0, true)
  if (type === 'INT64') return view.getBigInt64(0, true)

  // Remaining byte-array encodings: scale the unscaled decimal by 10^-scale.
  if (converted_type === 'DECIMAL') return parseDecimal(value) * Math.pow(10, -(schema.scale || 0))
  if (logical_type?.type === 'FLOAT16') return parseFloat16(value)
  return value
}
return stats && {

@ -530,31 +530,11 @@
"total_compressed_size": 736,
"data_page_offset": 11065,
"statistics": {
"max": [
0,
19,
139,
153
],
"min": [
0,
9,
177,
23
],
"max": 1280.921,
"min": 635.159,
"null_count": 0,
"max_value": [
0,
19,
139,
153
],
"min_value": [
0,
9,
177,
23
]
"max_value": 1280.921,
"min_value": 635.159
},
"encoding_stats": [
{
@ -582,31 +562,11 @@
"total_compressed_size": 643,
"data_page_offset": 11882,
"statistics": {
"max": [
0,
19,
139,
153
],
"min": [
0,
9,
177,
23
],
"max": 1280.921,
"min": 635.159,
"null_count": 0,
"max_value": [
0,
19,
139,
153
],
"min_value": [
0,
9,
177,
23
]
"max_value": 1280.921,
"min_value": 635.159
},
"encoding_stats": [
{

@ -76,8 +76,8 @@
"data_page_offset": 4,
"statistics": {
"null_count": 0,
"max_value": 1669570964280,
"min_value": 1669570963514
"max_value": "2022-11-27T17:42:44.280Z",
"min_value": "2022-11-27T17:42:43.514Z"
}
},
"offset_index_offset": 656,

@ -37,32 +37,8 @@
"total_compressed_size": 319,
"data_page_offset": 4,
"statistics": {
"max": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
9,
96
],
"min": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
200
],
"max": 24,
"min": 2,
"null_count": 0
},
"encoding_stats": [

@ -6511,8 +6511,8 @@
"data_page_offset": 31605,
"dictionary_page_offset": 31574,
"statistics": {
"max_value": 1608822900000000000,
"min_value": 1608822900000000000
"max_value": "+052951-07-27T10:00:00.000Z",
"min_value": "+052951-07-27T10:00:00.000Z"
}
}
},
@ -6536,8 +6536,8 @@
"data_page_offset": 31767,
"dictionary_page_offset": 31736,
"statistics": {
"max_value": 1608822900000000000,
"min_value": 1608822900000000000
"max_value": "+052951-07-27T10:00:00.000Z",
"min_value": "+052951-07-27T10:00:00.000Z"
}
}
},
@ -6561,8 +6561,8 @@
"data_page_offset": 31929,
"dictionary_page_offset": 31898,
"statistics": {
"max_value": 0,
"min_value": 0
"max_value": "1970-01-01T00:00:00.000Z",
"min_value": "1970-01-01T00:00:00.000Z"
}
}
},
@ -6611,8 +6611,8 @@
"data_page_offset": 32256,
"dictionary_page_offset": 32225,
"statistics": {
"max_value": 0,
"min_value": 0
"max_value": "1970-01-01T00:00:00.000Z",
"min_value": "1970-01-01T00:00:00.000Z"
}
}
},
@ -6636,8 +6636,8 @@
"data_page_offset": 32418,
"dictionary_page_offset": 32387,
"statistics": {
"max_value": 0,
"min_value": 0
"max_value": "1970-01-01T00:00:00.000Z",
"min_value": "1970-01-01T00:00:00.000Z"
}
}
},