mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-06 06:51:54 +00:00
Convert date and decimal stats
This commit is contained in:
parent
a56420de2f
commit
efdbf459a5
@ -120,7 +120,7 @@ export function convert(data, schemaElement, utf8 = true) {
|
||||
* @param {Uint8Array} bytes
|
||||
* @returns {number}
|
||||
*/
|
||||
function parseDecimal(bytes) {
|
||||
export function parseDecimal(bytes) {
|
||||
// TODO: handle signed
|
||||
let value = 0
|
||||
for (const byte of bytes) {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { CompressionCodec, ConvertedType, Encoding, FieldRepetitionType, PageType, ParquetType } from './constants.js'
|
||||
import { parseFloat16 } from './convert.js'
|
||||
import { parseDecimal, parseFloat16 } from './convert.js'
|
||||
import { getSchemaPath } from './schema.js'
|
||||
import { deserializeTCompactProtocol } from './thrift.js'
|
||||
|
||||
@ -252,30 +252,22 @@ function timeUnit(unit) {
|
||||
* @returns {import("./types.d.ts").Statistics}
|
||||
*/
|
||||
function columnStats(stats, schema) {
|
||||
const { type, logical_type } = schema
|
||||
const { type, converted_type, logical_type } = schema
|
||||
function convert(/** @type {Uint8Array} */ value) {
|
||||
if (value === undefined) return value
|
||||
if (type === 'BOOLEAN') return value[0] === 1
|
||||
if (type === 'BYTE_ARRAY') return new TextDecoder().decode(value)
|
||||
if (type === 'INT32') {
|
||||
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
|
||||
return view.getInt32(0, true)
|
||||
}
|
||||
if (type === 'INT64') {
|
||||
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
|
||||
return view.getBigInt64(0, true)
|
||||
}
|
||||
if (type === 'FLOAT') {
|
||||
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
|
||||
return view.getFloat32(0, true)
|
||||
}
|
||||
if (type === 'DOUBLE') {
|
||||
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
|
||||
return view.getFloat64(0, true)
|
||||
}
|
||||
if (logical_type?.type === 'FLOAT16') {
|
||||
return parseFloat16(value)
|
||||
}
|
||||
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
|
||||
if (type === 'FLOAT') return view.getFloat32(0, true)
|
||||
if (type === 'DOUBLE') return view.getFloat64(0, true)
|
||||
if (type === 'INT32' && converted_type === 'DATE') return new Date(view.getInt32(0, true) * 86400000)
|
||||
if (type === 'INT64' && converted_type === 'TIMESTAMP_MICROS') return new Date(Number(view.getBigInt64(0, true) / 1000n))
|
||||
if (type === 'INT64' && converted_type === 'TIMESTAMP_MILLIS') return new Date(Number(view.getBigInt64(0, true)))
|
||||
if (type === 'INT64' && logical_type?.type === 'TIMESTAMP') return new Date(Number(view.getBigInt64(0, true)))
|
||||
if (type === 'INT32') return view.getInt32(0, true)
|
||||
if (type === 'INT64') return view.getBigInt64(0, true)
|
||||
if (converted_type === 'DECIMAL') return parseDecimal(value) * Math.pow(10, -(schema.scale || 0))
|
||||
if (logical_type?.type === 'FLOAT16') return parseFloat16(value)
|
||||
return value
|
||||
}
|
||||
return stats && {
|
||||
|
||||
@ -530,31 +530,11 @@
|
||||
"total_compressed_size": 736,
|
||||
"data_page_offset": 11065,
|
||||
"statistics": {
|
||||
"max": [
|
||||
0,
|
||||
19,
|
||||
139,
|
||||
153
|
||||
],
|
||||
"min": [
|
||||
0,
|
||||
9,
|
||||
177,
|
||||
23
|
||||
],
|
||||
"max": 1280.921,
|
||||
"min": 635.159,
|
||||
"null_count": 0,
|
||||
"max_value": [
|
||||
0,
|
||||
19,
|
||||
139,
|
||||
153
|
||||
],
|
||||
"min_value": [
|
||||
0,
|
||||
9,
|
||||
177,
|
||||
23
|
||||
]
|
||||
"max_value": 1280.921,
|
||||
"min_value": 635.159
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
@ -582,31 +562,11 @@
|
||||
"total_compressed_size": 643,
|
||||
"data_page_offset": 11882,
|
||||
"statistics": {
|
||||
"max": [
|
||||
0,
|
||||
19,
|
||||
139,
|
||||
153
|
||||
],
|
||||
"min": [
|
||||
0,
|
||||
9,
|
||||
177,
|
||||
23
|
||||
],
|
||||
"max": 1280.921,
|
||||
"min": 635.159,
|
||||
"null_count": 0,
|
||||
"max_value": [
|
||||
0,
|
||||
19,
|
||||
139,
|
||||
153
|
||||
],
|
||||
"min_value": [
|
||||
0,
|
||||
9,
|
||||
177,
|
||||
23
|
||||
]
|
||||
"max_value": 1280.921,
|
||||
"min_value": 635.159
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
|
||||
@ -76,8 +76,8 @@
|
||||
"data_page_offset": 4,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": 1669570964280,
|
||||
"min_value": 1669570963514
|
||||
"max_value": "2022-11-27T17:42:44.280Z",
|
||||
"min_value": "2022-11-27T17:42:43.514Z"
|
||||
}
|
||||
},
|
||||
"offset_index_offset": 656,
|
||||
|
||||
@ -37,32 +37,8 @@
|
||||
"total_compressed_size": 319,
|
||||
"data_page_offset": 4,
|
||||
"statistics": {
|
||||
"max": [
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
9,
|
||||
96
|
||||
],
|
||||
"min": [
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
200
|
||||
],
|
||||
"max": 24,
|
||||
"min": 2,
|
||||
"null_count": 0
|
||||
},
|
||||
"encoding_stats": [
|
||||
|
||||
@ -6511,8 +6511,8 @@
|
||||
"data_page_offset": 31605,
|
||||
"dictionary_page_offset": 31574,
|
||||
"statistics": {
|
||||
"max_value": 1608822900000000000,
|
||||
"min_value": 1608822900000000000
|
||||
"max_value": "+052951-07-27T10:00:00.000Z",
|
||||
"min_value": "+052951-07-27T10:00:00.000Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -6536,8 +6536,8 @@
|
||||
"data_page_offset": 31767,
|
||||
"dictionary_page_offset": 31736,
|
||||
"statistics": {
|
||||
"max_value": 1608822900000000000,
|
||||
"min_value": 1608822900000000000
|
||||
"max_value": "+052951-07-27T10:00:00.000Z",
|
||||
"min_value": "+052951-07-27T10:00:00.000Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -6561,8 +6561,8 @@
|
||||
"data_page_offset": 31929,
|
||||
"dictionary_page_offset": 31898,
|
||||
"statistics": {
|
||||
"max_value": 0,
|
||||
"min_value": 0
|
||||
"max_value": "1970-01-01T00:00:00.000Z",
|
||||
"min_value": "1970-01-01T00:00:00.000Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -6611,8 +6611,8 @@
|
||||
"data_page_offset": 32256,
|
||||
"dictionary_page_offset": 32225,
|
||||
"statistics": {
|
||||
"max_value": 0,
|
||||
"min_value": 0
|
||||
"max_value": "1970-01-01T00:00:00.000Z",
|
||||
"min_value": "1970-01-01T00:00:00.000Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -6636,8 +6636,8 @@
|
||||
"data_page_offset": 32418,
|
||||
"dictionary_page_offset": 32387,
|
||||
"statistics": {
|
||||
"max_value": 0,
|
||||
"min_value": 0
|
||||
"max_value": "1970-01-01T00:00:00.000Z",
|
||||
"min_value": "1970-01-01T00:00:00.000Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
Loading…
Reference in New Issue
Block a user