mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-26 23:26:38 +00:00
Int96 date parsing
This commit is contained in:
parent
82db6a8017
commit
e064efc66c
3
demo.js
3
demo.js
@ -229,7 +229,8 @@ function renderTable(header, data) {
|
||||
|
||||
function stringify(value) {
|
||||
if (value === undefined) return ''
|
||||
value = toJson(value)
|
||||
if (typeof value === 'string') return value
|
||||
if (typeof value === 'object') return JSON.stringify(toJson(value))
|
||||
if (typeof value === 'object') return JSON.stringify(value)
|
||||
return value
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
const dayMillis = 86400000000000 // 1 day in milliseconds
|
||||
const dayMillis = 86400000 // 1 day in milliseconds
|
||||
|
||||
/**
|
||||
* Convert known types from primitive to rich.
|
||||
@ -28,6 +28,9 @@ export function convert(data, schemaElement) {
|
||||
if (ctype === 'DATE') {
|
||||
return data.map(v => new Date(v * dayMillis))
|
||||
}
|
||||
if (ctype === undefined && schemaElement.type === 'INT96') {
|
||||
return data.map(parseInt96Date)
|
||||
}
|
||||
if (ctype === 'TIME_MILLIS') {
|
||||
return data.map(v => new Date(v))
|
||||
}
|
||||
@ -44,8 +47,6 @@ export function convert(data, schemaElement) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse decimal from byte array.
|
||||
*
|
||||
* @param {Uint8Array} bytes
|
||||
* @returns {number}
|
||||
*/
|
||||
@ -57,3 +58,14 @@ function parseDecimal(bytes) {
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {bigint} value
|
||||
* @returns {Date}
|
||||
*/
|
||||
function parseInt96Date(value) {
|
||||
const days = Number((value >> 64n) - 2440588n)
|
||||
const nano = Number((value & 0xffffffffffffffffn) / 1000000n)
|
||||
const millis = days * dayMillis + nano
|
||||
return new Date(millis)
|
||||
}
|
||||
|
||||
@ -52,13 +52,9 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata,
|
||||
dataPage = readPlain(pageReader, columnMetadata.type, nValues)
|
||||
} else if (daph2.encoding === 'RLE') {
|
||||
const bitWidth = 1
|
||||
if (daph2.num_nulls) {
|
||||
throw new Error('parquet RLE encoding with nulls not supported')
|
||||
} else {
|
||||
const pageReader = { view: pageView, offset: 4 }
|
||||
dataPage = new Array(nValues)
|
||||
readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize, dataPage)
|
||||
}
|
||||
const pageReader = { view: pageView, offset: 4 }
|
||||
dataPage = new Array(nValues)
|
||||
readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize, dataPage)
|
||||
} else if (
|
||||
daph2.encoding === 'PLAIN_DICTIONARY' ||
|
||||
daph2.encoding === 'RLE_DICTIONARY'
|
||||
|
||||
@ -93,7 +93,7 @@ function readPlainInt96(reader, count) {
|
||||
for (let i = 0; i < count; i++) {
|
||||
const low = reader.view.getBigInt64(reader.offset + i * 12, true)
|
||||
const high = reader.view.getInt32(reader.offset + i * 12 + 8, true)
|
||||
values[i] = (BigInt(high) << BigInt(32)) | low
|
||||
values[i] = (BigInt(high) << 64n) | low
|
||||
}
|
||||
reader.offset += count * 12
|
||||
return values
|
||||
|
||||
@ -11,6 +11,7 @@ export function toJson(obj) {
|
||||
if (typeof obj === 'bigint') return Number(obj)
|
||||
if (Array.isArray(obj)) return obj.map(toJson)
|
||||
if (obj instanceof Uint8Array) return Array.from(obj)
|
||||
if (obj instanceof Date) return obj.toISOString()
|
||||
if (obj instanceof Object) {
|
||||
/** @type {Record<string, unknown>} */
|
||||
const newObj = {}
|
||||
|
||||
@ -59,7 +59,7 @@ describe('convert function', () => {
|
||||
const data = [1, 2] // days since epoch
|
||||
/** @type {SchemaElement} */
|
||||
const schemaElement = { name, converted_type: 'DATE' }
|
||||
expect(convert(data, schemaElement)).toEqual([new Date(86400000000000), new Date(86400000000000 * 2)])
|
||||
expect(convert(data, schemaElement)).toEqual([new Date(86400000), new Date(86400000 * 2)])
|
||||
})
|
||||
|
||||
it('converts milliseconds to TIME_MILLIS', () => {
|
||||
@ -70,6 +70,14 @@ describe('convert function', () => {
|
||||
expect(convert(data, schemaElement)).toEqual([new Date(now)])
|
||||
})
|
||||
|
||||
it('converts INT96 to DATE', () => {
|
||||
// from alltypes_plain.parquet
|
||||
const data = [45284764452596988585705472n, 45284764452597048585705472n]
|
||||
/** @type {SchemaElement} */
|
||||
const schemaElement = { name, type: 'INT96' }
|
||||
expect(convert(data, schemaElement)).toEqual([new Date('2009-03-01T00:00:00.000Z'), new Date('2009-03-01T00:01:00.000Z')])
|
||||
})
|
||||
|
||||
it('parses strings to JSON', () => {
|
||||
const data = ['{"key": true}', '{"quay": 314}']
|
||||
/** @type {SchemaElement} */
|
||||
|
||||
@ -41,7 +41,7 @@ describe('readPlain', () => {
|
||||
view.setInt32(8, high, true)
|
||||
const reader = { view, offset: 0 }
|
||||
const result = readPlain(reader, 'INT96', 1)
|
||||
const expectedValue = (BigInt(high) << BigInt(32)) | low
|
||||
const expectedValue = (BigInt(high) << 64n) | low
|
||||
expect(result).toEqual([expectedValue])
|
||||
expect(reader.offset).toBe(12)
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user