Int96 date parsing

This commit is contained in:
Kenny Daniel 2024-05-12 18:12:30 -07:00
parent 82db6a8017
commit e064efc66c
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
7 changed files with 32 additions and 14 deletions

@ -229,7 +229,8 @@ function renderTable(header, data) {
/**
 * Turn a value into something printable.
 *
 * `undefined` becomes the empty string. Every other value is passed through
 * `toJson` exactly once; if the converted result is an object (arrays
 * included) it is serialised with `JSON.stringify`, otherwise the converted
 * result is returned as-is.
 *
 * @param {any} value value to render
 * @returns {any} '' for undefined, a JSON string for object results, else the
 *   `toJson` result unchanged
 */
function stringify(value) {
  if (value === undefined) return ''
  // Convert once: the result is already JSON-friendly, so it must not be run
  // through toJson a second time before serialising.
  const converted = toJson(value)
  if (typeof converted === 'object') return JSON.stringify(converted)
  return converted
}

@ -1,4 +1,4 @@
const dayMillis = 86400000000000 // 1 day in milliseconds
const dayMillis = 86400000 // 1 day in milliseconds
/**
* Convert known types from primitive to rich.
@ -28,6 +28,9 @@ export function convert(data, schemaElement) {
if (ctype === 'DATE') {
return data.map(v => new Date(v * dayMillis))
}
if (ctype === undefined && schemaElement.type === 'INT96') {
return data.map(parseInt96Date)
}
if (ctype === 'TIME_MILLIS') {
return data.map(v => new Date(v))
}
@ -44,8 +47,6 @@ export function convert(data, schemaElement) {
}
/**
* Parse decimal from byte array.
*
* @param {Uint8Array} bytes
* @returns {number}
*/
@ -57,3 +58,14 @@ function parseDecimal(bytes) {
}
return value
}
/**
 * Convert an INT96 value into a Date.
 *
 * The bits above the low 64 are taken as a day number, from which 2440588 is
 * subtracted. The low 64 bits are integer-divided by 1000000 and added as
 * milliseconds on top of `days * dayMillis`.
 * NOTE(review): 2440588 looks like the Julian day number of 1970-01-01 and the
 * low word like nanoseconds within the day — confirm against the Parquet
 * INT96 timestamp layout.
 *
 * @param {bigint} value INT96 value with the day number above bit 64
 * @returns {Date}
 */
function parseInt96Date(value) {
  const low64Mask = 0xffffffffffffffffn
  const dayNumber = value >> 64n
  const lowWord = value & low64Mask
  const daysSinceEpoch = Number(dayNumber - 2440588n)
  // BigInt division truncates, so anything below one millisecond is dropped.
  const millisOfDay = Number(lowWord / 1000000n)
  return new Date(daysSinceEpoch * dayMillis + millisOfDay)
}

@ -52,13 +52,9 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata,
dataPage = readPlain(pageReader, columnMetadata.type, nValues)
} else if (daph2.encoding === 'RLE') {
const bitWidth = 1
if (daph2.num_nulls) {
throw new Error('parquet RLE encoding with nulls not supported')
} else {
const pageReader = { view: pageView, offset: 4 }
dataPage = new Array(nValues)
readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize, dataPage)
}
const pageReader = { view: pageView, offset: 4 }
dataPage = new Array(nValues)
readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize, dataPage)
} else if (
daph2.encoding === 'PLAIN_DICTIONARY' ||
daph2.encoding === 'RLE_DICTIONARY'

@ -93,7 +93,7 @@ function readPlainInt96(reader, count) {
for (let i = 0; i < count; i++) {
const low = reader.view.getBigInt64(reader.offset + i * 12, true)
const high = reader.view.getInt32(reader.offset + i * 12 + 8, true)
values[i] = (BigInt(high) << BigInt(32)) | low
values[i] = (BigInt(high) << 64n) | low
}
reader.offset += count * 12
return values

@ -11,6 +11,7 @@ export function toJson(obj) {
if (typeof obj === 'bigint') return Number(obj)
if (Array.isArray(obj)) return obj.map(toJson)
if (obj instanceof Uint8Array) return Array.from(obj)
if (obj instanceof Date) return obj.toISOString()
if (obj instanceof Object) {
/** @type {Record<string, unknown>} */
const newObj = {}

@ -59,7 +59,7 @@ describe('convert function', () => {
const data = [1, 2] // days since epoch
/** @type {SchemaElement} */
const schemaElement = { name, converted_type: 'DATE' }
expect(convert(data, schemaElement)).toEqual([new Date(86400000000000), new Date(86400000000000 * 2)])
expect(convert(data, schemaElement)).toEqual([new Date(86400000), new Date(86400000 * 2)])
})
it('converts milliseconds to TIME_MILLIS', () => {
@ -70,6 +70,14 @@ describe('convert function', () => {
expect(convert(data, schemaElement)).toEqual([new Date(now)])
})
it('converts INT96 to DATE', () => {
// from alltypes_plain.parquet
const data = [45284764452596988585705472n, 45284764452597048585705472n]
/** @type {SchemaElement} */
const schemaElement = { name, type: 'INT96' }
expect(convert(data, schemaElement)).toEqual([new Date('2009-03-01T00:00:00.000Z'), new Date('2009-03-01T00:01:00.000Z')])
})
it('parses strings to JSON', () => {
const data = ['{"key": true}', '{"quay": 314}']
/** @type {SchemaElement} */

@ -41,7 +41,7 @@ describe('readPlain', () => {
view.setInt32(8, high, true)
const reader = { view, offset: 0 }
const result = readPlain(reader, 'INT96', 1)
const expectedValue = (BigInt(high) << BigInt(32)) | low
const expectedValue = (BigInt(high) << 64n) | low
expect(result).toEqual([expectedValue])
expect(reader.offset).toBe(12)
})