Unconvert decimal type

This commit is contained in:
Kenny Daniel 2025-04-11 03:13:35 -07:00
parent fff0b1c6d9
commit 10f23492e6
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
3 changed files with 98 additions and 7 deletions

@ -19,11 +19,14 @@ export function schemaFromColumnData(columnData) {
throw new Error('columns must have the same length')
}
// auto-detect type if not provided
/** @type {SchemaElement} */
const schemaElement = column.type ? column : autoSchemaElement(column.name, column.data)
if (!schemaElement.type) throw new Error(`column ${column.name} cannot determine type`)
schema.push(schemaElement)
const { data, ...schemaElement } = column
if (column.type) {
// use provided type
schema.push(schemaElement)
} else {
// auto-detect type
schema.push(autoSchemaElement(column.name, data))
}
}
return schema

@ -1,3 +1,4 @@
const dayMillis = 86400000 // 1 day in milliseconds
/**
* Convert from rich to primitive types.
@ -9,7 +10,15 @@
*/
export function unconvert(schemaElement, values) {
const ctype = schemaElement.converted_type
// TODO: DECIMAL
if (ctype === 'DECIMAL') {
const scale = schemaElement.scale || 0
const factor = 10 ** scale
return values.map(v => {
if (v === null || v === undefined) return v
if (typeof v !== 'number') throw new Error('DECIMAL must be a number')
return unconvertDecimal(BigInt(Math.round(v * factor))) // to byte array
})
}
if (ctype === 'DATE') {
return values.map(v => v.getTime())
}
@ -68,6 +77,11 @@ export function unconvertMetadata(value, schema) {
new DataView(buffer).setBigInt64(0, value, true)
return new Uint8Array(buffer)
}
if (type === 'INT32' && converted_type === 'DATE' && value instanceof Date) {
const buffer = new ArrayBuffer(8)
new DataView(buffer).setInt32(0, Math.floor(value.getTime() / dayMillis), true)
return new Uint8Array(buffer)
}
if (type === 'INT64' && converted_type === 'TIMESTAMP_MILLIS' && value instanceof Date) {
const buffer = new ArrayBuffer(8)
new DataView(buffer).setBigInt64(0, BigInt(value.getTime()), true)
@ -75,3 +89,29 @@ export function unconvertMetadata(value, schema) {
}
throw new Error(`unsupported type for statistics: ${type} with value ${value}`)
}
/**
 * Encode a bigint as a big-endian two's complement byte array using the
 * fewest bytes that still preserve the sign. Zero encodes to an empty array.
 *
 * @param {bigint} value unscaled integer value to encode
 * @returns {Uint8Array} minimal big-endian two's complement bytes
 */
export function unconvertDecimal(value) {
  if (value === 0n) return new Uint8Array([])

  // collect bytes least-significant first, then flip to big-endian at the end
  const littleEndian = []
  let rest = value
  let done = false
  do {
    const low = Number(rest & 0xffn)
    littleEndian.push(low)
    rest >>= 8n // arithmetic shift: negatives converge to -1n, others to 0n
    // finished once the remaining bits are pure sign extension
    // of the most significant byte emitted so far
    const negative = (low & 0x80) !== 0
    done = negative ? rest === -1n : rest === 0n
  } while (!done)

  return new Uint8Array(littleEndian.reverse())
}

@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest'
import { unconvert, unconvertMetadata } from '../src/unconvert.js'
import { unconvert, unconvertDecimal, unconvertMetadata } from '../src/unconvert.js'
import { convertMetadata } from 'hyparquet/src/metadata.js'
/**
@ -155,3 +155,51 @@ describe('unconvertMetadata', () => {
.toThrow('unsupported type for statistics: INT64 with value 123')
})
})
describe('unconvertDecimal', () => {
  // each case pairs a bigint with the byte array the encoder is expected to produce
  const cases = [
    { input: 0n, expected: Uint8Array.of() },
    { input: 1n, expected: Uint8Array.of(0x01) },
    { input: -1n, expected: Uint8Array.of(0xff) },
    { input: 1234n, expected: Uint8Array.of(0x04, 0xd2) },
    { input: -1234n, expected: Uint8Array.of(0xfb, 0x2e) },
    { input: 1234567890123456789n, expected: Uint8Array.of(0x11, 0x22, 0x10, 0xf4, 0x7d, 0xe9, 0x81, 0x15) },
    { input: -1234567890123456789n, expected: Uint8Array.of(0xee, 0xdd, 0xef, 0x0b, 0x82, 0x16, 0x7e, 0xeb) },
  ]

  // bytes -> bigint: the reference decoder agrees with the fixture table
  it.for(cases)('should convert %p', ({ input, expected }) => {
    const decoded = parseDecimal(expected)
    expect(decoded).toEqual(input)
  })

  // bigint -> bytes: the encoder under test produces the expected bytes
  it.for(cases)('should unconvert %p', ({ input, expected }) => {
    const encoded = unconvertDecimal(input)
    expect(encoded).toEqual(expected)
  })

  // bigint -> bytes -> bigint
  it.for(cases)('should roundtrip %p', ({ input }) => {
    const encoded = unconvertDecimal(input)
    expect(parseDecimal(encoded)).toEqual(input)
  })

  // bytes -> bigint -> bytes
  it.for(cases)('should reverse roundtrip %p', ({ expected }) => {
    const decoded = parseDecimal(expected)
    expect(unconvertDecimal(decoded)).toEqual(expected)
  })
})
/**
 * Reference decoder used by the tests: interpret a byte array as a
 * big-endian two's complement integer. An empty array decodes to 0n.
 *
 * @param {Uint8Array} bytes big-endian two's complement bytes
 * @returns {bigint} decoded signed value
 */
function parseDecimal(bytes) {
  // accumulate the bytes as an unsigned big-endian magnitude
  const unsigned = bytes.reduce((acc, b) => acc * 256n + BigInt(b), 0n)

  const width = BigInt(bytes.length) * 8n
  if (width === 0n) return unsigned

  // top bit set means negative: subtract 2^width to recover the signed value
  const signThreshold = 1n << (width - 1n)
  return unsigned >= signThreshold ? unsigned - (1n << width) : unsigned
}