From 10f23492e60e384f537ef7b6aeaf7a30ec778784 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Fri, 11 Apr 2025 03:13:35 -0700 Subject: [PATCH] Unconvert decimal type --- src/schema.js | 13 ++++++----- src/unconvert.js | 42 ++++++++++++++++++++++++++++++++++- test/unconvert.test.js | 50 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 98 insertions(+), 7 deletions(-) diff --git a/src/schema.js b/src/schema.js index bae468f..bff23e3 100644 --- a/src/schema.js +++ b/src/schema.js @@ -19,11 +19,14 @@ export function schemaFromColumnData(columnData) { throw new Error('columns must have the same length') } - // auto-detect type if not provided - /** @type {SchemaElement} */ - const schemaElement = column.type ? column : autoSchemaElement(column.name, column.data) - if (!schemaElement.type) throw new Error(`column ${column.name} cannot determine type`) - schema.push(schemaElement) + const { data, ...schemaElement } = column + if (column.type) { + // use provided type + schema.push(schemaElement) + } else { + // auto-detect type + schema.push(autoSchemaElement(column.name, data)) + } } return schema diff --git a/src/unconvert.js b/src/unconvert.js index 0a772ea..6718439 100644 --- a/src/unconvert.js +++ b/src/unconvert.js @@ -1,3 +1,4 @@ +const dayMillis = 86400000 // 1 day in milliseconds /** * Convert from rich to primitive types. @@ -9,7 +10,15 @@ */ export function unconvert(schemaElement, values) { const ctype = schemaElement.converted_type - // TODO: DECIMAL + if (ctype === 'DECIMAL') { + const scale = schemaElement.scale || 0 + const factor = 10 ** scale + return values.map(v => { + if (v === null || v === undefined) return v + if (typeof v !== 'number') throw new Error('DECIMAL must be a number') + return unconvertDecimal(BigInt(Math.round(v * factor))) // to byte array + }) + } if (ctype === 'DATE') { return values.map(v => v.getTime()) } @@ -68,6 +77,11 @@ export function unconvertMetadata(value, schema) { new DataView(buffer).setBigInt64(0, value, true) return new Uint8Array(buffer) } + if (type === 'INT32' && converted_type === 'DATE' && value instanceof Date) { + const buffer = new ArrayBuffer(8) + new DataView(buffer).setInt32(0, Math.floor(value.getTime() / dayMillis), true) + return new Uint8Array(buffer) + } if (type === 'INT64' && converted_type === 'TIMESTAMP_MILLIS' && value instanceof Date) { const buffer = new ArrayBuffer(8) new DataView(buffer).setBigInt64(0, BigInt(value.getTime()), true) @@ -75,3 +89,29 @@ export function unconvertMetadata(value, schema) { } throw new Error(`unsupported type for statistics: ${type} with value ${value}`) } + +/** + * @param {bigint} value + * @returns {Uint8Array} + */ +export function unconvertDecimal(value) { + if (value === 0n) return new Uint8Array([]) + const bytes = [] + let current = value + + while (true) { + // extract the lowest 8 bits + const byte = Number(current & 0xffn) + bytes.unshift(byte) + current >>= 8n + + // for nonnegative: stop when top byte has signBit = 0 AND shifted value == 0n + // for negative: stop when top byte has signBit = 1 AND shifted value == -1n + const signBit = byte & 0x80 + if (!signBit && current === 0n || signBit && current === -1n) { + break + } + } + + return new Uint8Array(bytes) +} diff --git a/test/unconvert.test.js b/test/unconvert.test.js index 4c4741d..581b4e9 100644 --- a/test/unconvert.test.js +++ b/test/unconvert.test.js @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest' -import { unconvert, unconvertMetadata } from '../src/unconvert.js' +import { unconvert, unconvertDecimal, unconvertMetadata } from '../src/unconvert.js' import { convertMetadata } from 'hyparquet/src/metadata.js' /** @@ -155,3 +155,51 @@ describe('unconvertMetadata', () => { .toThrow('unsupported type for statistics: INT64 with value 123') }) }) + +describe('unconvertDecimal', () => { + const examples = [ + { input: 0n, expected: new Uint8Array([]) }, + { input: 1n, expected: new Uint8Array([0x01]) }, + { input: -1n, expected: new Uint8Array([0xff]) }, + { input: 1234n, expected: new Uint8Array([0x04, 0xd2]) }, + { input: -1234n, expected: new Uint8Array([0xfb, 0x2e]) }, + { input: 1234567890123456789n, expected: new Uint8Array([0x11, 0x22, 0x10, 0xf4, 0x7d, 0xe9, 0x81, 0x15]) }, + { input: -1234567890123456789n, expected: new Uint8Array([0xee, 0xdd, 0xef, 0x0b, 0x82, 0x16, 0x7e, 0xeb]) }, + ] + + it.for(examples)('should convert %p', ({ input, expected }) => { + expect(parseDecimal(expected)).toEqual(input) + }) + + it.for(examples)('should unconvert %p', ({ input, expected }) => { + expect(unconvertDecimal(input)).toEqual(expected) + }) + + it.for(examples)('should roundtrip %p', ({ input }) => { + expect(parseDecimal(unconvertDecimal(input))).toEqual(input) + }) + + it.for(examples)('should reverse roundtrip %p', ({ expected }) => { + expect(unconvertDecimal(parseDecimal(expected))).toEqual(expected) + }) +}) + +/** + * BigInt parseDecimal + * @param {Uint8Array} bytes + * @returns {bigint} + */ +function parseDecimal(bytes) { + let value = 0n + for (const byte of bytes) { + value = value * 256n + BigInt(byte) + } + + // handle signed + const bits = BigInt(bytes.length) * 8n + if (bits && value >= 2n ** (bits - 1n)) { + value -= 2n ** bits + } + + return value +}