mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Unconvert decimal type
This commit is contained in:
parent
fff0b1c6d9
commit
10f23492e6
@ -19,11 +19,14 @@ export function schemaFromColumnData(columnData) {
|
||||
throw new Error('columns must have the same length')
|
||||
}
|
||||
|
||||
// auto-detect type if not provided
|
||||
/** @type {SchemaElement} */
|
||||
const schemaElement = column.type ? column : autoSchemaElement(column.name, column.data)
|
||||
if (!schemaElement.type) throw new Error(`column ${column.name} cannot determine type`)
|
||||
schema.push(schemaElement)
|
||||
const { data, ...schemaElement } = column
|
||||
if (column.type) {
|
||||
// use provided type
|
||||
schema.push(schemaElement)
|
||||
} else {
|
||||
// auto-detect type
|
||||
schema.push(autoSchemaElement(column.name, data))
|
||||
}
|
||||
}
|
||||
|
||||
return schema
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
const dayMillis = 86400000 // 1 day in milliseconds
|
||||
|
||||
/**
|
||||
* Convert from rich to primitive types.
|
||||
@ -9,7 +10,15 @@
|
||||
*/
|
||||
export function unconvert(schemaElement, values) {
|
||||
const ctype = schemaElement.converted_type
|
||||
// TODO: DECIMAL
|
||||
if (ctype === 'DECIMAL') {
|
||||
const scale = schemaElement.scale || 0
|
||||
const factor = 10 ** scale
|
||||
return values.map(v => {
|
||||
if (v === null || v === undefined) return v
|
||||
if (typeof v !== 'number') throw new Error('DECIMAL must be a number')
|
||||
return unconvertDecimal(BigInt(Math.round(v * factor))) // to byte array
|
||||
})
|
||||
}
|
||||
if (ctype === 'DATE') {
|
||||
return values.map(v => v.getTime())
|
||||
}
|
||||
@ -68,6 +77,11 @@ export function unconvertMetadata(value, schema) {
|
||||
new DataView(buffer).setBigInt64(0, value, true)
|
||||
return new Uint8Array(buffer)
|
||||
}
|
||||
if (type === 'INT32' && converted_type === 'DATE' && value instanceof Date) {
|
||||
const buffer = new ArrayBuffer(8)
|
||||
new DataView(buffer).setInt32(0, Math.floor(value.getTime() / dayMillis), true)
|
||||
return new Uint8Array(buffer)
|
||||
}
|
||||
if (type === 'INT64' && converted_type === 'TIMESTAMP_MILLIS' && value instanceof Date) {
|
||||
const buffer = new ArrayBuffer(8)
|
||||
new DataView(buffer).setBigInt64(0, BigInt(value.getTime()), true)
|
||||
@ -75,3 +89,29 @@ export function unconvertMetadata(value, schema) {
|
||||
}
|
||||
throw new Error(`unsupported type for statistics: ${type} with value ${value}`)
|
||||
}
|
||||
|
||||
/**
 * Encode a bigint as a minimal-length, big-endian, two's-complement byte array.
 * Used by `unconvert` to serialize the unscaled integer of a DECIMAL value.
 *
 * Zero encodes to an empty array. Any other value uses the fewest bytes whose
 * most significant bit still carries the correct sign, for example:
 *   127n -> [0x7f], 128n -> [0x00, 0x80], -128n -> [0x80], -129n -> [0xff, 0x7f]
 *
 * @param {bigint} value unscaled integer to encode
 * @returns {Uint8Array} big-endian two's-complement bytes (empty for 0n)
 * @throws {TypeError} if value is not a bigint
 */
export function unconvertDecimal(value) {
  if (typeof value !== 'bigint') {
    throw new TypeError(`unconvertDecimal expects a bigint, got ${typeof value}`)
  }
  if (value === 0n) return new Uint8Array([])

  // collect bytes least-significant first, then reverse once at the end
  // (avoids repeatedly inserting at the front of the array)
  const littleEndian = []
  let remaining = value

  while (true) {
    // extract the lowest 8 bits
    const byte = Number(remaining & 0xffn)
    littleEndian.push(byte)
    // arithmetic shift: nonnegative values run down to 0n, negative values to -1n
    remaining >>= 8n

    // stop once what is left is pure sign extension AND the top bit of the
    // byte just emitted agrees with that sign; otherwise one more byte is
    // needed so the encoding is not misread with the opposite sign
    const signBit = byte & 0x80
    if ((remaining === 0n && !signBit) || (remaining === -1n && signBit)) break
  }

  return new Uint8Array(littleEndian.reverse())
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { unconvert, unconvertMetadata } from '../src/unconvert.js'
|
||||
import { unconvert, unconvertDecimal, unconvertMetadata } from '../src/unconvert.js'
|
||||
import { convertMetadata } from 'hyparquet/src/metadata.js'
|
||||
|
||||
/**
|
||||
@ -155,3 +155,51 @@ describe('unconvertMetadata', () => {
|
||||
.toThrow('unsupported type for statistics: INT64 with value 123')
|
||||
})
|
||||
})
|
||||
|
||||
describe('unconvertDecimal', () => {
  // Each `expected` is the minimal big-endian two's-complement encoding of
  // `input`, so the same table drives both directions and both roundtrips.
  const examples = [
    { input: 0n, expected: new Uint8Array([]) },
    { input: 1n, expected: new Uint8Array([0x01]) },
    { input: -1n, expected: new Uint8Array([0xff]) },
    // sign boundaries: an extra leading byte is required whenever the top bit
    // of the most significant magnitude byte would contradict the sign
    { input: 127n, expected: new Uint8Array([0x7f]) },
    { input: 128n, expected: new Uint8Array([0x00, 0x80]) },
    { input: 255n, expected: new Uint8Array([0x00, 0xff]) },
    { input: -128n, expected: new Uint8Array([0x80]) },
    { input: -129n, expected: new Uint8Array([0xff, 0x7f]) },
    { input: 1234n, expected: new Uint8Array([0x04, 0xd2]) },
    { input: -1234n, expected: new Uint8Array([0xfb, 0x2e]) },
    { input: 1234567890123456789n, expected: new Uint8Array([0x11, 0x22, 0x10, 0xf4, 0x7d, 0xe9, 0x81, 0x15]) },
    { input: -1234567890123456789n, expected: new Uint8Array([0xee, 0xdd, 0xef, 0x0b, 0x82, 0x16, 0x7e, 0xeb]) },
  ]

  // bytes -> bigint, using the local reference decoder
  it.for(examples)('should convert %p', ({ input, expected }) => {
    expect(parseDecimal(expected)).toEqual(input)
  })

  // bigint -> bytes, the function under test
  it.for(examples)('should unconvert %p', ({ input, expected }) => {
    expect(unconvertDecimal(input)).toEqual(expected)
  })

  // bigint -> bytes -> bigint
  it.for(examples)('should roundtrip %p', ({ input }) => {
    expect(parseDecimal(unconvertDecimal(input))).toEqual(input)
  })

  // bytes -> bigint -> bytes; holds only because every expected encoding is minimal
  it.for(examples)('should reverse roundtrip %p', ({ expected }) => {
    expect(unconvertDecimal(parseDecimal(expected))).toEqual(expected)
  })
})
|
||||
|
||||
/**
 * Decode a big-endian two's-complement byte array into a signed bigint.
 * Reference decoder used by the tests as the inverse of unconvertDecimal.
 * An empty array decodes to 0n.
 *
 * @param {Uint8Array} bytes big-endian two's-complement bytes
 * @returns {bigint} signed integer value
 */
function parseDecimal(bytes) {
  // accumulate the bytes as an unsigned big-endian integer
  let unsigned = 0n
  for (const byte of bytes) {
    unsigned = (unsigned << 8n) | BigInt(byte)
  }

  // reinterpret the low (8 * length) bits as a signed integer;
  // a width of 0 bits (empty input) yields 0n
  return BigInt.asIntN(bytes.length * 8, unsigned)
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user