mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Fixed length byte array decimals
This commit is contained in:
parent
279e055a60
commit
fde7f81893
@ -49,7 +49,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/eslint-parser": "7.27.0",
|
||||
"@types/node": "22.14.0",
|
||||
"@types/node": "22.14.1",
|
||||
"@vitest/coverage-v8": "3.1.1",
|
||||
"eslint": "9.24.0",
|
||||
"eslint-plugin-jsdoc": "50.6.9",
|
||||
|
||||
13
src/plain.js
13
src/plain.js
@ -20,7 +20,7 @@ export function writePlain(writer, values, type) {
|
||||
} else if (type === 'BYTE_ARRAY') {
|
||||
writePlainByteArray(writer, values)
|
||||
} else if (type === 'FIXED_LEN_BYTE_ARRAY') {
|
||||
writePlainByteArray(writer, values)
|
||||
writePlainByteArrayFixed(writer, values)
|
||||
} else {
|
||||
throw new Error(`parquet unsupported type: ${type}`)
|
||||
}
|
||||
@ -109,3 +109,14 @@ function writePlainByteArray(writer, values) {
|
||||
writer.appendBytes(value)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Write FIXED_LEN_BYTE_ARRAY values in PLAIN encoding.
 *
 * Each value's raw bytes are appended back to back; this function writes no
 * per-value length prefix. Since nothing in the output marks where a value
 * ends, every value must have the same byte length, otherwise all following
 * values would be misaligned. A value of the wrong size is therefore rejected
 * instead of being written.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @param {number} [fixedLength] expected byte length of every value;
 *   when omitted, the length of the first value is used as the expected length
 * @throws {Error} if a value is not a Uint8Array or has an unexpected length
 */
function writePlainByteArrayFixed(writer, values, fixedLength) {
  let expectedLength = fixedLength
  for (const value of values) {
    if (!(value instanceof Uint8Array)) throw new Error('parquet expected Uint8Array value')
    // without an explicit length, the first value defines the width of the column
    if (expectedLength === undefined) expectedLength = value.length
    if (value.length !== expectedLength) {
      throw new Error(`parquet expected Uint8Array of length ${expectedLength}, got ${value.length}`)
    }
    writer.appendBytes(value)
  }
}
|
||||
|
||||
@ -4,19 +4,19 @@ const dayMillis = 86400000 // 1 day in milliseconds
|
||||
* Convert from rich to primitive types.
|
||||
*
|
||||
* @import {DecodedArray, SchemaElement} from 'hyparquet'
|
||||
* @param {SchemaElement} schemaElement
|
||||
* @param {SchemaElement} element
|
||||
* @param {DecodedArray} values
|
||||
* @returns {DecodedArray}
|
||||
*/
|
||||
export function unconvert(schemaElement, values) {
|
||||
const ctype = schemaElement.converted_type
|
||||
export function unconvert(element, values) {
|
||||
const ctype = element.converted_type
|
||||
if (ctype === 'DECIMAL') {
|
||||
const scale = schemaElement.scale || 0
|
||||
const scale = element.scale || 0
|
||||
const factor = 10 ** scale
|
||||
return values.map(v => {
|
||||
if (v === null || v === undefined) return v
|
||||
if (typeof v !== 'number') throw new Error('DECIMAL must be a number')
|
||||
return unconvertDecimal(BigInt(Math.round(v * factor))) // to byte array
|
||||
return unconvertDecimal(element, BigInt(Math.round(v * factor)))
|
||||
})
|
||||
}
|
||||
if (ctype === 'DATE') {
|
||||
@ -45,12 +45,12 @@ export function unconvert(schemaElement, values) {
|
||||
* Unconvert from rich type to byte array for metadata statistics.
|
||||
*
|
||||
* @param {import('hyparquet/src/types.js').MinMaxType | undefined} value
|
||||
* @param {SchemaElement} schema
|
||||
* @param {SchemaElement} element
|
||||
* @returns {Uint8Array | undefined}
|
||||
*/
|
||||
export function unconvertMetadata(value, schema) {
|
||||
export function unconvertMetadata(value, element) {
|
||||
if (value === undefined || value === null) return undefined
|
||||
const { type, converted_type } = schema
|
||||
const { type, converted_type } = element
|
||||
if (type === 'BOOLEAN') return new Uint8Array([value ? 1 : 0])
|
||||
if (type === 'BYTE_ARRAY' || type === 'FIXED_LEN_BYTE_ARRAY') {
|
||||
// truncate byte arrays to 16 bytes for statistics
|
||||
@ -91,25 +91,34 @@ export function unconvertMetadata(value, schema) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {SchemaElement} element
|
||||
* @param {bigint} value
|
||||
* @returns {Uint8Array}
|
||||
* @returns {number | bigint | Uint8Array}
|
||||
*/
|
||||
export function unconvertDecimal(value) {
|
||||
if (value === 0n) return new Uint8Array([])
|
||||
const bytes = []
|
||||
let current = value
|
||||
export function unconvertDecimal({ type, type_length }, value) {
|
||||
if (type === 'INT32') return Number(value)
|
||||
if (type === 'INT64') return value
|
||||
if (type === 'FIXED_LEN_BYTE_ARRAY' && !type_length) {
|
||||
throw new Error('fixed length byte array type_length is required')
|
||||
}
|
||||
if (!type_length && !value) return new Uint8Array()
|
||||
|
||||
const bytes = []
|
||||
while (true) {
|
||||
// extract the lowest 8 bits
|
||||
const byte = Number(current & 0xffn)
|
||||
const byte = Number(value & 0xffn)
|
||||
bytes.unshift(byte)
|
||||
current >>= 8n
|
||||
value >>= 8n
|
||||
|
||||
// for nonnegative: stop when top byte has signBit = 0 AND shifted value == 0n
|
||||
// for negative: stop when top byte has signBit = 1 AND shifted value == -1n
|
||||
const signBit = byte & 0x80
|
||||
if (!signBit && current === 0n || signBit && current === -1n) {
|
||||
break
|
||||
if (type_length) {
|
||||
if (bytes.length >= type_length) break // fixed length
|
||||
} else {
|
||||
// for nonnegative: stop when top byte has signBit = 0 AND shifted value == 0n
|
||||
// for negative: stop when top byte has signBit = 1 AND shifted value == -1n
|
||||
const signBit = byte & 0x80
|
||||
if (!signBit && value === 0n || signBit && value === -1n) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -166,21 +166,41 @@ describe('unconvertDecimal', () => {
|
||||
{ input: 1234567890123456789n, expected: new Uint8Array([0x11, 0x22, 0x10, 0xf4, 0x7d, 0xe9, 0x81, 0x15]) },
|
||||
{ input: -1234567890123456789n, expected: new Uint8Array([0xee, 0xdd, 0xef, 0x0b, 0x82, 0x16, 0x7e, 0xeb]) },
|
||||
]
|
||||
/** @type {SchemaElement} */
|
||||
const element = {
|
||||
name: 'col',
|
||||
type: 'BYTE_ARRAY',
|
||||
}
|
||||
|
||||
it.for(examples)('should convert %p', ({ input, expected }) => {
|
||||
expect(parseDecimal(expected)).toEqual(input)
|
||||
})
|
||||
|
||||
it.for(examples)('should unconvert %p', ({ input, expected }) => {
|
||||
expect(unconvertDecimal(input)).toEqual(expected)
|
||||
expect(unconvertDecimal(element, input)).toEqual(expected)
|
||||
})
|
||||
|
||||
it.for(examples)('should roundtrip %p', ({ input }) => {
|
||||
expect(parseDecimal(unconvertDecimal(input))).toEqual(input)
|
||||
const byteArray = unconvertDecimal(element, input)
|
||||
if (!(byteArray instanceof Uint8Array)) throw new Error('expected Uint8Array')
|
||||
expect(parseDecimal(byteArray)).toEqual(input)
|
||||
})
|
||||
|
||||
it.for(examples)('should reverse roundtrip %p', ({ expected }) => {
|
||||
expect(unconvertDecimal(parseDecimal(expected))).toEqual(expected)
|
||||
expect(unconvertDecimal(element, parseDecimal(expected))).toEqual(expected)
|
||||
})
|
||||
|
||||
it('convert to INT32', () => {
|
||||
expect(unconvertDecimal({ name: 'col', type: 'INT32' }, 1234n)).toEqual(1234)
|
||||
})
|
||||
|
||||
it('convert to INT64', () => {
|
||||
expect(unconvertDecimal({ name: 'col', type: 'INT64' }, 1234n)).toEqual(1234n)
|
||||
})
|
||||
|
||||
it('throws if fixed length is not specified', () => {
|
||||
expect(() => unconvertDecimal({ name: 'col', type: 'FIXED_LEN_BYTE_ARRAY' }, 1234n))
|
||||
.toThrow('fixed length byte array type_length is required')
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user