hyparquet-writer/test/write.test.js
Kenny Daniel 947a78f72d
Choose best of RLE or bit-packed for hybrid encoding
sparse booleans: 2634 -> 1399 bytes
2025-03-25 23:02:24 -07:00

55 lines
1.9 KiB
JavaScript

import { parquetMetadata, parquetReadObjects } from 'hyparquet'
import { describe, expect, it } from 'vitest'
import { parquetWrite } from '../src/index.js'
import { exampleMetadata } from './metadata.test.js'
/**
 * Write column data to a parquet file with parquetWrite, then read that file
 * back with parquetReadObjects.
 *
 * @param {Record<string, any[]>} columnData column name -> array of values, passed unchanged to parquetWrite
 * @returns {Promise<Record<string, any>[]>} the rows resolved by parquetReadObjects, one object per row
 */
async function roundTripDeserialize(columnData) {
  const buffer = parquetWrite(columnData)
  const rows = await parquetReadObjects({ file: buffer })
  return rows
}
// Shared fixture: one entry per column, each holding four row values.
// The note above each column is the parquet type it is meant to exercise.
const data = {
  // BOOLEAN
  bool: [true, false, true, false],
  // INT32
  int: [0, 127, 0x7fff, 0x7fffffff],
  // INT64
  bigint: [0n, 127n, 0x7fffn, 0x7fffffffffffffffn],
  // DOUBLE
  double: [0, 0.0001, 123.456, 1e100],
  // BYTE_ARRAY
  string: ['a', 'b', 'c', 'd'],
  // BOOLEAN nullable
  nullable: [true, false, null, null],
}
// Suite for parquetWrite: file metadata, value round-trip, and output size
// for a column that is almost entirely null.
describe('parquetWrite', () => {
  it('writes expected metadata', () => {
    // Parse the metadata straight out of the freshly written file.
    const written = parquetWrite(data)
    expect(parquetMetadata(written)).toEqual(exampleMetadata)
  })

  it('serializes basic types correctly', async () => {
    // One expected object per row, built from the same values as the fixture columns.
    const expectedRows = [
      { bool: true, int: 0, bigint: 0n, double: 0, string: 'a', nullable: true },
      { bool: false, int: 127, bigint: 127n, double: 0.0001, string: 'b', nullable: false },
      { bool: true, int: 0x7fff, bigint: 0x7fffn, double: 123.456, string: 'c', nullable: null },
      { bool: false, int: 0x7fffffff, bigint: 0x7fffffffffffffffn, double: 1e100, string: 'd', nullable: null },
    ]
    const rows = await roundTripDeserialize(data)
    expect(rows).toEqual(expectedRows)
  })

  it('efficiently serializes sparse booleans', () => {
    // 10000 rows, all null except the four positions set below.
    const sparse = Array.from({ length: 10000 }, () => null)
    const populated = [[10, true], [100, false], [500, true], [9999, false]]
    for (const [index, value] of populated) {
      sparse[index] = value
    }
    const written = parquetWrite({ bool: sparse })
    // Exact byte counts act as a regression guard on the encoded size.
    expect(written.byteLength).toBe(1399)
    expect(parquetMetadata(written).metadata_length).toBe(89)
  })
})