Moar tests

This commit is contained in:
Kenny Daniel 2025-03-29 12:28:25 -07:00
parent 81def93ce4
commit 07928e8eb7
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
4 changed files with 40 additions and 6 deletions

@ -4,7 +4,7 @@
[![minzipped](https://img.shields.io/bundlephobia/minzip/hyparquet-writer)](https://www.npmjs.com/package/hyparquet-writer)
[![workflow status](https://github.com/hyparam/hyparquet-writer/actions/workflows/ci.yml/badge.svg)](https://github.com/hyparam/hyparquet-writer/actions)
[![mit license](https://img.shields.io/badge/License-MIT-orange.svg)](https://opensource.org/licenses/MIT)
![coverage](https://img.shields.io/badge/Coverage-96-darkred)
![coverage](https://img.shields.io/badge/Coverage-97-darkred)
[![dependencies](https://img.shields.io/badge/Dependencies-0-blueviolet)](https://www.npmjs.com/package/hyparquet?activeTab=dependencies)
## Usage

@ -46,7 +46,7 @@ export function getSchemaElementForValues(name, values, type) {
converted_type = 'JSON'
valueType = 'BYTE_ARRAY'
}
else if (!valueType) throw new Error(`Cannot determine parquet type for: ${value}`)
else if (!valueType) throw new Error(`cannot determine parquet type for: ${value}`)
// expand type if necessary
if (type === undefined) {

@ -20,7 +20,7 @@ export function parquetWrite({ columnData, compressed = true }) {
const num_rows = columnData.length ? BigInt(columnData[0].data.length) : 0n
for (const { data } of columnData) {
if (BigInt(data.length) !== num_rows) {
throw new Error('parquetWrite: all columns must have the same length')
throw new Error('columns must have the same length')
}
}

@ -75,13 +75,16 @@ describe('parquetWrite', () => {
})
it('efficiently serializes column with few distinct values', async () => {
const data = Array(10000).fill('aaaa')
const data = Array(100000)
.fill('aaaa', 0, 50000)
.fill('bbbb', 50000, 100000)
const file = parquetWrite({ columnData: [{ name: 'string', data }] })
expect(file.byteLength).toBe(161)
expect(file.byteLength).toBe(178)
// round trip
const result = await parquetReadObjects({ file })
expect(result.length).toBe(10000)
expect(result.length).toBe(100000)
expect(result[0]).toEqual({ string: 'aaaa' })
expect(result[50000]).toEqual({ string: 'bbbb' })
})
it('serializes list types', async () => {
@ -150,6 +153,25 @@ describe('parquetWrite', () => {
])
})
// A table with zero columns must round-trip to an empty result set.
it('serializes empty table', async () => {
  const deserialized = await roundTripDeserialize([])
  expect(deserialized).toEqual([])
})
// Floating-point specials (NaN, ±Infinity, signed zero) must survive a write/read cycle.
it('handles special numeric values', async () => {
  const specials = [NaN, Infinity, -Infinity, 42, 0, -0]
  const result = await roundTripDeserialize([
    { name: 'double', data: specials },
  ])
  expect(result[0].double).toBeNaN()
  expect(result[1].double).toEqual(Infinity)
  expect(result[2].double).toEqual(-Infinity)
  expect(result[3].double).toEqual(42)
  expect(result[4].double).toEqual(0)
  // toEqual distinguishes -0 from +0 (Object.is semantics), so both
  // assertions below can hold at once.
  expect(result[5].double).toEqual(-0)
  expect(result[5].double).not.toEqual(0)
})
it('throws for wrong type specified', () => {
expect(() => parquetWrite({ columnData: [{ name: 'int', data: [1, 2, 3], type: 'BOOLEAN' }] }))
.toThrow('parquet cannot write mixed types')
@ -166,4 +188,16 @@ describe('parquetWrite', () => {
expect(() => parquetWrite({ columnData: [{ name: 'mixed', data: [1, 2, 3, 'boom'] }] }))
.toThrow('mixed types not supported')
})
// Row-count validation: columns with differing lengths must be rejected.
it('throws error when columns have mismatched lengths', () => {
  const columnData = [
    { name: 'col1', data: [1, 2, 3] },
    { name: 'col2', data: [4, 5] },
  ]
  expect(() => parquetWrite({ columnData })).toThrow('columns must have the same length')
})
// A function value has no parquet type mapping and must be rejected.
it('throws error for unsupported data types', () => {
  const columnData = [{ name: 'func', data: [() => {}] }]
  expect(() => parquetWrite({ columnData }))
    .toThrow('cannot determine parquet type for: () => {}')
})
})