diff --git a/README.md b/README.md
index b942a4a..6a056fb 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 [![minzipped](https://img.shields.io/bundlephobia/minzip/hyparquet-writer)](https://www.npmjs.com/package/hyparquet-writer)
 [![workflow status](https://github.com/hyparam/hyparquet-writer/actions/workflows/ci.yml/badge.svg)](https://github.com/hyparam/hyparquet-writer/actions)
 [![mit license](https://img.shields.io/badge/License-MIT-orange.svg)](https://opensource.org/licenses/MIT)
-![coverage](https://img.shields.io/badge/Coverage-96-darkred)
+![coverage](https://img.shields.io/badge/Coverage-97-darkred)
 [![dependencies](https://img.shields.io/badge/Dependencies-0-blueviolet)](https://www.npmjs.com/package/hyparquet?activeTab=dependencies)
 
 ## Usage
diff --git a/src/schema.js b/src/schema.js
index 62f77e8..2692773 100644
--- a/src/schema.js
+++ b/src/schema.js
@@ -46,7 +46,7 @@ export function getSchemaElementForValues(name, values, type) {
     converted_type = 'JSON'
     valueType = 'BYTE_ARRAY'
   }
-  else if (!valueType) throw new Error(`Cannot determine parquet type for: ${value}`)
+  else if (!valueType) throw new Error(`cannot determine parquet type for: ${value}`)
 
   // expand type if necessary
   if (type === undefined) {
diff --git a/src/write.js b/src/write.js
index 04a52dc..b8827de 100644
--- a/src/write.js
+++ b/src/write.js
@@ -20,7 +20,7 @@ export function parquetWrite({ columnData, compressed = true }) {
   const num_rows = columnData.length ? BigInt(columnData[0].data.length) : 0n
   for (const { data } of columnData) {
     if (BigInt(data.length) !== num_rows) {
-      throw new Error('parquetWrite: all columns must have the same length')
+      throw new Error('columns must have the same length')
     }
   }
 
diff --git a/test/write.test.js b/test/write.test.js
index fa40b12..e4ee465 100644
--- a/test/write.test.js
+++ b/test/write.test.js
@@ -75,13 +75,16 @@ describe('parquetWrite', () => {
   })
 
   it('efficiently serializes column with few distinct values', async () => {
-    const data = Array(10000).fill('aaaa')
+    const data = Array(100000)
+      .fill('aaaa', 0, 50000)
+      .fill('bbbb', 50000, 100000)
     const file = parquetWrite({ columnData: [{ name: 'string', data }] })
-    expect(file.byteLength).toBe(161)
+    expect(file.byteLength).toBe(178)
     // round trip
     const result = await parquetReadObjects({ file })
-    expect(result.length).toBe(10000)
+    expect(result.length).toBe(100000)
     expect(result[0]).toEqual({ string: 'aaaa' })
+    expect(result[50000]).toEqual({ string: 'bbbb' })
   })
 
   it('serializes list types', async () => {
@@ -150,6 +153,25 @@ describe('parquetWrite', () => {
     ])
   })
 
+  it('serializes empty table', async () => {
+    const result = await roundTripDeserialize([])
+    expect(result).toEqual([])
+  })
+
+  it('handles special numeric values', async () => {
+    const data = [
+      { name: 'double', data: [NaN, Infinity, -Infinity, 42, 0, -0] },
+    ]
+    const result = await roundTripDeserialize(data)
+    expect(result[0].double).toBeNaN()
+    expect(result[1].double).toEqual(Infinity)
+    expect(result[2].double).toEqual(-Infinity)
+    expect(result[3].double).toEqual(42)
+    expect(result[4].double).toEqual(0)
+    expect(result[5].double).toEqual(-0)
+    expect(result[5].double).not.toEqual(0)
+  })
+
   it('throws for wrong type specified', () => {
     expect(() => parquetWrite({ columnData: [{ name: 'int', data: [1, 2, 3], type: 'BOOLEAN' }] }))
       .toThrow('parquet cannot write mixed types')
@@ -166,4 +188,16 @@ describe('parquetWrite', () => {
     expect(() => parquetWrite({ columnData: [{ name: 'mixed', data: [1, 2, 3, 'boom'] }] }))
       .toThrow('mixed types not supported')
   })
+
+  it('throws error when columns have mismatched lengths', () => {
+    expect(() => parquetWrite({ columnData: [
+      { name: 'col1', data: [1, 2, 3] },
+      { name: 'col2', data: [4, 5] },
+    ] })).toThrow('columns must have the same length')
+  })
+
+  it('throws error for unsupported data types', () => {
+    expect(() => parquetWrite({ columnData: [{ name: 'func', data: [() => {}] }] }))
+      .toThrow('cannot determine parquet type for: () => {}')
+  })
 })