From ee346e7d0890a9cca62475de728ccdeb4d77df52 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Thu, 23 Oct 2025 12:15:14 -0700 Subject: [PATCH] Update schemaOverrides to allow nested column types, and fix the README --- README.md | 6 +++++- src/schema.js | 2 -- test/schema.test.js | 18 ------------------ 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index dd5cea6..3f682ea 100644 --- a/README.md +++ b/README.md @@ -129,21 +129,25 @@ Most converted types will be auto-detected if you just provide data with no type You can use mostly automatic schema detection, but override the schema for specific columns. This is useful if most of the column types can be automatically determined, but you want to use a specific schema element for one particular element. ```javascript -import { parquetWrite, schemaFromColumnData } from 'hyparquet-writer' +const { ByteWriter, parquetWrite, schemaFromColumnData } = await import("hyparquet-writer") const columnData = [ { name: 'unsigned_int', data: [1000000, 2000000] }, { name: 'signed_int', data: [1000000, 2000000] }, ] +const writer = new ByteWriter() parquetWrite({ + writer, columnData, // override schema for uint column schema: schemaFromColumnData({ columnData, schemaOverrides: { unsigned_int: { + name: 'unsigned_int', type: 'INT32', converted_type: 'UINT_32', + repetition_type: 'REQUIRED', }, }, }), diff --git a/src/schema.js b/src/schema.js index 0f2ec1c..0e31f28 100644 --- a/src/schema.js +++ b/src/schema.js @@ -26,8 +26,6 @@ export function schemaFromColumnData({ columnData, schemaOverrides }) { // use schema override const override = schemaOverrides[name] if (override.name !== name) throw new Error('schema override name does not match column name') - if (override.num_children) throw new Error('schema override cannot have children') - if (override.repetition_type === 'REPEATED') throw new Error('schema override cannot be repeated') schema.push(override) } else if (type) { // use provided type diff --git a/test/schema.test.js b/test/schema.test.js index f235493..35d7d1f 100644 --- a/test/schema.test.js +++ b/test/schema.test.js @@ -44,24 +44,6 @@ describe('schemaFromColumnData', () => { ).toThrow(/columns must have the same length/) }) - it('rejects override type REPEATED', () => { - expect(() => - schemaFromColumnData({ - columnData: [{ name: 'x', data: new Int32Array([1]) }], - schemaOverrides: { x: { name: 'x', type: 'INT32', repetition_type: 'REPEATED' } }, - }) - ).toThrow(/cannot be repeated/) - }) - - it('rejects override with children', () => { - expect(() => - schemaFromColumnData({ - columnData: [{ name: 'x', data: new Int32Array([1]) }], - schemaOverrides: { x: { name: 'x', type: 'INT32', num_children: 1 } }, - }) - ).toThrow(/cannot have children/) - }) - it('rejects override with mismatched name', () => { expect(() => schemaFromColumnData({