Update schemaOverrides to allow nested column types, and fix the README

This commit is contained in:
Kenny Daniel 2025-10-23 12:15:14 -07:00
parent 4715a4a429
commit ee346e7d08
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
3 changed files with 5 additions and 21 deletions

@ -129,21 +129,25 @@ Most converted types will be auto-detected if you just provide data with no type
You can rely on mostly automatic schema detection while overriding the schema for specific columns. This is useful when most of the column types can be determined automatically, but you want to use a specific schema element for one particular column.
```javascript
import { parquetWrite, schemaFromColumnData } from 'hyparquet-writer'
const { ByteWriter, parquetWrite, schemaFromColumnData } = await import("hyparquet-writer")
const columnData = [
{ name: 'unsigned_int', data: [1000000, 2000000] },
{ name: 'signed_int', data: [1000000, 2000000] },
]
const writer = new ByteWriter()
parquetWrite({
writer,
columnData,
// override schema for uint column
schema: schemaFromColumnData({
columnData,
schemaOverrides: {
unsigned_int: {
name: 'unsigned_int',
type: 'INT32',
converted_type: 'UINT_32',
repetition_type: 'REQUIRED',
},
},
}),

@ -26,8 +26,6 @@ export function schemaFromColumnData({ columnData, schemaOverrides }) {
// use schema override
const override = schemaOverrides[name]
if (override.name !== name) throw new Error('schema override name does not match column name')
if (override.num_children) throw new Error('schema override cannot have children')
if (override.repetition_type === 'REPEATED') throw new Error('schema override cannot be repeated')
schema.push(override)
} else if (type) {
// use provided type

@ -44,24 +44,6 @@ describe('schemaFromColumnData', () => {
).toThrow(/columns must have the same length/)
})
it('rejects override type REPEATED', () => {
expect(() =>
schemaFromColumnData({
columnData: [{ name: 'x', data: new Int32Array([1]) }],
schemaOverrides: { x: { name: 'x', type: 'INT32', repetition_type: 'REPEATED' } },
})
).toThrow(/cannot be repeated/)
})
it('rejects override with children', () => {
expect(() =>
schemaFromColumnData({
columnData: [{ name: 'x', data: new Int32Array([1]) }],
schemaOverrides: { x: { name: 'x', type: 'INT32', num_children: 1 } },
})
).toThrow(/cannot have children/)
})
it('rejects override with mismatched name', () => {
expect(() =>
schemaFromColumnData({