// Mirror of https://github.com/asadbek064/hyparquet-writer.git
// Synced 2025-12-05 23:31:54 +00:00
// 106 lines, 3.2 KiB, JavaScript
|
|
import { describe, expect, it } from 'vitest'
|
||
|
|
import { autoSchemaElement, getMaxDefinitionLevel, getMaxRepetitionLevel, schemaFromColumnData } from '../src/schema.js'
|
||
|
|
|
||
|
|
/**
 * @import {SchemaElement} from 'hyparquet'
 */
|
||
|
|
|
||
|
|
/**
 * Tests for schemaFromColumnData.
 *
 * Covers: an explicitly typed non-nullable column, a per-column schema
 * override, and the two error paths exercised here (unequal column lengths
 * and an override whose name differs from its column).
 */
describe('schemaFromColumnData', () => {
  it('honours provided type with nullable = false → REQUIRED', () => {
    const schema = schemaFromColumnData({
      columnData: [
        { name: 'id', data: new Int32Array([1, 2, 3]), type: 'INT32', nullable: false },
      ],
    })
    // Index 0 is not asserted here (presumably the root element — confirm
    // against src/schema.js); the first column is expected at index 1.
    expect(schema[1]).toEqual({ name: 'id', type: 'INT32', repetition_type: 'REQUIRED' })
  })

  it('applies valid schema override verbatim', () => {
    const schema = schemaFromColumnData({
      columnData: [{ name: 'strings', data: ['a', 'b'] }],
      schemaOverrides: {
        strings: {
          name: 'strings',
          type: 'BYTE_ARRAY',
          converted_type: 'UTF8',
          repetition_type: 'OPTIONAL',
        },
      },
    })
    // Each overridden field is checked individually on the column element.
    expect(schema[1].name).toBe('strings')
    expect(schema[1].type).toBe('BYTE_ARRAY')
    expect(schema[1].converted_type).toBe('UTF8')
    expect(schema[1].repetition_type).toBe('OPTIONAL')
  })

  it('throws when column lengths differ', () => {
    // Column 'a' has one value, column 'b' has two.
    expect(() =>
      schemaFromColumnData({
        columnData: [
          { name: 'a', data: new Int32Array([1]) },
          { name: 'b', data: new Int32Array([1, 2]) },
        ],
      })
    ).toThrow(/columns must have the same length/)
  })

  it('rejects override with mismatched name', () => {
    // The override is keyed by 'x' but declares name 'y'.
    expect(() =>
      schemaFromColumnData({
        columnData: [{ name: 'x', data: new Int32Array([1]) }],
        schemaOverrides: { x: { name: 'y', type: 'INT32' } },
      })
    ).toThrow(/does not match column name/)
  })
})
|
||
|
|
|
||
|
|
/**
 * Tests for autoSchemaElement, which is called with a column name and its
 * data and returns a schema element.
 *
 * Covers: typed-array detection, numeric promotion, nullability, the
 * empty-array fallback, and rejection of incompatible mixed values.
 */
describe('autoSchemaElement', () => {
  // Each row is [input data, expected parquet type].
  it.each([
    [new Int32Array([1, 2]), 'INT32'],
    [new BigInt64Array([1n, 2n]), 'INT64'],
    [new Float32Array([1, 2]), 'FLOAT'],
    [new Float64Array([1, 2]), 'DOUBLE'],
  ])('detects typed arrays (%#)', (data, expected) => {
    const el = autoSchemaElement('col', data)
    expect(el.type).toBe(expected)
    // Typed-array columns are expected to be REQUIRED.
    expect(el.repetition_type).toBe('REQUIRED')
  })

  it('promotes INT32 + DOUBLE mix to DOUBLE', () => {
    // An integer and a fractional number in the same plain array.
    const el = autoSchemaElement('mix', [1, 2.5])
    expect(el.type).toBe('DOUBLE')
  })

  it('sets repetition_type OPTIONAL when nulls present', () => {
    const el = autoSchemaElement('maybe', [null, 1])
    expect(el.repetition_type).toBe('OPTIONAL')
  })

  it('falls back to BYTE_ARRAY for empty arrays', () => {
    // With no values to inspect, the expected result is an OPTIONAL BYTE_ARRAY.
    const el = autoSchemaElement('empty', [])
    expect(el.type).toBe('BYTE_ARRAY')
    expect(el.repetition_type).toBe('OPTIONAL')
  })

  it('throws on incompatible mixed scalar types', () => {
    // A number and a string in the same column.
    expect(() => autoSchemaElement('bad', [1, 'a'])).toThrow(/mixed types/)
  })
})
|
||
|
|
|
||
|
|
/**
 * Tests for getMaxRepetitionLevel and getMaxDefinitionLevel, both called
 * with the same three-element schema path.
 */
describe('level helpers', () => {
  /** @type {SchemaElement[]} */
  const path = [
    { name: 'root', repetition_type: 'REPEATED' },
    { name: 'child', repetition_type: 'OPTIONAL' },
    { name: 'leaf', repetition_type: 'REPEATED' },
  ]

  it('computes max repetition level', () => {
    // Two of the three path elements ('root' and 'leaf') are REPEATED.
    expect(getMaxRepetitionLevel(path)).toBe(2)
  })

  it('computes max definition level', () => {
    // NOTE(review): all three elements are non-REQUIRED yet the expected
    // level is 2 — presumably the first (root) element is not counted;
    // confirm against src/schema.js.
    expect(getMaxDefinitionLevel(path)).toBe(2)
  })
})
|