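// Source: hyparquet/test/schemaTree.test.js (63 lines, 1.5 KiB, JavaScript)
// NOTE(review): this text appears to have been captured from a web "history" view
// of the file; annotation timestamp shown above the imports: 2024-01-20 20:17:11 +00:00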
import { describe, expect, it } from 'vitest'
import { parquetMetadata, parquetSchema } from '../src/hyparquet.js'
import { readFileToArrayBuffer } from './helpers.js'
// Each case reads a parquet fixture from disk, parses its metadata, builds the
// schema tree from that metadata, and compares the tree against the expected
// literal declared at module level in this file.
describe('schemaTree', () => {
  it('parse schema tree from addrtype-missing-value.parquet', async () => {
    const arrayBuffer = await readFileToArrayBuffer('test/files/addrtype-missing-value.parquet')
    const metadata = parquetMetadata(arrayBuffer)
    const result = parquetSchema(metadata)
    // toEqual performs a recursive structural comparison of the whole tree
    expect(result).toEqual(addrtypeSchema)
  })

  it('parse schema tree from rowgroups.parquet', async () => {
    const arrayBuffer = await readFileToArrayBuffer('test/files/rowgroups.parquet')
    const metadata = parquetMetadata(arrayBuffer)
    const result = parquetSchema(metadata)
    expect(result).toEqual(rowgroupsSchema)
  })
})
// Parquet v1 from DuckDB
// Expected schema tree for test/files/addrtype-missing-value.parquet:
// a root element named 'duckdb_schema' with a single optional UTF8
// BYTE_ARRAY column named 'ADDRTYPE'.
// NOTE(review): `count` presumably is the number of schema elements in the
// subtree (node plus descendants) — confirm against parquetSchema.
const addrtypeSchema = {
  children: [
    {
      children: [],
      count: 1,
      element: {
        converted_type: 'UTF8',
        name: 'ADDRTYPE',
        repetition_type: 'OPTIONAL',
        type: 'BYTE_ARRAY',
      },
    },
  ],
  count: 2,
  element: {
    name: 'duckdb_schema',
    num_children: 1,
    repetition_type: 'REQUIRED',
  },
}
// Parquet v2 from pandas with 2 row groups
// Expected schema tree for test/files/rowgroups.parquet:
// a root element named 'schema' with a single optional INT64 column
// named 'numbers'.
// NOTE(review): `count` presumably is the number of schema elements in the
// subtree (node plus descendants) — confirm against parquetSchema.
const rowgroupsSchema = {
  children: [
    {
      children: [],
      count: 1,
      element: {
        name: 'numbers',
        repetition_type: 'OPTIONAL',
        type: 'INT64',
      },
    },
  ],
  count: 2,
  element: {
    name: 'schema',
    num_children: 1,
    repetition_type: 'REQUIRED',
  },
}