import { describe, expect, it } from 'vitest'
import { parquetMetadata, parquetSchema } from '../src/hyparquet.js'
import { readFileToArrayBuffer } from './helpers.js'

// Checks that parquetSchema turns the metadata of a fixture file into the expected schema tree.
describe('parquetSchema', () => {
  it('parse schema tree from rowgroups.parquet', async () => {
    // Load the fixture, derive its metadata, then build the schema tree from that metadata.
    const arrayBuffer = await readFileToArrayBuffer('test/files/rowgroups.parquet')
    const metadata = parquetMetadata(arrayBuffer)
    const result = parquetSchema(metadata)
    expect(result).toEqual(rowgroupsSchema)
  })
})
// Parquet v2 from pandas with 2 row groups
// Expected schema tree for test/files/rowgroups.parquet: a root node named
// 'schema' with a single child node for the 'numbers' column.
const rowgroupsSchema = {
  children: [
    {
      children: [],
      count: 1,
      element: {
        name: 'numbers',
        repetition_type: 'OPTIONAL',
        type: 'INT64',
      },
      path: ['numbers'],
    },
  ],
  count: 2,
  element: {
    name: 'schema',
    num_children: 1,
    repetition_type: 'REQUIRED',
  },
  path: [],
}