hyparquet/test/column.test.js

62 lines
2.7 KiB
JavaScript
Raw Normal View History

import { describe, expect, it } from 'vitest'
2024-12-20 08:53:56 +00:00
import { getColumnRange, readColumn } from '../src/column.js'
import { parquetMetadata } from '../src/hyparquet.js'
import { getSchemaPath } from '../src/schema.js'
import { asyncBufferFromFile } from '../src/utils.js'
2024-12-20 08:53:56 +00:00
// Expected decoded values for the first column of the first row group in
// test/files/float16_nonzeros_and_nans.parquet. The parameterized readColumn
// test compares against this array or a prefix of it.
const values = [null, 1, -2, NaN, 0, -1, -0, 2]
2024-12-20 08:53:56 +00:00
describe('readColumn', () => {
  /**
   * Load a parquet test file and build the arguments readColumn needs for one
   * column chunk of the file's first row group.
   *
   * @param {string} testFile path to the parquet fixture
   * @param {number} columnIndex index of the column within row group 0
   * @returns a `reader` positioned at the start of the column chunk bytes and
   *   the matching `columnDecoder` description
   * @throws {Error} if the column chunk carries no metadata
   */
  async function loadColumn(testFile, columnIndex) {
    const file = await asyncBufferFromFile(testFile)
    const arrayBuffer = await file.slice(0)
    const metadata = parquetMetadata(arrayBuffer)
    const { meta_data } = metadata.row_groups[0].columns[columnIndex]
    if (!meta_data) throw new Error(`No column metadata for ${testFile}`)

    // Restrict the reader to the bytes of this column chunk only.
    const [columnStartByte, columnEndByte] = getColumnRange(meta_data).map(Number)
    const columnArrayBuffer = arrayBuffer.slice(columnStartByte, columnEndByte)
    const reader = { view: new DataView(columnArrayBuffer), offset: 0 }

    const schemaPath = getSchemaPath(metadata.schema, meta_data.path_in_schema)
    const columnDecoder = {
      columnName: meta_data.path_in_schema.join('.'),
      type: meta_data.type,
      element: schemaPath[schemaPath.length - 1].element,
      schemaPath,
      codec: meta_data.codec,
    }
    return { reader, columnDecoder }
  }

  // `$rowGroupEnd` is vitest's object-property interpolation, so each case gets
  // its own title. Jest's `%p` specifier is not recognised by vitest.
  it.for([
    { rowGroupEnd: Infinity, expected: [values] },
    { rowGroupEnd: 2, expected: [values.slice(0, 2)] },
    { rowGroupEnd: 0, expected: [] },
  ])('readColumn with rowGroupEnd $rowGroupEnd', async ({ rowGroupEnd, expected }) => {
    const { reader, columnDecoder } = await loadColumn('test/files/float16_nonzeros_and_nans.parquet', 0)
    const result = readColumn(reader, 0, rowGroupEnd, columnDecoder)
    expect(result).toEqual(expected)
  })

  it('readColumn should return a typed array', async () => {
    // second column
    const { reader, columnDecoder } = await loadColumn('test/files/datapage_v2.snappy.parquet', 1)
    const columnData = readColumn(reader, 0, Infinity, columnDecoder)
    expect(columnData[0]).toBeInstanceOf(Int32Array)
  })
})