2024-04-12 20:09:31 +00:00
|
|
|
import fs from 'fs'
|
2024-05-20 09:07:40 +00:00
|
|
|
import { compressors } from 'hyparquet-compressors'
|
2024-04-12 20:09:31 +00:00
|
|
|
import { describe, expect, it } from 'vitest'
|
2025-05-30 20:01:20 +00:00
|
|
|
import { parquetMetadataAsync, parquetRead, toJson } from '../src/hyparquet.js'
|
|
|
|
|
import { asyncBufferFromFile } from '../src/node.js'
|
2024-07-26 22:01:01 +00:00
|
|
|
import { fileToJson } from './helpers.js'
|
2024-04-12 20:09:31 +00:00
|
|
|
|
|
|
|
|
/**
 * Fixture-driven tests for parquetRead.
 *
 * Every `.parquet` file found in test/files is read and compared against the
 * `.json` file with the same base name in the same directory. Two tests are
 * registered per file: one reading all rows, one reading only the last row.
 */
describe('parquetRead test files', () => {
  const files = fs.readdirSync('test/files').filter(f => f.endsWith('.parquet'))

  for (const filename of files) {
    // Expected-output fixture: same base name with a `.json` extension.
    // Only the trailing extension is stripped, so a file name that contains
    // ".parquet" somewhere else still maps to the right fixture.
    const jsonPath = `test/files/${filename.replace(/\.parquet$/, '')}.json`

    it(`parse data from ${filename}`, async () => {
      // The only assertion lives inside onComplete: require that it actually
      // ran, otherwise the test would pass if the callback were never invoked.
      expect.assertions(1)
      const file = await asyncBufferFromFile(`test/files/${filename}`)
      await parquetRead({
        file,
        compressors,
        onComplete(rows) {
          const expected = fileToJson(jsonPath)
          // stringify and parse to make legal json (NaN, -0, etc)
          expect(JSON.parse(JSON.stringify(toJson(rows)))).toEqual(expected)
        },
      })
    })

    it(`read the last row from ${filename}`, async () => {
      // this exercises some of the page-skipping optimizations
      // Same guard as above: fail if onComplete is never called.
      expect.assertions(1)
      const file = await asyncBufferFromFile(`test/files/${filename}`)
      const metadata = await parquetMetadataAsync(file)
      // Number() converts num_rows to a plain number for the arithmetic below.
      let numRows = Number(metadata.num_rows)
      // repeated_no_annotation has wrong num_rows in metadata:
      if (filename === 'repeated_no_annotation.parquet') numRows = 6
      await parquetRead({
        file,
        compressors,
        // Request exactly one row: the final one.
        rowStart: numRows - 1,
        rowEnd: numRows,
        onComplete(rows) {
          // Compare against the last entry of the full expected output.
          const expected = [fileToJson(jsonPath).at(-1)]
          expect(toJson(rows)).toEqual(expected)
        },
      })
    })
  }
})
|