hyparquet/test/read.test.js

127 lines
3.2 KiB
JavaScript
Raw Normal View History

2024-01-15 19:08:48 +00:00
import { describe, expect, it } from 'vitest'
import { parquetRead } from '../src/hyparquet.js'
2024-07-26 22:01:01 +00:00
import { asyncBufferFromFile, toJson } from '../src/utils.js'
2024-02-23 18:25:06 +00:00
/**
 * Tests for parquetRead: input validation, row-range selection, and
 * single-column reads of flat, list-like and map-like columns.
 *
 * Several tests assert inside the onChunk / onComplete callbacks. Those tests
 * declare the expected assertion count up front so that a callback which is
 * never invoked fails the test instead of letting it pass vacuously.
 */
describe('parquetRead', () => {
  it('throws error for undefined file', async () => {
    // @ts-expect-error testing invalid input
    await expect(parquetRead({ file: undefined }))
      .rejects.toThrow('parquet file is required')
  })

  it('throws error for undefined byteLength', async () => {
    // File-like object that provides slice() but no byteLength
    const file = { byteLength: undefined, slice: () => new ArrayBuffer(0) }
    // @ts-expect-error testing invalid input
    await expect(parquetRead({ file }))
      .rejects.toThrow('parquet file byteLength is required')
  })

  it('filter by row', async () => {
    // onComplete must run: its assertion is the only check in this test
    expect.assertions(1)
    const file = await asyncBufferFromFile('test/files/rowgroups.parquet')
    await parquetRead({
      file,
      rowStart: 2,
      rowEnd: 4,
      onComplete: rows => {
        // The expected rows show rowStart is a zero-based inclusive index
        // and rowEnd is exclusive.
        expect(toJson(rows)).toEqual([[3], [4]])
      },
    })
  })

  it('filter by row overestimate', async () => {
    // onComplete must run: its assertion is the only check in this test
    expect.assertions(1)
    const file = await asyncBufferFromFile('test/files/rowgroups.parquet')
    await parquetRead({
      file,
      // rowEnd beyond the 15 available rows is expected to return them all
      rowEnd: 100,
      onComplete: rows => {
        expect(toJson(rows)).toEqual([[1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15]])
      },
    })
  })

  it('read a single column', async () => {
    // One assertion in onChunk plus one in onComplete
    expect.assertions(2)
    const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
    await parquetRead({
      file,
      columns: ['c'],
      onChunk: chunk => {
        expect(toJson(chunk)).toEqual({
          columnName: 'c',
          columnData: [2, 3, 4, 5, 2],
          rowStart: 0,
          rowEnd: 5,
        })
      },
      onComplete: rows => {
        expect(toJson(rows)).toEqual([
          [2],
          [3],
          [4],
          [5],
          [2],
        ])
      },
    })
  })

  it('read a list-like column', async () => {
    // One assertion in onChunk plus one in onComplete
    expect.assertions(2)
    const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
    await parquetRead({
      file,
      columns: ['e'],
      onChunk: chunk => {
        expect(toJson(chunk)).toEqual({
          columnName: 'e',
          columnData: [[1, 2, 3], null, null, [1, 2, 3], [1, 2]],
          rowStart: 0,
          rowEnd: 5,
        })
      },
      onComplete: rows => {
        expect(toJson(rows)).toEqual([
          [[1, 2, 3]],
          [null],
          [null],
          [[1, 2, 3]],
          [[1, 2]],
        ])
      },
    })
  })

  it('read a map-like column', async () => {
    // One assertion in onChunk plus one in onComplete
    expect.assertions(2)
    const file = await asyncBufferFromFile('test/files/nullable.impala.parquet')
    await parquetRead({
      file,
      columns: ['int_map'],
      onChunk: chunk => {
        expect(toJson(chunk)).toEqual({
          columnName: 'int_map',
          columnData: [
            { k1: 1, k2: 100 },
            { k1: 2, k2: null },
            { },
            { },
            { },
            null,
            { k1: null, k3: null },
          ],
          rowStart: 0,
          rowEnd: 7,
        })
      },
      onComplete: rows => {
        expect(toJson(rows)).toEqual([
          [{ k1: 1, k2: 100 }],
          [{ k1: 2, k2: null }],
          [{ }],
          [{ }],
          [{ }],
          [null],
          [{ k1: null, k3: null }],
        ])
      },
    })
  })
})