2024-01-15 19:08:48 +00:00
|
|
|
import { describe, expect, it } from 'vitest'
|
|
|
|
|
import { parquetRead } from '../src/hyparquet.js'
|
2024-04-07 16:33:57 +00:00
|
|
|
import { toJson } from '../src/utils.js'
|
2024-04-12 20:09:31 +00:00
|
|
|
import { fileToAsyncBuffer } from './helpers.js'
|
2024-02-23 18:25:06 +00:00
|
|
|
|
2024-02-17 00:07:09 +00:00
|
|
|
/**
 * Tests for `parquetRead`: argument validation, limiting the row range with
 * `rowEnd`, and selecting a single column (scalar, list-like and map-like).
 *
 * Values delivered through the `onChunk` / `onComplete` callbacks are converted
 * with `toJson` inside the callback and stored; the assertions run after the
 * awaited `parquetRead` call. A callback that is never invoked therefore fails
 * the test instead of letting it pass with zero assertions.
 */
describe('parquetRead', () => {
  it('throws error for undefined file', async () => {
    const file = undefined
    await expect(parquetRead({ file }))
      .rejects.toThrow('parquet file is required')
  })

  it('throws error for undefined byteLength', async () => {
    // File-like object whose byteLength is explicitly undefined.
    const file = { byteLength: undefined, slice: () => new ArrayBuffer(0) }
    await expect(parquetRead({ file }))
      .rejects.toThrow('parquet file byteLength is required')
  })

  it('filter by row', async () => {
    const file = fileToAsyncBuffer('test/files/rowgroups.parquet')
    /** @type {any} */
    let rows
    await parquetRead({
      file,
      rowEnd: 2,
      onComplete: result => { rows = toJson(result) },
    })
    // rows stays undefined (and this fails) if onComplete was never called
    expect(rows).toEqual([
      [1], [2],
    ])
  })

  it('read a single column', async () => {
    const file = fileToAsyncBuffer('test/files/datapage_v2.snappy.parquet')
    /** @type {any[]} */
    const chunks = []
    /** @type {any} */
    let rows
    await parquetRead({
      file,
      columns: ['c'],
      onChunk: chunk => { chunks.push(toJson(chunk)) },
      onComplete: result => { rows = toJson(result) },
    })
    // exactly one chunk, covering every row of the selected column
    expect(chunks).toEqual([
      {
        columnName: 'c',
        columnData: [2, 3, 4, 5, 2],
        rowStart: 0,
        rowEnd: 5,
      },
    ])
    expect(rows).toEqual([
      [2],
      [3],
      [4],
      [5],
      [2],
    ])
  })

  it('read a list-like column', async () => {
    const file = fileToAsyncBuffer('test/files/datapage_v2.snappy.parquet')
    /** @type {any[]} */
    const chunks = []
    /** @type {any} */
    let rows
    await parquetRead({
      file,
      columns: ['e'],
      onChunk: chunk => { chunks.push(toJson(chunk)) },
      onComplete: result => { rows = toJson(result) },
    })
    // exactly one chunk, covering every row of the selected column
    expect(chunks).toEqual([
      {
        columnName: 'e',
        columnData: [[1, 2, 3], null, null, [1, 2, 3], [1, 2]],
        rowStart: 0,
        rowEnd: 5,
      },
    ])
    expect(rows).toEqual([
      [[1, 2, 3]],
      [null],
      [null],
      [[1, 2, 3]],
      [[1, 2]],
    ])
  })

  it('read a map-like column', async () => {
    const file = fileToAsyncBuffer('test/files/Int_Map.parquet')
    /** @type {any[]} */
    const chunks = []
    /** @type {any} */
    let rows
    await parquetRead({
      file,
      columns: ['int_map'],
      onChunk: chunk => { chunks.push(toJson(chunk)) },
      onComplete: result => { rows = toJson(result) },
    })
    // exactly one chunk, covering every row of the selected column
    expect(chunks).toEqual([
      {
        columnName: 'int_map',
        columnData: [
          { k1: 1, k2: 100 },
          { k1: 2, k2: null },
          { },
          { },
          { },
          null,
          { k1: null, k3: null },
        ],
        rowStart: 0,
        rowEnd: 7,
      },
    ])
    expect(rows).toEqual([
      [{ k1: 1, k2: 100 }],
      [{ k1: 2, k2: null }],
      [{ }],
      [{ }],
      [{ }],
      [null],
      [{ k1: null, k3: null }],
    ])
  })
})
|