hyparquet/test/read.test.js

117 lines
3.1 KiB
JavaScript
Raw Normal View History

2024-02-14 05:11:34 +00:00
import fs from 'fs'
2024-01-15 19:08:48 +00:00
import { describe, expect, it } from 'vitest'
2024-02-23 18:25:06 +00:00
import { gunzipSync } from 'zlib'
2024-01-15 19:08:48 +00:00
import { parquetRead } from '../src/hyparquet.js'
import { toJson } from '../src/toJson.js'
2024-02-14 05:11:34 +00:00
import { fileToAsyncBuffer, fileToJson } from './helpers.js'
2024-01-15 19:08:48 +00:00
2024-02-23 18:25:06 +00:00
/**
 * Decompressors passed to parquetRead so that GZIP-compressed fixtures can be
 * decoded in these tests.
 *
 * @typedef {import('../src/types.js').Compressors} Compressors
 * @type {Compressors}
 */
const compressors = {
  /**
   * Inflate a gzip payload with zlib's gunzipSync and return a view over the
   * first `outputLength` decompressed bytes.
   *
   * @param {Uint8Array} input gzip-compressed bytes
   * @param {number} outputLength number of decompressed bytes to return
   * @returns {Uint8Array} the first `outputLength` decompressed bytes
   * @throws {RangeError} if fewer than `outputLength` bytes were decompressed
   */
  GZIP: (input, outputLength) => {
    const result = gunzipSync(input)
    // The view below is created over result.buffer, which may be larger than
    // the decompressed data itself. Without this check an oversized
    // outputLength would silently expose bytes gunzipSync never produced.
    if (outputLength > result.length) {
      throw new RangeError(
        `gzip output is ${result.length} bytes, expected at least ${outputLength}`
      )
    }
    return new Uint8Array(result.buffer, result.byteOffset, outputLength)
  },
}
/**
 * Integration tests for parquetRead against the fixture files in test/files.
 *
 * parquetRead delivers its results through the onChunk / onComplete callbacks,
 * so every test declares how many assertions it expects. A callback that is
 * never invoked then fails the test instead of letting it pass unchecked.
 */
describe('parquetRead', () => {
  const files = fs.readdirSync('test/files').filter(f => f.endsWith('.parquet'))

  // One test per fixture: <name>.parquet is compared with <name>.json.
  files.forEach(file => {
    it(`should parse data from ${file}`, async () => {
      expect.assertions(1)
      const asyncBuffer = fileToAsyncBuffer(`test/files/${file}`)
      await parquetRead({
        file: asyncBuffer,
        compressors,
        onComplete: (rows) => {
          // Strip only the trailing extension; a plain string replace would
          // remove the first '.parquet' found anywhere in the name.
          const base = file.replace(/\.parquet$/, '')
          const expected = fileToJson(`test/files/${base}.json`)
          expect(toJson(rows)).toEqual(expected)
        },
      })
    })
  })

  it('should read a single column from a file', async () => {
    // One assertion in onChunk plus one in onComplete.
    expect.assertions(2)
    const asyncBuffer = fileToAsyncBuffer('test/files/datapage_v2.snappy.parquet')
    await parquetRead({
      file: asyncBuffer,
      columns: ['c'],
      onChunk: (rows) => {
        expect(toJson(rows)).toEqual({
          columnName: 'c',
          columnData: [2, 3, 4, 5, 2],
          rowStart: 0,
          rowEnd: 5,
        })
      },
      onComplete: (rows) => {
        expect(toJson(rows)).toEqual([
          [2],
          [3],
          [4],
          [5],
          [2],
        ])
      },
    })
  })

  it('should read a list-like column from a file', async () => {
    // One assertion in onChunk plus one in onComplete.
    expect.assertions(2)
    const asyncBuffer = fileToAsyncBuffer('test/files/datapage_v2.snappy.parquet')
    await parquetRead({
      file: asyncBuffer,
      columns: ['e'],
      onChunk: (rows) => {
        expect(toJson(rows)).toEqual({
          columnName: 'e',
          columnData: [[1, 2, 3], null, null, [1, 2, 3], [1, 2]],
          rowStart: 0,
          rowEnd: 5,
        })
      },
      onComplete: (rows) => {
        expect(toJson(rows)).toEqual([
          [[1, 2, 3]],
          [null],
          [null],
          [[1, 2, 3]],
          [[1, 2]],
        ])
      },
    })
  })

  it('should read a map-like column from a file', async () => {
    // One assertion in onChunk plus one in onComplete.
    expect.assertions(2)
    const asyncBuffer = fileToAsyncBuffer('test/files/Int_Map.parquet')
    await parquetRead({
      file: asyncBuffer,
      columns: ['int_map'],
      onChunk: (rows) => {
        expect(toJson(rows)).toEqual({
          columnName: 'int_map',
          columnData: [
            { k1: 1, k2: 100 },
            { k1: 2 },
            { },
          ],
          rowStart: 0,
          rowEnd: 3,
        })
      },
      onComplete: (rows) => {
        expect(toJson(rows)).toEqual([
          [{ k1: 1, k2: 100 }],
          [{ k1: 2 }],
          [{}],
        ])
      },
    })
  })
})