Promisified parquetReadObjects function

This commit is contained in:
Kenny Daniel 2024-08-20 11:30:39 -07:00
parent a2024a781c
commit df02229407
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
3 changed files with 41 additions and 26 deletions

14
src/hyparquet.d.ts vendored

@ -27,6 +27,20 @@ export type { AsyncBuffer, Compressors, FileMetaData, SchemaTree }
*/
export function parquetRead(options: ParquetReadOptions): Promise<void>
/**
* Read parquet data and return a Promise of object-oriented row data.
*
* @param {object} options read options
* @param {AsyncBuffer} options.file file-like object containing parquet data
* @param {FileMetaData} [options.metadata] parquet file metadata
* @param {string[]} [options.columns] columns to read, all columns if undefined
* @param {number} [options.rowStart] first requested row index (inclusive)
* @param {number} [options.rowEnd] last requested row index (exclusive)
* @param {Compressors} [options.compressor] custom decompressors
* @returns {Promise<Array<Record<string, any>>>} resolves with the requested rows, one object per row
*/
export function parquetReadObjects(options: ParquetReadOptions): Promise<Array<Record<string, any>>>
/**
* Read parquet metadata from an async buffer.
*

@ -9,3 +9,17 @@ export { snappyUncompress }
import { asyncBufferFromFile, asyncBufferFromUrl, toJson } from './utils.js'
export { asyncBufferFromFile, asyncBufferFromUrl, toJson }
/**
 * Promise-returning wrapper around the callback-based parquetRead.
 *
 * Rows are requested in 'object' format by default. The caller's options are
 * spread after that default, so an explicit `options.rowFormat` takes
 * precedence. Any `options.onComplete` is replaced by the promise resolver,
 * so rows are delivered only through the returned promise.
 *
 * @param {import('./hyparquet.js').ParquetReadOptions} options read options forwarded to parquetRead
 * @returns {Promise<Array<Record<string, any>>>} resolves with the rows handed to onComplete, rejects if parquetRead rejects
 */
export function parquetReadObjects(options) {
  return new Promise((resolve, reject) => {
    /** @type {import('./hyparquet.js').ParquetReadOptions} */
    const readOptions = { rowFormat: 'object', ...options, onComplete: resolve }
    // parquetRead reports rows via onComplete; its own promise is only used to surface failures
    parquetRead(readOptions).catch(reject)
  })
}

@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest'
import { parquetRead } from '../src/hyparquet.js'
import { parquetRead, parquetReadObjects } from '../src/hyparquet.js'
import { asyncBufferFromFile, toJson } from '../src/utils.js'
describe('parquetRead', () => {
@ -130,14 +130,6 @@ describe('parquetRead', () => {
file,
columns: ['c'],
rowFormat: 'object',
onChunk: chunk => {
expect(toJson(chunk)).toEqual({
columnName: 'c',
columnData: [2, 3, 4, 5, 2],
rowStart: 0,
rowEnd: 5,
})
},
onComplete: (rows) => {
expect(toJson(rows)).toEqual([
{ c: 2 },
@ -155,23 +147,6 @@ describe('parquetRead', () => {
await parquetRead({
file,
columns: ['c', 'missing', 'b', 'c'],
onChunk: chunk => {
if (chunk.columnName === 'b') {
expect(toJson(chunk)).toEqual({
columnName: 'b',
columnData: [1, 2, 3, 4, 5],
rowStart: 0,
rowEnd: 5,
})
} else {
expect(toJson(chunk)).toEqual({
columnName: 'c',
columnData: [2, 3, 4, 5, 2],
rowStart: 0,
rowEnd: 5,
})
}
},
onComplete: (rows) => {
expect(toJson(rows)).toEqual([
[2, null, 1, 2],
@ -183,4 +158,16 @@ describe('parquetRead', () => {
},
})
})
it('read objects and return a promise', async () => {
  // rows expected from the fixture after toJson conversion
  const expectedRows = [
    { a: 'abc', b: 1, c: 2, d: true, e: [1, 2, 3] },
    { a: 'abc', b: 2, c: 3, d: true },
    { a: 'abc', b: 3, c: 4, d: true },
    { a: null, b: 4, c: 5, d: false, e: [1, 2, 3] },
    { a: 'abc', b: 5, c: 2, d: true, e: [1, 2] },
  ]
  const asyncBuffer = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
  // no callbacks are passed: the rows come back through the awaited promise
  const objects = await parquetReadObjects({ file: asyncBuffer })
  expect(toJson(objects)).toEqual(expectedRows)
})
})