diff --git a/src/hyparquet.d.ts b/src/hyparquet.d.ts index cc31ebe..60fa46b 100644 --- a/src/hyparquet.d.ts +++ b/src/hyparquet.d.ts @@ -27,6 +27,20 @@ export type { AsyncBuffer, Compressors, FileMetaData, SchemaTree } */ export function parquetRead(options: ParquetReadOptions): Promise<void> +/** + * Read parquet data and return a Promise of object-oriented row data. + * + * @param {object} options read options + * @param {AsyncBuffer} options.file file-like object containing parquet data + * @param {FileMetaData} [options.metadata] parquet file metadata + * @param {string[]} [options.columns] columns to read, all columns if undefined + * @param {number} [options.rowStart] first requested row index (inclusive) + * @param {number} [options.rowEnd] last requested row index (exclusive) + * @param {Compressors} [options.compressor] custom decompressors + * @returns {Promise<Array<Record<string, any>>>} resolves when all requested rows and columns are parsed + */ +export function parquetReadObjects(options: ParquetReadOptions): Promise<Array<Record<string, any>>> + /** * Read parquet metadata from an async buffer. 
* diff --git a/src/hyparquet.js b/src/hyparquet.js index 407e7f5..da99d59 100644 --- a/src/hyparquet.js +++ b/src/hyparquet.js @@ -9,3 +9,17 @@ export { snappyUncompress } import { asyncBufferFromFile, asyncBufferFromUrl, toJson } from './utils.js' export { asyncBufferFromFile, asyncBufferFromUrl, toJson } + +/** + * @param {import('./hyparquet.js').ParquetReadOptions} options + * @returns {Promise<Array<Record<string, any>>>} + */ +export function parquetReadObjects(options) { + return new Promise((onComplete, reject) => { + parquetRead({ + rowFormat: 'object', + ...options, + onComplete, + }).catch(reject) + }) +} diff --git a/test/read.test.js b/test/read.test.js index 94c4086..fa091c6 100644 --- a/test/read.test.js +++ b/test/read.test.js @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest' -import { parquetRead } from '../src/hyparquet.js' +import { parquetRead, parquetReadObjects } from '../src/hyparquet.js' import { asyncBufferFromFile, toJson } from '../src/utils.js' describe('parquetRead', () => { @@ -130,14 +130,6 @@ describe('parquetRead', () => { file, columns: ['c'], rowFormat: 'object', - onChunk: chunk => { - expect(toJson(chunk)).toEqual({ - columnName: 'c', - columnData: [2, 3, 4, 5, 2], - rowStart: 0, - rowEnd: 5, - }) - }, onComplete: (rows) => { expect(toJson(rows)).toEqual([ { c: 2 }, @@ -155,23 +147,6 @@ describe('parquetRead', () => { await parquetRead({ file, columns: ['c', 'missing', 'b', 'c'], - onChunk: chunk => { - if (chunk.columnName === 'b') { - expect(toJson(chunk)).toEqual({ - columnName: 'b', - columnData: [1, 2, 3, 4, 5], - rowStart: 0, - rowEnd: 5, - }) - } else { - expect(toJson(chunk)).toEqual({ - columnName: 'c', - columnData: [2, 3, 4, 5, 2], - rowStart: 0, - rowEnd: 5, - }) - } - }, onComplete: (rows) => { expect(toJson(rows)).toEqual([ [2, null, 1, 2], @@ -183,4 +158,16 @@ describe('parquetRead', () => { }, }) }) + + it('read objects and return a promise', async () => { + const file = await 
asyncBufferFromFile('test/files/datapage_v2.snappy.parquet') + const rows = await parquetReadObjects({ file }) + expect(toJson(rows)).toEqual([ + { a: 'abc', b: 1, c: 2, d: true, e: [1, 2, 3] }, + { a: 'abc', b: 2, c: 3, d: true }, + { a: 'abc', b: 3, c: 4, d: true }, + { a: null, b: 4, c: 5, d: false, e: [1, 2, 3] }, + { a: 'abc', b: 5, c: 2, d: true, e: [1, 2] }, + ]) + }) })