mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-01-07 03:26:38 +00:00
Promisified parquetReadObjects function
This commit is contained in:
parent
a2024a781c
commit
df02229407
14
src/hyparquet.d.ts
vendored
14
src/hyparquet.d.ts
vendored
@ -27,6 +27,20 @@ export type { AsyncBuffer, Compressors, FileMetaData, SchemaTree }
|
||||
*/
|
||||
export function parquetRead(options: ParquetReadOptions): Promise<void>
|
||||
|
||||
/**
|
||||
* Read parquet data and return a Promise of object-oriented row data.
|
||||
*
|
||||
* @param {object} options read options
|
||||
* @param {AsyncBuffer} options.file file-like object containing parquet data
|
||||
* @param {FileMetaData} [options.metadata] parquet file metadata
|
||||
* @param {string[]} [options.columns] columns to read, all columns if undefined
|
||||
* @param {number} [options.rowStart] first requested row index (inclusive)
|
||||
* @param {number} [options.rowEnd] last requested row index (exclusive)
|
||||
* @param {Compressors} [options.compressor] custom decompressors
|
||||
* @returns {Promise<void>} resolves when all requested rows and columns are parsed
|
||||
*/
|
||||
export function parquetReadObjects(options: ParquetReadOptions): Promise<Array<Record<string, any>>>
|
||||
|
||||
/**
|
||||
* Read parquet metadata from an async buffer.
|
||||
*
|
||||
|
||||
@ -9,3 +9,17 @@ export { snappyUncompress }
|
||||
|
||||
import { asyncBufferFromFile, asyncBufferFromUrl, toJson } from './utils.js'
|
||||
export { asyncBufferFromFile, asyncBufferFromUrl, toJson }
|
||||
|
||||
/**
|
||||
* @param {import('./hyparquet.js').ParquetReadOptions} options
|
||||
* @returns {Promise<Array<Record<string, any>>>}
|
||||
*/
|
||||
export function parquetReadObjects(options) {
|
||||
return new Promise((onComplete, reject) => {
|
||||
parquetRead({
|
||||
rowFormat: 'object',
|
||||
...options,
|
||||
onComplete,
|
||||
}).catch(reject)
|
||||
})
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { parquetRead } from '../src/hyparquet.js'
|
||||
import { parquetRead, parquetReadObjects } from '../src/hyparquet.js'
|
||||
import { asyncBufferFromFile, toJson } from '../src/utils.js'
|
||||
|
||||
describe('parquetRead', () => {
|
||||
@ -130,14 +130,6 @@ describe('parquetRead', () => {
|
||||
file,
|
||||
columns: ['c'],
|
||||
rowFormat: 'object',
|
||||
onChunk: chunk => {
|
||||
expect(toJson(chunk)).toEqual({
|
||||
columnName: 'c',
|
||||
columnData: [2, 3, 4, 5, 2],
|
||||
rowStart: 0,
|
||||
rowEnd: 5,
|
||||
})
|
||||
},
|
||||
onComplete: (rows) => {
|
||||
expect(toJson(rows)).toEqual([
|
||||
{ c: 2 },
|
||||
@ -155,23 +147,6 @@ describe('parquetRead', () => {
|
||||
await parquetRead({
|
||||
file,
|
||||
columns: ['c', 'missing', 'b', 'c'],
|
||||
onChunk: chunk => {
|
||||
if (chunk.columnName === 'b') {
|
||||
expect(toJson(chunk)).toEqual({
|
||||
columnName: 'b',
|
||||
columnData: [1, 2, 3, 4, 5],
|
||||
rowStart: 0,
|
||||
rowEnd: 5,
|
||||
})
|
||||
} else {
|
||||
expect(toJson(chunk)).toEqual({
|
||||
columnName: 'c',
|
||||
columnData: [2, 3, 4, 5, 2],
|
||||
rowStart: 0,
|
||||
rowEnd: 5,
|
||||
})
|
||||
}
|
||||
},
|
||||
onComplete: (rows) => {
|
||||
expect(toJson(rows)).toEqual([
|
||||
[2, null, 1, 2],
|
||||
@ -183,4 +158,16 @@ describe('parquetRead', () => {
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
it('read objects and return a promise', async () => {
  // parquetReadObjects resolves directly with the rows — no onComplete callback needed
  const expected = [
    { a: 'abc', b: 1, c: 2, d: true, e: [1, 2, 3] },
    { a: 'abc', b: 2, c: 3, d: true },
    { a: 'abc', b: 3, c: 4, d: true },
    { a: null, b: 4, c: 5, d: false, e: [1, 2, 3] },
    { a: 'abc', b: 5, c: 2, d: true, e: [1, 2] },
  ]
  const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
  const rows = await parquetReadObjects({ file })
  expect(toJson(rows)).toEqual(expected)
})
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user