From 92a417c50669e0b9449f5146784a6273b6c3a0db Mon Sep 17 00:00:00 2001
From: Kenny Daniel
Date: Wed, 3 Sep 2025 22:15:51 -0700
Subject: [PATCH] Revert "Fix onComplete return type (#104)" (#117)

This reverts commit 49bd895fb51dd13631f7a4f61e46e0baf8f1c0c5.
---
 src/query.js   | 18 ++++++------------
 src/read.js    |  3 +--
 src/types.d.ts | 15 ++++-----------
 3 files changed, 11 insertions(+), 25 deletions(-)

diff --git a/src/query.js b/src/query.js
index b3695c6..816da66 100644
--- a/src/query.js
+++ b/src/query.js
@@ -9,7 +9,7 @@ import { equals } from './utils.js'
  * Note that using orderBy may SIGNIFICANTLY increase the query time.
  *
  * @param {ParquetReadOptions & { filter?: ParquetQueryFilter, orderBy?: string }} options
- * @returns {Promise<Record<string, any>[] | any[][]>} resolves when all requested rows and columns are parsed
+ * @returns {Promise<Record<string, any>[]>} resolves when all requested rows and columns are parsed
  */
 export async function parquetQuery(options) {
   if (!options.file || !(options.file.byteLength >= 0)) {
@@ -45,15 +45,12 @@ export async function parquetQuery(options) {
     for (const group of metadata.row_groups) {
       const groupEnd = groupStart + Number(group.num_rows)
       // TODO: if expected > group size, start fetching next groups
-
-      // eslint-disable-next-line no-extra-parens
-      const groupData = /** @type {Record<string, any>[]} */ (await parquetReadObjects({
+      const groupData = await parquetReadObjects({
         ...options,
-        rowFormat: 'object',
         rowStart: groupStart,
         rowEnd: groupEnd,
         columns: relevantColumns,
-      }))
+      })
       for (const row of groupData) {
         if (matchQuery(row, filter)) {
           if (requiresProjection && relevantColumns) {
@@ -72,15 +69,12 @@ export async function parquetQuery(options) {
     return filteredRows.slice(rowStart, rowEnd)
   } else if (filter) {
     // read all rows, sort, and filter
-
-    // eslint-disable-next-line no-extra-parens
-    const results = /** @type {Record<string, any>[]} */ (await parquetReadObjects({
+    const results = await parquetReadObjects({
       ...options,
-      rowFormat: 'object',
       rowStart: undefined,
       rowEnd: undefined,
       columns: relevantColumns,
-    }))
+    })
     if (orderBy) results.sort((a, b) => compare(a[orderBy], b[orderBy]))
     const filteredRows = new Array()
     for (const row of results) {
@@ -118,7 +112,7 @@ export async function parquetQuery(options) {
  * Returns a sparse array of rows.
  * @import {ParquetQueryFilter, ParquetReadOptions} from '../src/types.d.ts'
  * @param {ParquetReadOptions & { rows: number[] }} options
- * @returns {Promise<Record<string, any>[] | any[][]>}
+ * @returns {Promise<Record<string, any>[]>}
  */
 async function parquetReadRows(options) {
   const { file, rows } = options
diff --git a/src/read.js b/src/read.js
index f19b9dd..bc3ec77 100644
--- a/src/read.js
+++ b/src/read.js
@@ -126,8 +126,7 @@ export async function parquetReadColumn(options) {
  * It is a wrapper around the more configurable parquetRead function.
  *
  * @param {Omit<ParquetReadOptions, 'onComplete'>} options
- * @returns {Promise<Record<string, any>[] | any[][]>} resolves when all requested rows and columns are parsed.
- * Resolves to an array of arrays if 'rowFormat' is 'array', else to an array of objects (default).
+ * @returns {Promise<Record<string, any>[]>} resolves when all requested rows and columns are parsed
  */
 export function parquetReadObjects(options) {
   return new Promise((onComplete, reject) => {
diff --git a/src/types.d.ts b/src/types.d.ts
index f3ce7b5..ed7f6f8 100644
--- a/src/types.d.ts
+++ b/src/types.d.ts
@@ -15,30 +15,23 @@ export interface MetadataOptions {
   parsers?: ParquetParsers // custom parsers to decode advanced types
 }
 
-interface ArrayRowFormat {
-  rowFormat?: 'array' // format of each row passed to the onComplete function. Can be omitted, as it's the default.
-  onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed
-}
-interface ObjectRowFormat {
-  rowFormat: 'object' // format of each row passed to the onComplete function
-  onComplete?: (rows: Record<string, any>[]) => void // called when all requested rows and columns are parsed
-}
-
 /**
  * Parquet query options for reading data
  */
-export type ParquetReadOptions = {
+export interface ParquetReadOptions {
   file: AsyncBuffer // file-like object containing parquet data
   metadata?: FileMetaData // parquet metadata, will be parsed if not provided
   columns?: string[] // columns to read, all columns if undefined
+  rowFormat?: 'object' | 'array' // format of each row passed to the onComplete function
   rowStart?: number // first requested row index (inclusive)
   rowEnd?: number // last requested row index (exclusive)
   onChunk?: (chunk: ColumnData) => void // called when a column chunk is parsed. chunks may contain data outside the requested range.
   onPage?: (chunk: ColumnData) => void // called when a data page is parsed. pages may contain data outside the requested range.
+  onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed
   compressors?: Compressors // custom decompressors
   utf8?: boolean // decode byte arrays as utf8 strings (default true)
   parsers?: ParquetParsers // custom parsers to decode advanced types
-} & (ArrayRowFormat | ObjectRowFormat)
+}
 
 /**
  * Parquet query options for filtering data