From 49bd895fb51dd13631f7a4f61e46e0baf8f1c0c5 Mon Sep 17 00:00:00 2001 From: Sylvain Lesage Date: Fri, 22 Aug 2025 15:09:28 -0400 Subject: [PATCH] Fix onComplete return type (#104) * attempt to fix #28 * remove breaking changes * loosen the types a bit, but no breaking change * fix format and doc * fix format * fix format * 'remove unused import and add space Co-authored-by: Mario --- src/query.js | 18 ++++++++++++------ src/read.js | 3 ++- src/types.d.ts | 15 +++++++++++---- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/query.js b/src/query.js index 816da66..b3695c6 100644 --- a/src/query.js +++ b/src/query.js @@ -9,7 +9,7 @@ import { equals } from './utils.js' * Note that using orderBy may SIGNIFICANTLY increase the query time. * * @param {ParquetReadOptions & { filter?: ParquetQueryFilter, orderBy?: string }} options - * @returns {Promise<Record<string, any>[]>} resolves when all requested rows and columns are parsed + * @returns {Promise<Record<string, any>[] | any[][]>} resolves when all requested rows and columns are parsed */ export async function parquetQuery(options) { if (!options.file || !(options.file.byteLength >= 0)) { @@ -45,12 +45,15 @@ export async function parquetQuery(options) { for (const group of metadata.row_groups) { const groupEnd = groupStart + Number(group.num_rows) // TODO: if expected > group size, start fetching next groups - const groupData = await parquetReadObjects({ + + // eslint-disable-next-line no-extra-parens + const groupData = /** @type {Record<string, any>[]} */ (await parquetReadObjects({ ...options, + rowFormat: 'object', rowStart: groupStart, rowEnd: groupEnd, columns: relevantColumns, - }) + })) for (const row of groupData) { if (matchQuery(row, filter)) { if (requiresProjection && relevantColumns) { @@ -69,12 +72,15 @@ export async function parquetQuery(options) { return filteredRows.slice(rowStart, rowEnd) } else if (filter) { // read all rows, sort, and filter - const results = await parquetReadObjects({ + + // eslint-disable-next-line 
no-extra-parens + const results = /** @type {Record<string, any>[]} */ (await parquetReadObjects({ ...options, + rowFormat: 'object', rowStart: undefined, rowEnd: undefined, columns: relevantColumns, - }) + })) if (orderBy) results.sort((a, b) => compare(a[orderBy], b[orderBy])) const filteredRows = new Array() for (const row of results) { @@ -112,7 +118,7 @@ export async function parquetQuery(options) { * Returns a sparse array of rows. * @import {ParquetQueryFilter, ParquetReadOptions} from '../src/types.d.ts' * @param {ParquetReadOptions & { rows: number[] }} options - * @returns {Promise<Record<string, any>[]>} + * @returns {Promise<Record<string, any>[] | any[][]>} */ async function parquetReadRows(options) { const { file, rows } = options diff --git a/src/read.js b/src/read.js index bc3ec77..f19b9dd 100644 --- a/src/read.js +++ b/src/read.js @@ -126,7 +126,8 @@ export async function parquetReadColumn(options) { * It is a wrapper around the more configurable parquetRead function. * * @param {Omit<ParquetReadOptions, 'onComplete'>} options - * @returns {Promise<Record<string, any>[]>} resolves when all requested rows and columns are parsed + * @returns {Promise<Record<string, any>[] | any[][]>} resolves when all requested rows and columns are parsed. + * Resolves to an array of arrays if 'rowFormat' is 'array', else to an array of objects (default). */ export function parquetReadObjects(options) { return new Promise((onComplete, reject) => { diff --git a/src/types.d.ts b/src/types.d.ts index ed7f6f8..f3ce7b5 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -15,23 +15,30 @@ export interface MetadataOptions { parsers?: ParquetParsers // custom parsers to decode advanced types } +interface ArrayRowFormat { + rowFormat?: 'array' // format of each row passed to the onComplete function. Can be omitted, as it's the default. 
+ onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed +} +interface ObjectRowFormat { + rowFormat: 'object' // format of each row passed to the onComplete function + onComplete?: (rows: Record<string, any>[]) => void // called when all requested rows and columns are parsed +} + /** * Parquet query options for reading data */ -export interface ParquetReadOptions { +export type ParquetReadOptions = { file: AsyncBuffer // file-like object containing parquet data metadata?: FileMetaData // parquet metadata, will be parsed if not provided columns?: string[] // columns to read, all columns if undefined - rowFormat?: 'object' | 'array' // format of each row passed to the onComplete function rowStart?: number // first requested row index (inclusive) rowEnd?: number // last requested row index (exclusive) onChunk?: (chunk: ColumnData) => void // called when a column chunk is parsed. chunks may contain data outside the requested range. onPage?: (chunk: ColumnData) => void // called when a data page is parsed. pages may contain data outside the requested range. - onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed compressors?: Compressors // custom decompressors utf8?: boolean // decode byte arrays as utf8 strings (default true) parsers?: ParquetParsers // custom parsers to decode advanced types -} +} & (ArrayRowFormat | ObjectRowFormat) /** * Parquet query options for filtering data