From 709d6b41fc6ee5b81b602e60277ff4e5699db63e Mon Sep 17 00:00:00 2001 From: Sylvain Lesage Date: Fri, 5 Sep 2025 03:55:21 -0400 Subject: [PATCH] fix a bug in parquetQuery, when rowFormat is 'array' (#118) It silently provided an empty array, instead of throwing an Error, or providing the data in rowFormat="object". Here, I (silently) force the rowFormat to "object". --- src/query.js | 2 ++ test/query.test.js | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/query.js b/src/query.js index 816da66..b82fe7e 100644 --- a/src/query.js +++ b/src/query.js @@ -47,6 +47,7 @@ export async function parquetQuery(options) { // TODO: if expected > group size, start fetching next groups const groupData = await parquetReadObjects({ ...options, + rowFormat: 'object', rowStart: groupStart, rowEnd: groupEnd, columns: relevantColumns, @@ -71,6 +72,7 @@ export async function parquetQuery(options) { // read all rows, sort, and filter const results = await parquetReadObjects({ ...options, + rowFormat: 'object', rowStart: undefined, rowEnd: undefined, columns: relevantColumns, diff --git a/test/query.test.js b/test/query.test.js index 2b3ae02..eeb478d 100644 --- a/test/query.test.js +++ b/test/query.test.js @@ -22,6 +22,18 @@ describe('parquetQuery', () => { ]) }) + it('returns rows in "array" format if asked', async () => { + const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet') + const rows = await parquetQuery({ file, rowFormat: 'array' }) + expect(rows).toEqual([ + [ 'abc', 1, 2, true, [1, 2, 3] ], + [ 'abc', 2, 3, true, undefined ], + [ 'abc', 3, 4, true, undefined ], + [ null, 4, 5, false, [1, 2, 3] ], + [ 'abc', 5, 2, true, [1, 2] ], + ]) + }) + it('reads data with orderBy', async () => { const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet') const rows = await parquetQuery({ file, orderBy: 'c' }) @@ -63,6 +75,18 @@ describe('parquetQuery', () => { ]) }) + it('always returns rows in "object" format if filter is provided', async () => { + const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet') + const expected = [ + { a: 'abc', b: 1, c: 2, d: true, e: [ 1, 2, 3 ] }, + { a: 'abc', b: 5, c: 2, d: true, e: [ 1, 2 ] }, + ] + const filter = { c: { $eq: 2 } } + expect(await parquetQuery({ file, filter, rowFormat: 'array' })).toEqual(expected) + expect(await parquetQuery({ file, filter, rowFormat: 'object' })).toEqual(expected) + expect(await parquetQuery({ file, filter })).toEqual(expected) + }) + it('reads data with filter and rowStart/rowEnd', async () => { const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet') const rows = await parquetQuery({ file, filter: { c: { $eq: 2 } }, rowStart: 1, rowEnd: 5 })