fix a bug in parquetQuery, when rowFormat is 'array' (#118)

parquetQuery silently returned an empty array instead of either throwing an
Error or returning the data in rowFormat="object".

This commit (silently) forces rowFormat to "object" in the reads that parquetQuery performs.
This commit is contained in:
Sylvain Lesage 2025-09-05 03:55:21 -04:00 committed by GitHub
parent fe57d00555
commit 709d6b41fc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 0 deletions

@ -47,6 +47,7 @@ export async function parquetQuery(options) {
// TODO: if expected > group size, start fetching next groups
const groupData = await parquetReadObjects({
...options,
rowFormat: 'object',
rowStart: groupStart,
rowEnd: groupEnd,
columns: relevantColumns,
@ -71,6 +72,7 @@ export async function parquetQuery(options) {
// read all rows, sort, and filter
const results = await parquetReadObjects({
...options,
rowFormat: 'object',
rowStart: undefined,
rowEnd: undefined,
columns: relevantColumns,

@ -22,6 +22,18 @@ describe('parquetQuery', () => {
])
})
it('returns rows in "array" format if asked', async () => {
  // No filter or orderBy is passed: every row is expected back as an array of values.
  const expectedRows = [
    ['abc', 1, 2, true, [1, 2, 3]],
    ['abc', 2, 3, true, undefined],
    ['abc', 3, 4, true, undefined],
    [null, 4, 5, false, [1, 2, 3]],
    ['abc', 5, 2, true, [1, 2]],
  ]
  const source = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
  const actualRows = await parquetQuery({ file: source, rowFormat: 'array' })
  expect(actualRows).toEqual(expectedRows)
})
it('reads data with orderBy', async () => {
const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
const rows = await parquetQuery({ file, orderBy: 'c' })
@ -63,6 +75,18 @@ describe('parquetQuery', () => {
])
})
it('always returns rows in "object" format if filter is provided', async () => {
  const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
  const filter = { c: { $eq: 2 } }
  // Rows matching the filter, as objects keyed by column name.
  const expected = [
    { a: 'abc', b: 1, c: 2, d: true, e: [1, 2, 3] },
    { a: 'abc', b: 5, c: 2, d: true, e: [1, 2] },
  ]
  // The same objects are expected whether rowFormat is 'array', 'object', or omitted.
  const optionVariants = [
    { file, filter, rowFormat: 'array' },
    { file, filter, rowFormat: 'object' },
    { file, filter },
  ]
  for (const options of optionVariants) {
    expect(await parquetQuery(options)).toEqual(expected)
  }
})
it('reads data with filter and rowStart/rowEnd', async () => {
const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
const rows = await parquetQuery({ file, filter: { c: { $eq: 2 } }, rowStart: 1, rowEnd: 5 })