// hyparquet/test/read.test.js
import { describe, expect, it, vi } from 'vitest'
import { convertWithDictionary } from '../src/convert.js'
import { parquetMetadataAsync, parquetRead, parquetReadObjects } from '../src/index.js'
import { asyncBufferFromFile } from '../src/node.js'
import { countingBuffer } from './helpers.js'

// Spy on the convert module so tests can count conversion calls without altering behavior
vi.mock('../src/convert.js', { spy: true })

describe('parquetRead', () => {
  it('throws error for undefined file', async () => {
    // @ts-expect-error testing invalid input
    await expect(parquetRead({ file: undefined }))
      .rejects.toThrow('parquet expected AsyncBuffer')
  })

  it('throws error for undefined byteLength', async () => {
    const file = { byteLength: undefined, slice: () => new ArrayBuffer(0) }
    // @ts-expect-error testing invalid input
    await expect(parquetRead({ file }))
      .rejects.toThrow('parquet expected AsyncBuffer')
  })

  it('read row range', async () => {
    const file = await asyncBufferFromFile('test/files/rowgroups.parquet')
    await parquetRead({
      file,
      rowStart: 2,
      rowEnd: 4,
      onComplete(rows) {
        // rows are 1-indexed bigints in the fixture; [2, 4) selects the 3rd and 4th
        expect(rows).toEqual([[3n], [4n]])
      },
    })
  })

  it('row range overestimate', async () => {
    const file = await asyncBufferFromFile('test/files/rowgroups.parquet')
    await parquetRead({
      file,
      rowEnd: 100, // fixture only has 15 rows; rowEnd past the end is clamped
      onComplete(rows) {
        expect(rows).toEqual([
          [1n], [2n], [3n], [4n], [5n], [6n], [7n], [8n], [9n], [10n], [11n], [12n], [13n], [14n], [15n],
        ])
      },
    })
  })

  it('read a single column as typed array', async () => {
    const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
    await parquetRead({
      file,
      columns: ['b'],
      onChunk(chunk) {
        expect(chunk).toEqual({
          columnName: 'b',
          columnData: new Int32Array([1, 2, 3, 4, 5]),
          rowStart: 0,
          rowEnd: 5,
        })
        // toEqual alone does not verify the concrete array type
        expect(chunk.columnData).toBeInstanceOf(Int32Array)
      },
    })
  })

  it('read a list-like column', async () => {
    const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
    await parquetRead({
      file,
      columns: ['e'],
      onChunk(chunk) {
        expect(chunk).toEqual({
          columnName: 'e',
          columnData: [[1, 2, 3], undefined, undefined, [1, 2, 3], [1, 2]],
          rowStart: 0,
          rowEnd: 5,
        })
      },
      onComplete(rows) {
        expect(rows).toEqual([
          [[1, 2, 3]],
          [undefined],
          [undefined],
          [[1, 2, 3]],
          [[1, 2]],
        ])
      },
    })
  })

  it('read a map-like column', async () => {
    const file = await asyncBufferFromFile('test/files/nullable.impala.parquet')
    await parquetRead({
      file,
      columns: ['int_map'],
      onChunk(chunk) {
        expect(chunk).toEqual({
          columnName: 'int_map',
          columnData: [
            { k1: 1, k2: 100 },
            { k1: 2, k2: null },
            { },
            { },
            { },
            undefined,
            { k1: null, k3: null },
          ],
          rowStart: 0,
          rowEnd: 7,
        })
      },
      onComplete(rows) {
        expect(rows).toEqual([
          [{ k1: 1, k2: 100 }],
          [{ k1: 2, k2: null }],
          [{ }],
          [{ }],
          [{ }],
          [undefined],
          [{ k1: null, k3: null }],
        ])
      },
    })
  })

  it('format row as object', async () => {
    const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
    await parquetRead({
      file,
      columns: ['c'],
      rowFormat: 'object',
      onComplete(rows) {
        expect(rows).toEqual([
          { c: 2 },
          { c: 3 },
          { c: 4 },
          { c: 5 },
          { c: 2 },
        ])
      },
    })
  })

  it('read columns out of order', async () => {
    const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
    await parquetRead({
      file,
      // includes a missing column and a duplicate to exercise column ordering
      columns: ['c', 'missing', 'b', 'c'],
      onComplete(rows) {
        expect(rows).toEqual([
          [2, undefined, 1, 2],
          [3, undefined, 2, 3],
          [4, undefined, 3, 4],
          [5, undefined, 4, 5],
          [2, undefined, 5, 2],
        ])
      },
    })
  })

  it('read objects and return a promise', async () => {
    const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
    const rows = await parquetReadObjects({ file })
    expect(rows).toEqual([
      { a: 'abc', b: 1, c: 2, d: true, e: [1, 2, 3] },
      { a: 'abc', b: 2, c: 3, d: true },
      { a: 'abc', b: 3, c: 4, d: true },
      { a: null, b: 4, c: 5, d: false, e: [1, 2, 3] },
      { a: 'abc', b: 5, c: 2, d: true, e: [1, 2] },
    ])
  })

  it('skips converting unnecessary pages', async () => {
    const file = await asyncBufferFromFile('test/files/page_indexed.parquet')
    const metadata = await parquetMetadataAsync(file)
    vi.mocked(convertWithDictionary).mockClear()
    const rows = await parquetReadObjects({
      file,
      metadata,
      rowStart: 90,
      rowEnd: 91,
    })
    expect(rows).toEqual([{ row: 90n, quality: 'bad' }])
    // only the pages covering the requested row range should be converted
    expect(convertWithDictionary).toHaveBeenCalledTimes(4)
  })

  it('reads only required row groups on the boundary', async () => {
    const originalFile = await asyncBufferFromFile('test/files/alpha.parquet')
    const metadata = await parquetMetadataAsync(originalFile)
    const file = countingBuffer(originalFile)
    await parquetReadObjects({
      file,
      metadata,
      rowStart: 100,
      rowEnd: 200,
    })
    expect(file.fetches).toBe(1) // 1 rowgroup
    expect(file.bytes).toBe(441) // bytes for 2nd rowgroup
  })

  it('reads individual pages', async () => {
    const file = countingBuffer(await asyncBufferFromFile('test/files/page_indexed.parquet'))
    /** @type {import('../src/types.js').ColumnData[]} */
    const pages = []
    // check onPage callback
    await parquetRead({
      file,
      onPage(page) {
        pages.push(page)
      },
    })
    const expectedPages = [
      {
        columnName: 'row',
        columnData: Array.from({ length: 100 }, (_, i) => BigInt(i)),
        rowStart: 0,
        rowEnd: 100,
      },
      {
        columnName: 'quality',
        columnData: [
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'good', 'bad', 'bad', 'bad',
          'good', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'good',
          'bad', 'bad', 'good', 'bad', 'bad', 'bad', 'bad', 'good', 'bad', 'bad',
          'bad', 'bad', 'good', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'good', 'bad', 'good', 'bad',
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'good', 'bad',
          'bad', 'bad', 'good', 'bad', 'bad', 'bad', 'bad', 'good', 'bad', 'bad',
          'bad', 'bad', 'bad', 'good', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'good', 'bad',
        ],
        rowStart: 0,
        rowEnd: 100,
      },
      {
        columnName: 'row',
        columnData: Array.from({ length: 100 }, (_, i) => BigInt(i + 100)),
        rowStart: 100,
        rowEnd: 200,
      },
      {
        columnName: 'quality',
        columnData: [
          'good', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'good',
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'good', 'bad',
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
          'bad', 'bad', 'bad', 'bad', 'bad', 'good', 'bad', 'bad', 'good', 'bad',
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
          'bad', 'bad', 'bad', 'bad', 'good', 'bad', 'bad', 'bad', 'good', 'bad',
          'bad', 'good', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
          'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
          'good', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
        ],
        rowStart: 100,
        rowEnd: 200,
      },
    ]
    // expect each page to exist in expected
    for (const expected of expectedPages) {
      const page = pages.find(p => p.columnName === expected.columnName && p.rowStart === expected.rowStart)
      expect(page).toEqual(expected)
    }
    expect(file.fetches).toBe(3) // 1 metadata, 2 rowgroups
    expect(file.bytes).toBe(6421)
  })
})