// hyparquet/src/query.js
import { matchFilter } from './filter.js'
import { parquetMetadataAsync, parquetSchema } from './metadata.js'
import { parquetReadColumn, parquetReadObjects } from './read.js'
/**
* @import {ParquetQueryFilter, BaseParquetReadOptions} from '../src/types.js'
*/
/**
 * Wraps parquetRead with orderBy and filter support.
 * This is a parquet-aware query engine that can read a subset of rows and columns.
 * Accepts an optional orderBy column name to sort the results, and an optional
 * MongoDB-style filter to select matching rows.
 * Note that using orderBy may SIGNIFICANTLY increase the query time.
*
 * @param {BaseParquetReadOptions & { orderBy?: string, filter?: ParquetQueryFilter }} options
* @returns {Promise<Record<string, any>[]>} resolves when all requested rows and columns are parsed
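 *
 * @example
 * // Minimal sketch (hypothetical data; `file` is any AsyncBuffer, e.g. from
 * // asyncBufferFromFile or asyncBufferFromUrl):
 * const rows = await parquetQuery({
 *   file,
 *   columns: ['name', 'age'],
 *   filter: { age: { $gte: 18 } },
 *   orderBy: 'age',
 * })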
*/
export async function parquetQuery(options) {
if (!options.file || !(options.file.byteLength >= 0)) {
throw new Error('parquet expected AsyncBuffer')
}
options.metadata ??= await parquetMetadataAsync(options.file, options)
const { metadata, rowStart = 0, columns, orderBy, filter } = options
  if (rowStart < 0) throw new Error('parquet rowStart must be non-negative')
const rowEnd = options.rowEnd ?? Number(metadata.num_rows)
// Collect columns needed for the query
const filterColumns = columnsNeededForFilter(filter)
  const allColumns = parquetSchema(metadata).children.map(c => c.element.name)
// Check if all filter columns exist
const missingColumns = filterColumns.filter(column => !allColumns.includes(column))
if (missingColumns.length) {
throw new Error(`parquet filter columns not found: ${missingColumns.join(', ')}`)
}
if (orderBy && !allColumns.includes(orderBy)) {
throw new Error(`parquet orderBy column not found: ${orderBy}`)
}
const relevantColumns = columns ? allColumns.filter(column =>
columns.includes(column) || filterColumns.includes(column) || column === orderBy
) : undefined
// Is the output a subset of the relevant columns?
const requiresProjection = columns && relevantColumns ? columns.length < relevantColumns.length : false
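  // e.g. columns = ['name'] with a filter on 'age' gives relevantColumns = ['name', 'age'],
  // so the extra 'age' values must be stripped from each row before returning it.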
if (filter && !orderBy && rowEnd < metadata.num_rows) {
// iterate through row groups and filter until we have enough rows
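    // Note: filteredRows.length is compared against rowEnd below, so the scan
    // stops as soon as enough *matching* rows have accumulated.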
/** @type {Record<string, any>[]} */
    const filteredRows = []
let groupStart = 0
for (const group of metadata.row_groups) {
const groupEnd = groupStart + Number(group.num_rows)
// TODO: if expected > group size, start fetching next groups
const groupData = await parquetReadObjects({
...options, rowStart: groupStart, rowEnd: groupEnd, columns: relevantColumns,
})
// filter and project rows
for (const row of groupData) {
if (matchFilter(row, filter)) {
if (requiresProjection && relevantColumns) {
for (const column of relevantColumns) {
if (columns && !columns.includes(column)) {
delete row[column] // remove columns not in the projection
}
}
}
filteredRows.push(row)
}
}
if (filteredRows.length >= rowEnd) break
groupStart = groupEnd
}
return filteredRows.slice(rowStart, rowEnd)
} else if (filter) {
// read all rows, sort, and filter
const results = await parquetReadObjects({
...options, rowStart: undefined, rowEnd: undefined, columns: relevantColumns,
})
// sort
if (orderBy) results.sort((a, b) => compare(a[orderBy], b[orderBy]))
// filter and project rows
/** @type {Record<string, any>[]} */
    const filteredRows = []
for (const row of results) {
if (matchFilter(row, filter)) {
if (requiresProjection && relevantColumns) {
for (const column of relevantColumns) {
if (columns && !columns.includes(column)) {
delete row[column] // remove columns not in the projection
}
}
}
filteredRows.push(row)
}
}
return filteredRows.slice(rowStart, rowEnd)
} else if (typeof orderBy === 'string') {
// sorted but unfiltered: fetch orderBy column first
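    // Reading just the orderBy column keeps the initial fetch small; only the row
    // groups containing the requested slice of the sorted order are then fully read.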
const orderColumn = await parquetReadColumn({
...options, rowStart: undefined, rowEnd: undefined, columns: [orderBy],
})
// compute row groups to fetch
const sortedIndices = Array.from(orderColumn, (_, index) => index)
.sort((a, b) => compare(orderColumn[a], orderColumn[b]))
.slice(rowStart, rowEnd)
const sparseData = await parquetReadRows({ ...options, rows: sortedIndices })
    // warning: the type (Record<string, any> & {__index__: number})[] is simplified to Record<string, any>[]
    // when returning. Each row carries the __index__ property, but the return type does not expose it.
const data = sortedIndices.map(index => sparseData[index])
return data
} else {
return await parquetReadObjects(options)
}
}
/**
 * Reads a list of rows from a parquet file, reading only the row groups that contain the rows.
* Returns a sparse array of rows.
* @param {BaseParquetReadOptions & { rows: number[] }} options
* @returns {Promise<(Record<string, any> & {__index__: number})[]>}
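 *
 * @example
 * // Hypothetical: with 1000-row groups, rows [0, 5000] touch only groups 0 and 5,
 * // so only those two groups are fetched; every other slot of the result stays empty.
 * // const sparse = await parquetReadRows({ file, metadata, rows: [0, 5000] })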
*/
async function parquetReadRows(options) {
const { file, rows } = options
  options.metadata ??= await parquetMetadataAsync(file, options)
const { row_groups: rowGroups } = options.metadata
// Compute row groups to fetch
const groupIncluded = Array(rowGroups.length).fill(false)
let groupStart = 0
const groupEnds = rowGroups.map(group => groupStart += Number(group.num_rows))
for (const index of rows) {
const groupIndex = groupEnds.findIndex(end => index < end)
groupIncluded[groupIndex] = true
}
// Compute row ranges to fetch
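  // Adjacent included groups merge into one contiguous [start, end) row range, so each
  // range becomes a single read. e.g. (hypothetical sizes) four 100-row groups with
  // groupIncluded = [true, true, false, true] yield rowRanges = [[0, 200], [300, 400]].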
const rowRanges = []
let rangeStart
groupStart = 0
for (let i = 0; i < groupIncluded.length; i++) {
const groupEnd = groupStart + Number(rowGroups[i].num_rows)
if (groupIncluded[i]) {
if (rangeStart === undefined) {
rangeStart = groupStart
}
} else {
if (rangeStart !== undefined) {
        rowRanges.push([rangeStart, groupStart]) // close the range at the start of this excluded group
rangeStart = undefined
}
}
groupStart = groupEnd
}
if (rangeStart !== undefined) {
rowRanges.push([rangeStart, groupStart])
}
// Fetch by row group and map to rows
/** @type {(Record<string, any> & {__index__: number})[]} */
const sparseData = new Array(Number(options.metadata.num_rows))
for (const [rangeStart, rangeEnd] of rowRanges) {
// TODO: fetch in parallel
const groupData = await parquetReadObjects({ ...options, rowStart: rangeStart, rowEnd: rangeEnd })
for (let i = rangeStart; i < rangeEnd; i++) {
// warning: if the row contains a column named __index__, it will overwrite the index.
sparseData[i] = { __index__: i, ...groupData[i - rangeStart] }
}
}
return sparseData
}
/**
* @param {any} a
* @param {any} b
* @returns {number}
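 *
 * @example
 * // compare(1, 2) === -1, compare('b', 'a') === 1, compare(3, 3) === 0
 * // undefined compares equal to everything (both < and > are false), hence the TODO below.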
*/
function compare(a, b) {
if (a < b) return -1
if (a > b) return 1
return 0 // TODO: null handling
}
/**
 * Returns an array of column names that are needed to evaluate the MongoDB-style filter.
*
* @param {ParquetQueryFilter} [filter]
* @returns {string[]}
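 *
 * @example
 * // columnsNeededForFilter({ $or: [{ age: { $gt: 18 } }, { name: 'Bob' }] })
 * // returns ['age', 'name'] (duplicates are possible, and harmless for the includes() checks above)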
*/
function columnsNeededForFilter(filter) {
if (!filter) return []
/** @type {string[]} */
const columns = []
if ('$and' in filter && Array.isArray(filter.$and)) {
columns.push(...filter.$and.flatMap(columnsNeededForFilter))
} else if ('$or' in filter && Array.isArray(filter.$or)) {
columns.push(...filter.$or.flatMap(columnsNeededForFilter))
} else if ('$nor' in filter && Array.isArray(filter.$nor)) {
columns.push(...filter.$nor.flatMap(columnsNeededForFilter))
} else {
// Column filters
columns.push(...Object.keys(filter))
}
return columns
}