Re-order types.d.ts to put important apis up front

Kenny Daniel 2025-04-10 16:27:25 -07:00
parent 8740f14450
commit 4645e34f97
3 changed files with 56 additions and 57 deletions

@@ -78,6 +78,10 @@ const data = await parquetReadObjects({
 })
 ```
 
+## Parquet Writing
+
+To create parquet files from javascript, check out the [hyparquet-writer](https://github.com/hyparam/hyparquet-writer) package.
+
 ## Advanced Usage
 
 ### Reading Metadata
@@ -180,10 +184,6 @@ await parquetRead({
 The `parquetReadObjects` function defaults to returning an array of objects.
 
-## Parquet Writing
-
-To create parquet files from javascript, check out the [hyparquet-writer](https://github.com/hyparam/hyparquet-writer) package.
-
 ## Supported Parquet Files
 
 The parquet format is known to be a sprawling format which includes options for a wide array of compression schemes, encoding types, and data structures.
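
For context on the writer package referenced above, here is a minimal sketch, assuming the `parquetWriteBuffer` entry point and `columnData` shape described in the hyparquet-writer README; the column names, values, and type strings are illustrative placeholders, not something this commit confirms:

```ts
import { parquetWriteBuffer } from 'hyparquet-writer'

// Build an in-memory parquet file from column-oriented data.
// Column names, data, and type strings below are hypothetical examples.
const arrayBuffer = parquetWriteBuffer({
  columnData: [
    { name: 'name', data: ['alice', 'bob'], type: 'STRING' },
    { name: 'age', data: [25, 32], type: 'INT32' },
  ],
})
```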

92 src/types.d.ts vendored

@@ -1,4 +1,48 @@
-export type Awaitable<T> = T | Promise<T>
+/**
+ * Parquet query options for reading data
+ */
+export interface ParquetReadOptions {
+  file: AsyncBuffer // file-like object containing parquet data
+  metadata?: FileMetaData // parquet metadata, will be parsed if not provided
+  columns?: string[] // columns to read, all columns if undefined
+  rowFormat?: string // format of each row passed to the onComplete function
+  rowStart?: number // first requested row index (inclusive)
+  rowEnd?: number // last requested row index (exclusive)
+  onChunk?: (chunk: ColumnData) => void // called when a column chunk is parsed. chunks may contain data outside the requested range.
+  onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed
+  compressors?: Compressors // custom decompressors
+  utf8?: boolean // decode byte arrays as utf8 strings (default true)
+}
+
+/**
+ * Parquet query options for filtering data
+ */
+export interface ParquetQueryFilter {
+  [key: string]: ParquetQueryValue | ParquetQueryOperator | ParquetQueryFilter[] | undefined
+  $and?: ParquetQueryFilter[]
+  $or?: ParquetQueryFilter[]
+  $not?: ParquetQueryFilter
+}
+export type ParquetQueryValue = string | number | boolean | object | null | undefined
+export type ParquetQueryOperator = {
+  $gt?: ParquetQueryValue
+  $gte?: ParquetQueryValue
+  $lt?: ParquetQueryValue
+  $lte?: ParquetQueryValue
+  $ne?: ParquetQueryValue
+  $in?: ParquetQueryValue[]
+  $nin?: ParquetQueryValue[]
+}
+
+/**
+ * A run of column data
+ */
+export interface ColumnData {
+  columnName: string
+  columnData: DecodedArray
+  rowStart: number
+  rowEnd: number
+}
 
 /**
  * File-like object that can read slices of a file asynchronously.
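
To make the newly front-loaded `ParquetReadOptions` concrete, here is a usage sketch of hyparquet's `parquetRead`; the file path and column names are hypothetical placeholders:

```ts
import { asyncBufferFromFile, parquetRead } from 'hyparquet'

// Read a slice of rows from two columns of a local parquet file.
const file = await asyncBufferFromFile('example.parquet') // hypothetical path
await parquetRead({
  file,
  columns: ['id', 'name'], // hypothetical column names
  rowStart: 0, // inclusive
  rowEnd: 100, // exclusive
  onChunk(chunk) {
    // a ColumnData run; it may contain rows outside [rowStart, rowEnd)
    console.log(chunk.columnName, chunk.rowStart, chunk.rowEnd)
  },
  onComplete(rows) {
    console.log(rows.length, 'rows') // any[][] by default
  },
})
```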
@@ -7,6 +51,7 @@ export interface AsyncBuffer {
   byteLength: number
   slice(start: number, end?: number): Awaitable<ArrayBuffer>
 }
+export type Awaitable<T> = T | Promise<T>
 
 export interface DataReader {
   view: DataView
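
`AsyncBuffer` is the file abstraction every reader option above accepts. A minimal sketch of a custom implementation over HTTP range requests, assuming a server that honors `Range` headers and reports `Content-Length` on HEAD; hyparquet also ships ready-made helpers such as `asyncBufferFromFile`:

```ts
import type { AsyncBuffer } from 'hyparquet'

// Serve slice() calls with HTTP range requests instead of a local file.
async function httpAsyncBuffer(url: string): Promise<AsyncBuffer> {
  // HEAD request to learn the total file size (assumes Content-Length is set)
  const head = await fetch(url, { method: 'HEAD' })
  const byteLength = Number(head.headers.get('Content-Length'))
  return {
    byteLength,
    async slice(start: number, end?: number): Promise<ArrayBuffer> {
      // HTTP ranges are inclusive on both ends; slice() end is exclusive
      const last = end === undefined ? '' : String(end - 1)
      const res = await fetch(url, { headers: { Range: `bytes=${start}-${last}` } })
      return res.arrayBuffer()
    },
  }
}
```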
@@ -329,50 +374,5 @@ export interface ColumnIndex {
 export type BoundaryOrder = 'UNORDERED' | 'ASCENDING' | 'DESCENDING'
 
-/**
- * A run of column data
- */
-export interface ColumnData {
-  columnName: string
-  columnData: ArrayLike<any>
-  rowStart: number
-  rowEnd: number
-}
-
-/**
- * Parquet query options for reading data
- */
-export interface ParquetReadOptions {
-  file: AsyncBuffer // file-like object containing parquet data
-  metadata?: FileMetaData // parquet metadata, will be parsed if not provided
-  columns?: string[] // columns to read, all columns if undefined
-  rowFormat?: string // format of each row passed to the onComplete function
-  rowStart?: number // first requested row index (inclusive)
-  rowEnd?: number // last requested row index (exclusive)
-  onChunk?: (chunk: ColumnData) => void // called when a column chunk is parsed. chunks may be outside the requested range.
-  onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed
-  compressors?: Compressors // custom decompressors
-  utf8?: boolean // decode byte arrays as utf8 strings (default true)
-}
-
-export type ParquetQueryValue = string | number | boolean | object | null | undefined
-
-export type ParquetQueryOperator = {
-  $gt?: ParquetQueryValue
-  $gte?: ParquetQueryValue
-  $lt?: ParquetQueryValue
-  $lte?: ParquetQueryValue
-  $ne?: ParquetQueryValue
-  $in?: ParquetQueryValue[]
-  $nin?: ParquetQueryValue[]
-}
-
-export interface ParquetQueryFilter {
-  [key: string]: ParquetQueryValue | ParquetQueryOperator | ParquetQueryFilter[] | undefined
-  $and?: ParquetQueryFilter[]
-  $or?: ParquetQueryFilter[]
-  $not?: ParquetQueryFilter
-}
-
 export type ThriftObject = { [ key: `field_${number}` ]: ThriftType }
 export type ThriftType = boolean | number | bigint | Uint8Array | ThriftType[] | ThriftObject
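
The query filter types moved to the top describe MongoDB-style predicates. A sketch, assuming they are consumed via the `filter` option of hyparquet's `parquetQuery`; the file path and column names are hypothetical:

```ts
import { asyncBufferFromFile, parquetQuery } from 'hyparquet'

// A ParquetQueryFilter built from ParquetQueryOperator comparisons.
const file = await asyncBufferFromFile('example.parquet') // hypothetical path
const rows = await parquetQuery({
  file,
  filter: {
    $and: [
      { age: { $gte: 18, $lt: 65 } }, // hypothetical columns
      { country: { $in: ['US', 'CA'] } },
    ],
  },
})
```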

@@ -1,4 +1,3 @@
-import { compressors } from 'hyparquet-compressors'
 import { describe, expect, it } from 'vitest'
 import { getColumnRange, readColumn } from '../src/column.js'
 import { parquetMetadata } from '../src/hyparquet.js'
@@ -14,8 +13,8 @@ describe('readColumn', () => {
     { rowGroupEnd: 0, expected: [] },
   ])('readColumn with rowGroupEnd %p', async ({ rowGroupEnd, expected }) => {
     const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
-    const asyncBuffer = await asyncBufferFromFile(testFile)
-    const arrayBuffer = await asyncBuffer.slice(0)
+    const file = await asyncBufferFromFile(testFile)
+    const arrayBuffer = await file.slice(0)
     const metadata = parquetMetadata(arrayBuffer)
     const column = metadata.row_groups[0].columns[0]
@@ -25,14 +24,14 @@ describe('readColumn', () => {
     const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
     const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
-    const result = readColumn(reader, 0, rowGroupEnd, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
+    const result = readColumn(reader, 0, rowGroupEnd, column.meta_data, schemaPath, { file })
     expect(result).toEqual(expected)
   })
 
   it('readColumn should return a typed array', async () => {
     const testFile = 'test/files/datapage_v2.snappy.parquet'
-    const asyncBuffer = await asyncBufferFromFile(testFile)
-    const arrayBuffer = await asyncBuffer.slice(0)
+    const file = await asyncBufferFromFile(testFile)
+    const arrayBuffer = await file.slice(0)
     const metadata = parquetMetadata(arrayBuffer)
     const column = metadata.row_groups[0].columns[1] // second column
@@ -42,7 +41,7 @@ describe('readColumn', () => {
     const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
     const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
-    const columnData = readColumn(reader, 0, Infinity, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
+    const columnData = readColumn(reader, 0, Infinity, column.meta_data, schemaPath, { file })
     expect(columnData[0]).toBeInstanceOf(Int32Array)
   })
 })
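
The test now exercises the low-level `readColumn` without custom decompressors, but at the public API level `hyparquet-compressors` remains the way to add codec support, via the `compressors` option in `ParquetReadOptions`. A sketch reusing the fixture path from the test above:

```ts
import { asyncBufferFromFile, parquetReadObjects } from 'hyparquet'
import { compressors } from 'hyparquet-compressors'

// Pass custom decompressors to cover codecs beyond the built-in support.
const file = await asyncBufferFromFile('test/files/datapage_v2.snappy.parquet')
const rows = await parquetReadObjects({ file, compressors })
console.log(rows[0])
```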