mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-30 00:46:38 +00:00
Re-order types.d.ts to put important APIs up front
This commit is contained in:
parent
8740f14450
commit
4645e34f97
@ -78,6 +78,10 @@ const data = await parquetReadObjects({
|
||||
})
|
||||
```
|
||||
|
||||
## Parquet Writing
|
||||
|
||||
To create parquet files from JavaScript, check out the [hyparquet-writer](https://github.com/hyparam/hyparquet-writer) package.
|
||||
|
||||
## Advanced Usage
|
||||
|
||||
### Reading Metadata
|
||||
@ -180,10 +184,6 @@ await parquetRead({
|
||||
|
||||
The `parquetReadObjects` function defaults to returning an array of objects.
|
||||
|
||||
## Parquet Writing
|
||||
|
||||
To create parquet files from JavaScript, check out the [hyparquet-writer](https://github.com/hyparam/hyparquet-writer) package.
|
||||
|
||||
## Supported Parquet Files
|
||||
|
||||
The parquet format is known to be sprawling: it includes options for a wide array of compression schemes, encoding types, and data structures.
|
||||
|
||||
92
src/types.d.ts
vendored
92
src/types.d.ts
vendored
@ -1,4 +1,48 @@
|
||||
export type Awaitable<T> = T | Promise<T>
|
||||
/**
|
||||
* Parquet query options for reading data
|
||||
*/
|
||||
export interface ParquetReadOptions {
|
||||
file: AsyncBuffer // file-like object containing parquet data
|
||||
metadata?: FileMetaData // parquet metadata, will be parsed if not provided
|
||||
columns?: string[] // columns to read, all columns if undefined
|
||||
rowFormat?: string // format of each row passed to the onComplete function
|
||||
rowStart?: number // first requested row index (inclusive)
|
||||
rowEnd?: number // last requested row index (exclusive)
|
||||
onChunk?: (chunk: ColumnData) => void // called when a column chunk is parsed. chunks may contain data outside the requested range.
|
||||
onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed
|
||||
compressors?: Compressors // custom decompressors
|
||||
utf8?: boolean // decode byte arrays as utf8 strings (default true)
|
||||
}
|
||||
|
||||
/**
|
||||
* Parquet query options for filtering data
|
||||
*/
|
||||
export interface ParquetQueryFilter {
|
||||
[key: string]: ParquetQueryValue | ParquetQueryOperator | ParquetQueryFilter[] | undefined
|
||||
$and?: ParquetQueryFilter[]
|
||||
$or?: ParquetQueryFilter[]
|
||||
$not?: ParquetQueryFilter
|
||||
}
|
||||
export type ParquetQueryValue = string | number | boolean | object | null | undefined
|
||||
export type ParquetQueryOperator = {
|
||||
$gt?: ParquetQueryValue
|
||||
$gte?: ParquetQueryValue
|
||||
$lt?: ParquetQueryValue
|
||||
$lte?: ParquetQueryValue
|
||||
$ne?: ParquetQueryValue
|
||||
$in?: ParquetQueryValue[]
|
||||
$nin?: ParquetQueryValue[]
|
||||
}
|
||||
|
||||
/**
|
||||
* A run of column data
|
||||
*/
|
||||
export interface ColumnData {
|
||||
columnName: string
|
||||
columnData: DecodedArray
|
||||
rowStart: number
|
||||
rowEnd: number
|
||||
}
|
||||
|
||||
/**
|
||||
* File-like object that can read slices of a file asynchronously.
|
||||
@ -7,6 +51,7 @@ export interface AsyncBuffer {
|
||||
byteLength: number
|
||||
slice(start: number, end?: number): Awaitable<ArrayBuffer>
|
||||
}
|
||||
export type Awaitable<T> = T | Promise<T>
|
||||
|
||||
export interface DataReader {
|
||||
view: DataView
|
||||
@ -329,50 +374,5 @@ export interface ColumnIndex {
|
||||
|
||||
export type BoundaryOrder = 'UNORDERED' | 'ASCENDING' | 'DESCENDING'
|
||||
|
||||
/**
|
||||
* A run of column data
|
||||
*/
|
||||
export interface ColumnData {
|
||||
columnName: string
|
||||
columnData: ArrayLike<any>
|
||||
rowStart: number
|
||||
rowEnd: number
|
||||
}
|
||||
|
||||
/**
|
||||
* Parquet query options for reading data
|
||||
*/
|
||||
export interface ParquetReadOptions {
|
||||
file: AsyncBuffer // file-like object containing parquet data
|
||||
metadata?: FileMetaData // parquet metadata, will be parsed if not provided
|
||||
columns?: string[] // columns to read, all columns if undefined
|
||||
rowFormat?: string // format of each row passed to the onComplete function
|
||||
rowStart?: number // first requested row index (inclusive)
|
||||
rowEnd?: number // last requested row index (exclusive)
|
||||
onChunk?: (chunk: ColumnData) => void // called when a column chunk is parsed. chunks may be outside the requested range.
|
||||
onComplete?: (rows: any[][]) => void // called when all requested rows and columns are parsed
|
||||
compressors?: Compressors // custom decompressors
|
||||
utf8?: boolean // decode byte arrays as utf8 strings (default true)
|
||||
}
|
||||
|
||||
export type ParquetQueryValue = string | number | boolean | object | null | undefined
|
||||
|
||||
export type ParquetQueryOperator = {
|
||||
$gt?: ParquetQueryValue
|
||||
$gte?: ParquetQueryValue
|
||||
$lt?: ParquetQueryValue
|
||||
$lte?: ParquetQueryValue
|
||||
$ne?: ParquetQueryValue
|
||||
$in?: ParquetQueryValue[]
|
||||
$nin?: ParquetQueryValue[]
|
||||
}
|
||||
|
||||
export interface ParquetQueryFilter {
|
||||
[key: string]: ParquetQueryValue | ParquetQueryOperator | ParquetQueryFilter[] | undefined
|
||||
$and?: ParquetQueryFilter[]
|
||||
$or?: ParquetQueryFilter[]
|
||||
$not?: ParquetQueryFilter
|
||||
}
|
||||
|
||||
export type ThriftObject = { [ key: `field_${number}` ]: ThriftType }
|
||||
export type ThriftType = boolean | number | bigint | Uint8Array | ThriftType[] | ThriftObject
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
import { compressors } from 'hyparquet-compressors'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { getColumnRange, readColumn } from '../src/column.js'
|
||||
import { parquetMetadata } from '../src/hyparquet.js'
|
||||
@ -14,8 +13,8 @@ describe('readColumn', () => {
|
||||
{ rowGroupEnd: 0, expected: [] },
|
||||
])('readColumn with rowGroupEnd %p', async ({ rowGroupEnd, expected }) => {
|
||||
const testFile = 'test/files/float16_nonzeros_and_nans.parquet'
|
||||
const asyncBuffer = await asyncBufferFromFile(testFile)
|
||||
const arrayBuffer = await asyncBuffer.slice(0)
|
||||
const file = await asyncBufferFromFile(testFile)
|
||||
const arrayBuffer = await file.slice(0)
|
||||
const metadata = parquetMetadata(arrayBuffer)
|
||||
|
||||
const column = metadata.row_groups[0].columns[0]
|
||||
@ -25,14 +24,14 @@ describe('readColumn', () => {
|
||||
const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
|
||||
const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
|
||||
|
||||
const result = readColumn(reader, 0, rowGroupEnd, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
|
||||
const result = readColumn(reader, 0, rowGroupEnd, column.meta_data, schemaPath, { file })
|
||||
expect(result).toEqual(expected)
|
||||
})
|
||||
|
||||
it('readColumn should return a typed array', async () => {
|
||||
const testFile = 'test/files/datapage_v2.snappy.parquet'
|
||||
const asyncBuffer = await asyncBufferFromFile(testFile)
|
||||
const arrayBuffer = await asyncBuffer.slice(0)
|
||||
const file = await asyncBufferFromFile(testFile)
|
||||
const arrayBuffer = await file.slice(0)
|
||||
const metadata = parquetMetadata(arrayBuffer)
|
||||
|
||||
const column = metadata.row_groups[0].columns[1] // second column
|
||||
@ -42,7 +41,7 @@ describe('readColumn', () => {
|
||||
const schemaPath = getSchemaPath(metadata.schema, column.meta_data?.path_in_schema ?? [])
|
||||
const reader = { view: new DataView(columnArrayBuffer), offset: 0 }
|
||||
|
||||
const columnData = readColumn(reader, 0, Infinity, column.meta_data, schemaPath, { file: asyncBuffer, compressors })
|
||||
const columnData = readColumn(reader, 0, Infinity, column.meta_data, schemaPath, { file })
|
||||
expect(columnData[0]).toBeInstanceOf(Int32Array)
|
||||
})
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user