Export asyncBufferFromFile, asyncBufferFromUrl and add to README
parent: a5122e61d6
commit: 83e06c3465
README.md — 35 changed lines
````diff
@@ -52,18 +52,9 @@ npm install hyparquet
 To read the entire contents of a parquet file in a node.js environment:
 
 ```js
-const { parquetRead } = await import('hyparquet')
-const { createReadStream } = await import('fs')
-const file = { // AsyncBuffer
-  byteLength: stat.size,
-  async slice(start, end) {
-    // read file slice
-    const readStream = createReadStream(filename, { start, end })
-    return await readStreamToArrayBuffer(readStream)
-  }
-}
+const { asyncBufferFromFile, parquetRead } = await import('hyparquet')
 await parquetRead({
-  file,
+  file: await asyncBufferFromFile(filename),
   onComplete: data => console.log(data)
 })
 ```
````
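For readers new to the `AsyncBuffer` idea that `asyncBufferFromFile` now hides: the reader only needs an object with a `byteLength` and a `slice(start, end)` method that resolves to an `ArrayBuffer`. Below is a minimal hand-rolled sketch using node's `fs/promises`; the helper name `fileAsyncBuffer` is hypothetical and this is not hyparquet's actual implementation.

```js
// Sketch only: an illustrative file-backed AsyncBuffer built on fs/promises.
// Not hyparquet's asyncBufferFromFile, just the contract it satisfies.
import { open, stat } from 'fs/promises'

async function fileAsyncBuffer(filename) { // hypothetical helper name
  const { size } = await stat(filename)
  return {
    byteLength: size,
    async slice(start, end) {
      const handle = await open(filename, 'r')
      try {
        const length = (end ?? size) - start
        const buffer = Buffer.alloc(length)
        await handle.read(buffer, 0, length, start)
        // Return a standalone ArrayBuffer holding just the requested byte range
        return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + length)
      } finally {
        await handle.close()
      }
    },
  }
}
```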
````diff
@@ -71,30 +62,16 @@ await parquetRead({
 ### Browser
 
 Hyparquet supports asynchronous fetching of parquet files over a network.
-You can provide an `AsyncBuffer` which is like a js `ArrayBuffer` but the `slice` method returns `Promise<ArrayBuffer>`.
 
 ```js
-const { parquetRead } = await import('https://cdn.jsdelivr.net/npm/hyparquet/src/hyparquet.min.js')
-const file = { // AsyncBuffer
-  byteLength,
-  async slice(start, end) {
-    // fetch byte range from url
-    const headers = new Headers()
-    headers.set('Range', `bytes=${start}-${end - 1}`)
-    const res = await fetch(url, { headers })
-    if (!res.ok || !res.body) throw new Error('fetch failed')
-    return res.arrayBuffer()
-  },
-}
+const { asyncBufferFromUrl, parquetRead } = await import('https://cdn.jsdelivr.net/npm/hyparquet/src/hyparquet.min.js')
+const url = 'https://hyperparam-public.s3.amazonaws.com/bunnies.parquet'
 await parquetRead({
-  file,
+  file: await asyncBufferFromUrl(url),
   onComplete: data => console.log(data)
 })
 ```
 
-In a node.js environment:
-
-
 ## Metadata
 
 You can read just the metadata, including schema and data statistics using the `parquetMetadata` function:
````
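The removed browser snippet used `url` and `byteLength` without defining them; a URL-backed `AsyncBuffer` also has to discover the file size, typically with a HEAD request, before it can serve range reads. The sketch below shows one way to assemble that, assuming the server supports `Range` requests and reports `Content-Length`; it is an illustration, not hyparquet's actual `asyncBufferFromUrl`.

```js
// Sketch only: build an AsyncBuffer over a remote file with HTTP Range requests.
// Assumes the server honors Range and returns Content-Length on HEAD.
async function urlAsyncBuffer(url) { // hypothetical helper name
  const head = await fetch(url, { method: 'HEAD' })
  if (!head.ok) throw new Error('HEAD request failed')
  const byteLength = Number(head.headers.get('Content-Length'))
  return {
    byteLength,
    async slice(start, end = byteLength) {
      // HTTP ranges are inclusive, so subtract one from the exclusive end
      const res = await fetch(url, { headers: { Range: `bytes=${start}-${end - 1}` } })
      if (!res.ok) throw new Error('fetch failed')
      return res.arrayBuffer()
    },
  }
}
```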
````diff
@@ -122,7 +99,7 @@ const metadata = parquetMetadata(arrayBuffer)
 To parse parquet files from a user drag-and-drop action, see example in [index.html](index.html).
 
-## Filtering
+## Filtering by Row and Column
 
 To read large parquet files, it is recommended that you filter by row and column.
 Hyparquet is designed to load only the minimal amount of data needed to fulfill a query.
 
````
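As a sketch of what that filtering looks like in practice: the option names `columns`, `rowStart`, and `rowEnd` below are my reading of hyparquet's query options, so treat them as assumptions and check `src/hyparquet.d.ts` for the authoritative list.

```js
// Hedged sketch: read a slice of rows and a subset of columns.
// Option names (columns / rowStart / rowEnd) are assumptions, not confirmed here.
const { asyncBufferFromFile, parquetRead } = await import('hyparquet')

await parquetRead({
  file: await asyncBufferFromFile(filename), // filename assumed to be defined
  columns: ['colA', 'colB'], // decode only these columns
  rowStart: 100,             // first row to include
  rowEnd: 200,               // stop before this row
  onComplete: data => console.log(data)
})
```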
src/hyparquet.d.ts (vendored) — 16 changed lines
````diff
@@ -88,6 +88,22 @@ export function snappyUncompress(input: Uint8Array, output: Uint8Array): boolean
  */
 export function toJson(obj: any): any
 
+/**
+ * Construct an AsyncBuffer for a URL.
+ *
+ * @param {string} url
+ * @returns {Promise<AsyncBuffer>}
+ */
+export function asyncBufferFromUrl(url: string): Promise<AsyncBuffer>
+
+/**
+ * Construct an AsyncBuffer for a local file using node fs package.
+ *
+ * @param {string} filename
+ * @returns {Promise<AsyncBuffer>}
+ */
+export function asyncBufferFromFile(filename: string): Promise<AsyncBuffer>
+
 /**
  * Parquet query options for reading data
  */
````
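Both new declarations return the `AsyncBuffer` type declared elsewhere in this file. Going by the README's description (a buffer-like object whose `slice` is asynchronous), its shape is roughly the JSDoc typedef below; this is a paraphrase for illustration, not the literal declaration from the source.

```js
/**
 * Rough shape of AsyncBuffer as described in the README: a byte length plus
 * an asynchronous slice. Paraphrased, not copied from hyparquet.d.ts.
 * @typedef {object} AsyncBuffer
 * @property {number} byteLength total size of the underlying file in bytes
 * @property {(start: number, end?: number) => Promise<ArrayBuffer>} slice fetch a byte range
 */
```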
````diff
@@ -7,5 +7,5 @@ export { parquetRead }
 import { snappyUncompress } from './snappy.js'
 export { snappyUncompress }
 
-import { toJson } from './utils.js'
-export { toJson }
+import { asyncBufferFromFile, asyncBufferFromUrl, toJson } from './utils.js'
+export { asyncBufferFromFile, asyncBufferFromUrl, toJson }
````
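With that re-export in place, both helpers are importable from the package root alongside `parquetRead`, in the same dynamic-import style the README uses:

```js
// The newly re-exported helpers come straight from the package entry point
const { asyncBufferFromFile, asyncBufferFromUrl, parquetRead } = await import('hyparquet')
```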