Validate url for asyncBufferFromUrl

This commit is contained in:
Kenny Daniel 2024-12-17 09:25:54 -08:00
parent 0dca631aa9
commit 7ce11ad844
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
4 changed files with 17 additions and 11 deletions

@ -3,10 +3,11 @@
![hyparquet parakeet](hyparquet.jpg)
[![npm](https://img.shields.io/npm/v/hyparquet)](https://www.npmjs.com/package/hyparquet)
[![minzipped](https://img.shields.io/bundlephobia/minzip/hyparquet)](https://www.npmjs.com/package/hyparquet)
[![workflow status](https://github.com/hyparam/hyparquet/actions/workflows/ci.yml/badge.svg)](https://github.com/hyparam/hyparquet/actions)
[![mit license](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![dependencies](https://img.shields.io/badge/Dependencies-0-blueviolet)](https://www.npmjs.com/package/hyparquet?activeTab=dependencies)
[![mit license](https://img.shields.io/badge/License-MIT-orange.svg)](https://opensource.org/licenses/MIT)
![coverage](https://img.shields.io/badge/Coverage-96-darkred)
[![dependencies](https://img.shields.io/badge/Dependencies-0-blueviolet)](https://www.npmjs.com/package/hyparquet?activeTab=dependencies)
Dependency free since 2023!
@ -64,7 +65,7 @@ Note: Hyparquet is published as an ES module, so dynamic `import()` may be requi
In the browser use `asyncBufferFromUrl` to wrap a url for reading asynchronously over the network.
It is recommended that you filter by row and column to limit fetch size:
```js
```javascript
const { asyncBufferFromUrl, parquetRead } = await import('https://cdn.jsdelivr.net/npm/hyparquet/src/hyparquet.min.js')
const url = 'https://hyperparam-public.s3.amazonaws.com/bunnies.parquet'
@ -109,7 +110,7 @@ You can define your own `AsyncBuffer` to create a virtual file that can be read
Pass the `requestInit` option to `asyncBufferFromUrl` to provide authentication information to a remote web server. For example:
```js
```javascript
await parquetRead({
file: await asyncBufferFromUrl({url, requestInit: {headers: {Authorization: 'Bearer my_token'}}}),
onComplete: data => console.log(data)
@ -163,7 +164,7 @@ For faster snappy decompression, try [hysnappy](https://github.com/hyparam/hysna
You can include support for ALL parquet `compressors` plus hysnappy using the [hyparquet-compressors](https://github.com/hyparam/hyparquet-compressors) package.
```js
```javascript
import { parquetRead } from 'hyparquet'
import { compressors } from 'hyparquet-compressors'

@ -2,8 +2,12 @@
"name": "hyparquet",
"version": "1.6.4",
"description": "parquet file parser for javascript",
"author": "Hyperparam",
"homepage": "https://hyperparam.app",
"keywords": [
"hyparquet",
"parquet",
"parquetjs",
"parser",
"snappy",
"thrift"
@ -23,18 +27,18 @@
"scripts": {
"build:types": "tsc -p ./tsconfig.build.json",
"coverage": "vitest run --coverage --coverage.include=src",
"lint": "eslint .",
"lint": "eslint",
"prepare": "npm run build:types",
"test": "vitest run"
},
"devDependencies": {
"@types/node": "22.10.1",
"@types/node": "22.10.2",
"@vitest/coverage-v8": "2.1.8",
"eslint": "9.16.0",
"eslint-plugin-jsdoc": "50.6.0",
"eslint": "9.17.0",
"eslint-plugin-jsdoc": "50.6.1",
"hyparquet-compressors": "1.0.0",
"typescript": "5.7.2",
"typescript-eslint": "8.18.0",
"typescript-eslint": "8.18.1",
"vitest": "2.1.8"
}
}

@ -67,6 +67,7 @@ export async function byteLengthFromUrl(url, requestInit) {
* @returns {Promise<AsyncBuffer>}
*/
export async function asyncBufferFromUrl({ url, byteLength, requestInit }) {
if (!url) throw new Error('missing url')
// byte length from HEAD request
byteLength ||= await byteLengthFromUrl(url, requestInit)
const init = requestInit || {}

@ -1,7 +1,7 @@
import fs from 'fs'
/**
* Read .parquet file into JSON
* Read file and parse as JSON
*
* @param {string} filePath
* @returns {any}