Update dependencies

This commit is contained in:
Kenny Daniel 2024-09-24 16:47:56 -07:00
parent c77388fb5e
commit 9a2f4fdcba
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
7 changed files with 34 additions and 24 deletions

@ -14,7 +14,7 @@ Dependency free since 2023!
Hyparquet is a lightweight, pure JavaScript library for parsing [Apache Parquet](https://parquet.apache.org) files. Apache Parquet is a popular columnar storage format that is widely used in data engineering, data science, and machine learning applications for efficiently storing and processing large datasets.
Hyparquet allows you to read and extract data from Parquet files directly in JavaScript environments, both in Node.js and in the browser. It is designed to be fast, memory-efficient, and easy to use.
Hyparquet allows you to read and extract data from Parquet files directly in JavaScript environments, both in Node.js and in the browser, without any dependencies. Designed for performance and ease of use, hyparquet is ideal for data engineering, data science, and machine learning applications that require efficient data processing.
## Demo
@ -53,7 +53,7 @@ npm install hyparquet
To read the entire contents of a parquet file in a Node.js environment:
```js
```javascript
const { asyncBufferFromFile, parquetRead } = await import('hyparquet')
await parquetRead({
file: await asyncBufferFromFile(filename),
@ -78,7 +78,7 @@ await parquetRead({
You can read just the metadata, including schema and data statistics, using the `parquetMetadata` function:
```js
```javascript
const { parquetMetadata } = await import('hyparquet')
const fs = await import('fs')
@ -91,7 +91,7 @@ If you're in a browser environment, you'll probably get parquet file data from e
To load parquet data in the browser from a remote server using `fetch`:
```js
```javascript
import { parquetMetadata } from 'hyparquet'
const res = await fetch(url)
@ -108,7 +108,7 @@ Hyparquet is designed to load only the minimal amount of data needed to fulfill
You can filter rows by number, or columns by name,
and columns will be returned in the same order they were requested:
```js
```javascript
import { parquetRead } from 'hyparquet'
await parquetRead({
@ -125,7 +125,7 @@ await parquetRead({
By default, the data passed to the `onComplete` function is an array of rows, where each row is an array of column values.
If you would like each row to be an object keyed by column name instead, set the `rowFormat` option to `object`.
```js
```javascript
import { parquetRead } from 'hyparquet'
await parquetRead({
@ -151,7 +151,7 @@ interface AsyncBuffer {
You can read parquet files asynchronously using HTTP Range requests so that only the necessary byte ranges from a `url` will be fetched:
```js
```javascript
import { parquetRead } from 'hyparquet'
const url = 'https://hyperparam-public.s3.amazonaws.com/wiki-en-00000-of-00041.parquet'

@ -8,6 +8,13 @@ interface LayoutProps {
metadata: FileMetaData
}
/**
* Renders the file layout of a parquet file as nested rowgroups and columns.
* @param {Object} props
* @param {number} props.byteLength
* @param {FileMetaData} props.metadata
* @returns {ReactNode}
*/
export default function ParquetLayout({ byteLength, metadata }: LayoutProps) {
const metadataStart = byteLength - metadata.metadata_length - 4
const metadataEnd = byteLength - 4

@ -6,6 +6,12 @@ interface MetadataProps {
metadata: FileMetaData
}
/**
* Renders the metadata of a parquet file as JSON.
* @param {Object} props
* @param {FileMetaData} props.metadata
* @returns {ReactNode}
*/
export default function ParquetMetadata({ metadata }: MetadataProps) {
return <code className='viewer'>
{JSON.stringify(toJson(metadata), null, ' ')}

4
demo/bundle.min.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -27,15 +27,15 @@
"test": "vitest run"
},
"devDependencies": {
"@rollup/plugin-commonjs": "26.0.1",
"@rollup/plugin-node-resolve": "15.2.3",
"@rollup/plugin-replace": "5.0.7",
"@rollup/plugin-commonjs": "28.0.0",
"@rollup/plugin-node-resolve": "15.3.0",
"@rollup/plugin-replace": "6.0.1",
"@rollup/plugin-terser": "0.4.4",
"@rollup/plugin-typescript": "11.1.6",
"@types/node": "22.5.5",
"@types/react": "18.3.8",
"@rollup/plugin-typescript": "12.1.0",
"@types/node": "22.6.1",
"@types/react": "18.3.9",
"@types/react-dom": "18.3.0",
"@typescript-eslint/eslint-plugin": "8.6.0",
"@typescript-eslint/eslint-plugin": "8.7.0",
"@vitest/coverage-v8": "2.1.1",
"eslint": "8.57.0",
"eslint-plugin-import": "2.30.0",
@ -45,7 +45,7 @@
"hyparquet-compressors": "0.1.4",
"react": "18.3.1",
"react-dom": "18.3.1",
"rollup": "4.22.2",
"rollup": "4.22.4",
"typescript": "5.6.2",
"vitest": "2.1.1"
}

@ -1,14 +1,11 @@
import { parquetMetadata, parquetMetadataAsync, parquetSchema } from './metadata.js'
export { parquetMetadata, parquetMetadataAsync, parquetSchema }
export { parquetMetadata, parquetMetadataAsync, parquetSchema } from './metadata.js'
import { parquetRead } from './read.js'
export { parquetRead }
import { snappyUncompress } from './snappy.js'
export { snappyUncompress }
export { snappyUncompress } from './snappy.js'
import { asyncBufferFromFile, asyncBufferFromUrl, toJson } from './utils.js'
export { asyncBufferFromFile, asyncBufferFromUrl, toJson }
export { asyncBufferFromFile, asyncBufferFromUrl, toJson } from './utils.js'
/**
* @param {import('./hyparquet.js').ParquetReadOptions} options