diff --git a/README.md b/README.md index d27ceaf..e11a05c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # hyparquet decompressors -This package exports a `compressors` object intended to be passed into [hyparquet](https://github.com/hyparam/hyparquet) in order to support all possible Apache Parquet files. +This package exports a `compressors` object intended to be passed into [hyparquet](https://github.com/hyparam/hyparquet). + +[Apache Parquet](https://parquet.apache.org) is a popular columnar storage format that is widely used in data engineering, data science, and machine learning applications for efficiently storing and processing large datasets. It supports a number of different compression formats, but most parquet files use snappy compression. + +By default, the hyparquet library supports only `uncompressed` and `snappy` compressed files. The `hyparquet-compressors` package aims to extend support to all legal parquet compression formats; the formats currently supported are listed below. ## Usage @@ -8,5 +12,24 @@ This package exports a `compressors` object intended to be passed into [hyparque import { parquetRead } from 'hyparquet' import { compressors } from 'hyparquet-compressors' -parquetRead({ file, compressors }) +await parquetRead({ file, compressors, onComplete: console.log }) ``` + +# Supported compression formats + +Parquet compression types supported with `hyparquet-compressors`: + - [X] Uncompressed + - [X] Snappy + - [X] GZip + - [ ] LZO + - [ ] Brotli + - [X] LZ4 + - [ ] ZSTD + - [X] LZ4_RAW + +# References + + - https://parquet.apache.org/docs/file-format/data-pages/compression/ + - https://en.wikipedia.org/wiki/Gzip + - https://en.wikipedia.org/wiki/LZ4_(compression_algorithm) + - https://en.wikipedia.org/wiki/Snappy_(compression) diff --git a/package.json b/package.json index 3aaa053..6e62e73 100644 --- a/package.json +++ b/package.json @@ -6,6 +6,8 @@ "decompress", "decompression", "decompressor", + "gzip", + "lz4", "hyparquet", "parquet" ], diff --git a/src/index.js b/src/index.js index 
3d02e80..8c9095a 100644 --- a/src/index.js +++ b/src/index.js @@ -11,4 +11,5 @@ export const compressors = { BROTLI: () => new Uint8Array(), // TODO ZSTD: () => new Uint8Array(), // TODO LZ4, + LZ4_RAW: LZ4, }