Zstandard

This commit is contained in:
Kenny Daniel 2024-05-20 00:03:23 -07:00
parent 03befc14a5
commit 567d14617e
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
6 changed files with 1049 additions and 3 deletions

@ -26,9 +26,9 @@ Parquet compression types supported with `hyparquet-compressors`:
- [X] Snappy
- [x] GZip
- [ ] LZO
- [ ] Brotli
- [X] Brotli
- [X] LZ4
- [ ] ZSTD
- [X] ZSTD
- [X] LZ4_RAW
# References

@ -27,6 +27,7 @@
},
"dependencies": {
"brotli": "1.3.3",
"fzstd": "0.1.1",
"hysnappy": "0.3.1",
"pako": "2.1.0"
},

@ -1,5 +1,6 @@
// @ts-ignore
import BROTLI from 'brotli/decompress'
import { decompress as ZSTD } from 'fzstd'
import { snappyUncompressor } from 'hysnappy'
import pako from 'pako'
import { LZ4, LZ4_RAW } from './lz4.js'
@ -11,7 +12,7 @@ export const compressors = {
SNAPPY: snappyUncompressor(),
GZIP: input => pako.ungzip(input),
BROTLI,
ZSTD: () => new Uint8Array(), // TODO
ZSTD: input => ZSTD(input),
LZ4,
LZ4_RAW,
}

1027
test/files/wiki_1k.zstd.json Normal file

File diff suppressed because it is too large Load Diff

Binary file not shown.

17
test/zstd.test.js Normal file

@ -0,0 +1,17 @@
import fs from 'fs'
import { parquetRead, toJson } from 'hyparquet'
import { describe, expect, it } from 'vitest'
import { compressors } from '../src/index.js'
/**
 * Reads a zstd-compressed parquet fixture through hyparquet using the
 * `compressors` map exported by this package, and compares the decoded
 * output against a JSON fixture stored alongside it.
 */
describe('zstd compressor', () => {
  it('read zstd compressed parquet file wiki_1k', async () => {
    // new Uint8Array(buffer) copies the bytes, so `.buffer` is a standalone
    // ArrayBuffer containing only the file contents.
    const parquetBytes = fs.readFileSync('test/files/wiki_1k.zstd.parquet')
    const file = new Uint8Array(parquetBytes).buffer
    const expectedJson = fs.readFileSync('test/files/wiki_1k.zstd.json').toString()

    /**
     * Completion callback passed to parquetRead; holds the assertions.
     * @param {any[]} result value handed to onComplete by parquetRead
     */
    function onComplete(result) {
      expect(result.length).toBe(1024)
      expect(toJson(result)).toEqual(JSON.parse(expectedJson))
    }

    await parquetRead({ file, compressors, onComplete })
  })
})