This commit is contained in:
Kenny Daniel 2024-05-19 16:21:38 -07:00
parent d2204e2184
commit c7fef01eff
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
7 changed files with 54 additions and 6 deletions

@ -25,19 +25,21 @@
},
"dependencies": {
"hysnappy": "0.3.1",
"lz4": "0.6.5",
"pako": "2.1.0"
},
"devDependencies": {
"@babel/eslint-parser": "7.24.5",
"@rollup/plugin-node-resolve": "15.2.3",
"@rollup/plugin-terser": "0.4.4",
"@types/lz4": "0.6.4",
"@types/node": "20.12.12",
"@types/pako": "2.0.3",
"@vitest/coverage-v8": "1.6.0",
"eslint": "8.57.0",
"eslint-plugin-import": "2.29.1",
"eslint-plugin-jsdoc": "48.2.5",
"hyparquet": "0.9.3",
"hyparquet": "0.9.4",
"rollup": "4.17.2",
"typescript": "5.4.5",
"vitest": "1.6.0"

@ -1,12 +1,18 @@
import { snappyUncompressor } from 'hysnappy'
import lz4 from 'lz4'
import pako from 'pako'
/**
 * Decompression functions keyed by parquet codec name, in the shape hyparquet
 * accepts through its `compressors` option.
 *
 * @type {import('hyparquet').Compressors}
 */
export const compressors = {
  SNAPPY: snappyUncompressor(),
  GZIP: input => pako.ungzip(input),
  BROTLI: () => new Uint8Array(), // TODO: not implemented, yields empty output
  ZSTD: () => new Uint8Array(), // TODO: not implemented, yields empty output
  LZ4: (input, outputLength) => {
    const out = Buffer.alloc(outputLength)
    // decodeBlock signals failure with a negative return value instead of
    // throwing, so surface it rather than handing back a bogus buffer
    const decodedSize = lz4.decodeBlock(Buffer.from(input), out)
    if (decodedSize < 0) {
      throw new Error(`lz4 decodeBlock failed with code ${decodedSize}`)
    }
    return out
  },
}

@ -0,0 +1,22 @@
[
[
1593604800,
[97, 98, 99],
42
],
[
1593604800,
[100, 101, 102],
7.7
],
[
1593604801,
[97, 98, 99],
42.125
],
[
1593604801,
[100, 101, 102],
7.7
]
]

Binary file not shown.

@ -4,7 +4,7 @@ import { describe, expect, it } from 'vitest'
import { compressors } from '../src/index.js'
describe('gzip compressor', () => {
it('should read gzip compressed file', async () => {
it('should read gzip compressed parquet file', async () => {
const buffer = fs.readFileSync('test/files/concatenated_gzip_members.parquet')
const file = new Uint8Array(buffer).buffer
const expected = fs.readFileSync('test/files/concatenated_gzip_members.json').toString()

17
test/lz4.test.js Normal file

@ -0,0 +1,17 @@
import fs from 'fs'
import { parquetRead, toJson } from 'hyparquet'
import { describe, expect, it } from 'vitest'
import { compressors } from '../src/index.js'
describe('lz4 compressor', () => {
  it('should read lz4 compressed parquet file', async () => {
    // Load the LZ4-compressed parquet fixture as an ArrayBuffer
    const parquetBytes = fs.readFileSync('test/files/non_hadoop_lz4_compressed.parquet')
    const file = new Uint8Array(parquetBytes).buffer
    // Expected rows are stored next to the fixture as JSON text
    const expectedText = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString()

    /**
     * Check the decoded rows against the JSON fixture.
     *
     * @param {any[][]} rows rows handed back by parquetRead
     */
    function onComplete(rows) {
      expect(rows.length).toBe(4)
      const expectedRows = JSON.parse(expectedText)
      expect(toJson(rows)).toEqual(expectedRows)
    }

    await parquetRead({ file, compressors, onComplete })
  })
})

@ -12,8 +12,9 @@ describe('package.json', () => {
expect(packageJson.license).toBe('MIT')
})
it('should have precise dependency versions', () => {
  // Runtime and dev dependencies must both be pinned to an exact x.y.z version.
  // Each section is checked on its own so a package listed in both cannot have
  // one of its versions hidden by an object merge; a missing section is skipped.
  const { dependencies, devDependencies } = packageJson
  const versions = [dependencies, devDependencies]
    .flatMap(section => Object.values(section ?? {}))
  versions.forEach(version => {
    expect(version).toMatch(/^\d+\.\d+\.\d+$/)
  })
})
})