From c7fef01effb8b6df1e76222d85f144d7353a9914 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Sun, 19 May 2024 16:21:38 -0700 Subject: [PATCH] lz4 --- package.json | 4 +++- src/index.js | 10 +++++++-- test/files/non_hadoop_lz4_compressed.json | 22 +++++++++++++++++++ test/files/non_hadoop_lz4_compressed.parquet | Bin 0 -> 1228 bytes test/gzip.test.js | 2 +- test/lz4.test.js | 17 ++++++++++++++ test/package.test.js | 5 +++-- 7 files changed, 54 insertions(+), 6 deletions(-) create mode 100644 test/files/non_hadoop_lz4_compressed.json create mode 100644 test/files/non_hadoop_lz4_compressed.parquet create mode 100644 test/lz4.test.js diff --git a/package.json b/package.json index d3a376a..b0164b0 100644 --- a/package.json +++ b/package.json @@ -25,19 +25,21 @@ }, "dependencies": { "hysnappy": "0.3.1", + "lz4": "0.6.5", "pako": "2.1.0" }, "devDependencies": { "@babel/eslint-parser": "7.24.5", "@rollup/plugin-node-resolve": "15.2.3", "@rollup/plugin-terser": "0.4.4", + "@types/lz4": "0.6.4", "@types/node": "20.12.12", "@types/pako": "2.0.3", "@vitest/coverage-v8": "1.6.0", "eslint": "8.57.0", "eslint-plugin-import": "2.29.1", "eslint-plugin-jsdoc": "48.2.5", - "hyparquet": "0.9.3", + "hyparquet": "0.9.4", "rollup": "4.17.2", "typescript": "5.4.5", "vitest": "1.6.0" diff --git a/src/index.js b/src/index.js index d671fb6..c0611e9 100644 --- a/src/index.js +++ b/src/index.js @@ -1,12 +1,18 @@ import { snappyUncompressor } from 'hysnappy' +import lz4 from 'lz4' import pako from 'pako' /** - * @typedef {import('hyparquet').Compressors} Compressors + * @type {import('hyparquet').Compressors} */ export const compressors = { SNAPPY: snappyUncompressor(), - GZIP: (/** @type {Uint8Array} */ input) => pako.ungzip(input), + GZIP: input => pako.ungzip(input), BROTLI: () => new Uint8Array(), // TODO ZSTD: () => new Uint8Array(), // TODO + LZ4: (input, outputLength) => { + const out = Buffer.alloc(outputLength) + lz4.decodeBlock(Buffer.from(input), out) + return out + }, } diff --git a/test/files/non_hadoop_lz4_compressed.json b/test/files/non_hadoop_lz4_compressed.json new file mode 100644 index 0000000..9956d35 --- /dev/null +++ b/test/files/non_hadoop_lz4_compressed.json @@ -0,0 +1,22 @@ +[ + [ + 1593604800, + [97, 98, 99], + 42 + ], + [ + 1593604800, + [100, 101, 102], + 7.7 + ], + [ + 1593604801, + [97, 98, 99], + 42.125 + ], + [ + 1593604801, + [100, 101, 102], + 7.7 + ] +] diff --git a/test/files/non_hadoop_lz4_compressed.parquet b/test/files/non_hadoop_lz4_compressed.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cfbdc7ef2db3aa70119e5941701a0fb1647e2de3 GIT binary patch literal 1228 zcmcIk!EVz)5S_JM%ZhSN|T4;eU z;D8Y4p85ej@da?}q2-7WC&U*Z&K#Jr;}B>#Ak?R5XZOwQ+qbiFI;!iVfh9cKq+rYf zd=u|p{I~;z_oIk_vsfsjMHMERu5D!*+sbMs%uQiQ$J_;8Nr)q%d*jiM< zoV4VDkROEC@T`APb;|4&H6UjkfP;(0>JLqBo&Qpi`rJJ7w?y8GMCsV*v5Zc>xDk*7gGP!Em{1?&|AItbuYJs>j$l#|WO zGPZSe$S4Nf%;PLEducN@eI2~-ri;0eSAE-0&3+d0lkmRIc)K&y(Bm3^QJ!)Y&1qJ_ zHn*aCgNJ=Isl(?qel@^+GVuX(G~ts$T8`^1p5*)f@5`KUx8z$xT6BMk1pvpj^Qzlz z-@cS~&)(P0GcLx8u)+i3x>7qTjMWvTVpU60DkQ4K$(mAEIflO?tvD4{m~ZotNYyr2 zniipYo{+bAq { - it('should read gzip compressed file', async () => { + it('should read gzip compressed parquet file', async () => { const buffer = fs.readFileSync('test/files/concatenated_gzip_members.parquet') const file = new Uint8Array(buffer).buffer const expected = fs.readFileSync('test/files/concatenated_gzip_members.json').toString() diff --git a/test/lz4.test.js b/test/lz4.test.js new file mode 100644 index 0000000..d2e9556 --- /dev/null +++ b/test/lz4.test.js @@ -0,0 +1,17 @@ +import fs from 'fs' +import { parquetRead, toJson } from 'hyparquet' +import { describe, expect, it } from 'vitest' +import { compressors } from '../src/index.js' + +describe('lz4 compressor', () => { + it('should read lz4 compressed parquet file', async () => { + const buffer = fs.readFileSync('test/files/non_hadoop_lz4_compressed.parquet') + const file = new Uint8Array(buffer).buffer + const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString() + + await parquetRead({ file, compressors, onComplete: data => { + expect(data.length).toBe(4) + expect(toJson(data)).toEqual(JSON.parse(expected)) + } }) + }) +}) diff --git a/test/package.test.js b/test/package.test.js index 375e3c2..2fda96a 100644 --- a/test/package.test.js +++ b/test/package.test.js @@ -12,8 +12,9 @@ describe('package.json', () => { expect(packageJson.license).toBe('MIT') }) it('should have precise dependency versions', () => { - const { devDependencies } = packageJson - Object.values(devDependencies).forEach(version => { + const { dependencies, devDependencies } = packageJson + const allDependencies = { ...dependencies, ...devDependencies } + Object.values(allDependencies).forEach(version => { expect(version).toMatch(/^\d+\.\d+\.\d+$/) }) })