diff --git a/package.json b/package.json index d3a376a..b0164b0 100644 --- a/package.json +++ b/package.json @@ -25,19 +25,21 @@ }, "dependencies": { "hysnappy": "0.3.1", + "lz4": "0.6.5", "pako": "2.1.0" }, "devDependencies": { "@babel/eslint-parser": "7.24.5", "@rollup/plugin-node-resolve": "15.2.3", "@rollup/plugin-terser": "0.4.4", + "@types/lz4": "0.6.4", "@types/node": "20.12.12", "@types/pako": "2.0.3", "@vitest/coverage-v8": "1.6.0", "eslint": "8.57.0", "eslint-plugin-import": "2.29.1", "eslint-plugin-jsdoc": "48.2.5", - "hyparquet": "0.9.3", + "hyparquet": "0.9.4", "rollup": "4.17.2", "typescript": "5.4.5", "vitest": "1.6.0" diff --git a/src/index.js b/src/index.js index d671fb6..c0611e9 100644 --- a/src/index.js +++ b/src/index.js @@ -1,12 +1,18 @@ import { snappyUncompressor } from 'hysnappy' +import lz4 from 'lz4' import pako from 'pako' /** - * @typedef {import('hyparquet').Compressors} Compressors + * @type {import('hyparquet').Compressors} */ export const compressors = { SNAPPY: snappyUncompressor(), - GZIP: (/** @type {Uint8Array} */ input) => pako.ungzip(input), + GZIP: input => pako.ungzip(input), BROTLI: () => new Uint8Array(), // TODO ZSTD: () => new Uint8Array(), // TODO + LZ4: (input, outputLength) => { + const out = Buffer.alloc(outputLength) + lz4.decodeBlock(Buffer.from(input), out) + return out + }, } diff --git a/test/files/non_hadoop_lz4_compressed.json b/test/files/non_hadoop_lz4_compressed.json new file mode 100644 index 0000000..9956d35 --- /dev/null +++ b/test/files/non_hadoop_lz4_compressed.json @@ -0,0 +1,22 @@ +[ + [ + 1593604800, + [97, 98, 99], + 42 + ], + [ + 1593604800, + [100, 101, 102], + 7.7 + ], + [ + 1593604801, + [97, 98, 99], + 42.125 + ], + [ + 1593604801, + [100, 101, 102], + 7.7 + ] +] diff --git a/test/files/non_hadoop_lz4_compressed.parquet b/test/files/non_hadoop_lz4_compressed.parquet new file mode 100644 index 0000000..cfbdc7e Binary files /dev/null and b/test/files/non_hadoop_lz4_compressed.parquet differ diff --git a/test/gzip.test.js b/test/gzip.test.js index 2547651..941f5bf 100644 --- a/test/gzip.test.js +++ b/test/gzip.test.js @@ -4,7 +4,7 @@ import { describe, expect, it } from 'vitest' import { compressors } from '../src/index.js' describe('gzip compressor', () => { - it('should read gzip compressed file', async () => { + it('should read gzip compressed parquet file', async () => { const buffer = fs.readFileSync('test/files/concatenated_gzip_members.parquet') const file = new Uint8Array(buffer).buffer const expected = fs.readFileSync('test/files/concatenated_gzip_members.json').toString() diff --git a/test/lz4.test.js b/test/lz4.test.js new file mode 100644 index 0000000..d2e9556 --- /dev/null +++ b/test/lz4.test.js @@ -0,0 +1,17 @@ +import fs from 'fs' +import { parquetRead, toJson } from 'hyparquet' +import { describe, expect, it } from 'vitest' +import { compressors } from '../src/index.js' + +describe('lz4 compressor', () => { + it('should read lz4 compressed parquet file', async () => { + const buffer = fs.readFileSync('test/files/non_hadoop_lz4_compressed.parquet') + const file = new Uint8Array(buffer).buffer + const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString() + + await parquetRead({ file, compressors, onComplete: data => { + expect(data.length).toBe(4) + expect(toJson(data)).toEqual(JSON.parse(expected)) + } }) + }) +}) diff --git a/test/package.test.js b/test/package.test.js index 375e3c2..2fda96a 100644 --- a/test/package.test.js +++ b/test/package.test.js @@ -12,8 +12,9 @@ describe('package.json', () => { expect(packageJson.license).toBe('MIT') }) it('should have precise dependency versions', () => { - const { devDependencies } = packageJson - Object.values(devDependencies).forEach(version => { + const { dependencies, devDependencies } = packageJson + const allDependencies = { ...dependencies, ...devDependencies } + Object.values(allDependencies).forEach(version => { expect(version).toMatch(/^\d+\.\d+\.\d+$/) }) })