diff --git a/src/lz4.js b/src/lz4.js index 7844118..2b4c344 100644 --- a/src/lz4.js +++ b/src/lz4.js @@ -10,6 +10,10 @@ export function LZ4(input, outputLength) { let len = 0 // output position for (let i = 0; i < input.length;) { const token = input[i++] + if (!token) { + i += 7 // leading length + continue + } let literals = token >> 4 if (literals) { @@ -24,7 +28,7 @@ export function LZ4(input, outputLength) { } const offset = input[i++] | input[i++] << 8 - if (!offset || offset > len) throw new Error('lz4 offset out of range') + if (!offset || offset > len) throw new Error(`lz4 offset out of range ${offset}`) // match length let matchLength = (token & 0xf) + 4 let byte = matchLength + 240 diff --git a/test/files/hadoop_lz4_compressed.parquet b/test/files/hadoop_lz4_compressed.parquet new file mode 100644 index 0000000..b5fadcd Binary files /dev/null and b/test/files/hadoop_lz4_compressed.parquet differ diff --git a/test/lz4.test.js b/test/lz4.test.js index d2e9556..5a7eac1 100644 --- a/test/lz4.test.js +++ b/test/lz4.test.js @@ -4,7 +4,18 @@ import { describe, expect, it } from 'vitest' import { compressors } from '../src/index.js' describe('lz4 compressor', () => { - it('should read lz4 compressed parquet file', async () => { + it('should read lz4 compressed parquet file hadoop_lz4_compressed', async () => { + const buffer = fs.readFileSync('test/files/hadoop_lz4_compressed.parquet') + const file = new Uint8Array(buffer).buffer + const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString() + + await parquetRead({ file, compressors, onComplete: data => { + expect(data.length).toBe(4) + expect(toJson(data)).toEqual(JSON.parse(expected)) + } }) + }) + + it('should read lz4 compressed parquet file non_hadoop_lz4_compressed', async () => { const buffer = fs.readFileSync('test/files/non_hadoop_lz4_compressed.parquet') const file = new Uint8Array(buffer).buffer const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString()