From 514ade147a73066841750152602370e7f09168a7 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Sun, 19 May 2024 17:54:01 -0700 Subject: [PATCH] Hadoop lz4 with header --- src/lz4.js | 6 +++++- test/files/hadoop_lz4_compressed.parquet | Bin 0 -> 702 bytes test/lz4.test.js | 13 ++++++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 test/files/hadoop_lz4_compressed.parquet diff --git a/src/lz4.js b/src/lz4.js index 7844118..2b4c344 100644 --- a/src/lz4.js +++ b/src/lz4.js @@ -10,6 +10,10 @@ export function LZ4(input, outputLength) { let len = 0 // output position for (let i = 0; i < input.length;) { const token = input[i++] + if (!token) { + i += 7 // leading length + continue + } let literals = token >> 4 if (literals) { @@ -24,7 +28,7 @@ export function LZ4(input, outputLength) { } const offset = input[i++] | input[i++] << 8 - if (!offset || offset > len) throw new Error('lz4 offset out of range') + if (!offset || offset > len) throw new Error(`lz4 offset out of range ${offset}`) // match length let matchLength = (token & 0xf) + 4 let byte = matchLength + 240 diff --git a/test/files/hadoop_lz4_compressed.parquet b/test/files/hadoop_lz4_compressed.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b5fadcd49c332450a97efd144b3cd7bcd0d6f27f GIT binary patch literal 702 zcmb7CJ4?hs5T4D3V~K|^oFxlE(p)$}kF2NBK#EuhB3Fn%K)76TAox5Vh(Ex}-yr@D zEA6$nw6wIcvT!zM5Fa28+05=Y-+VhW$;Qg2MjL%x=#aC)*f_1p7j}F8wq=fYA07c~ z=+2@bqCnxfqKdep;!0@#f*KV7PRkA%uT#ELDKPeA!riQATwe6QUlu`=6YO%L z5QD6eb4V`4q(j9G%XcX#CxAh^nm*h!xSmc8AI4WxQk}wJ*g~yrp^CWT;ojJEYFhY- zmIyfHMy@S-^D&F(fR^g4(O3ZO2Dnx)1gw`E4d^s1m!E14QYcR)f$-2)4Ph(8Lq`gE z!{ieSMDmY8s#)`u&;shjIDp`pd04N6bt!x@K1}u(f0nHuUk57X}_7BZ13cLcR9}c t%l$ZR&u3B5kD_)v52G-Oi^Z@PXG`%?m=#&BXH?<=3?DdS02}n@z5@p&dN}|9 literal 0 HcmV?d00001 diff --git a/test/lz4.test.js b/test/lz4.test.js index d2e9556..5a7eac1 100644 --- a/test/lz4.test.js +++ b/test/lz4.test.js @@ -4,7 +4,18 @@ import { describe, expect, it } from 'vitest' import { compressors } from '../src/index.js' describe('lz4 compressor', () => { - it('should read lz4 compressed parquet file', async () => { + it('should read lz4 compressed parquet file hadoop_lz4_compressed', async () => { + const buffer = fs.readFileSync('test/files/hadoop_lz4_compressed.parquet') + const file = new Uint8Array(buffer).buffer + const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString() + + await parquetRead({ file, compressors, onComplete: data => { + expect(data.length).toBe(4) + expect(toJson(data)).toEqual(JSON.parse(expected)) + } }) + }) + + it('should read lz4 compressed parquet file non_hadoop_lz4_compressed', async () => { const buffer = fs.readFileSync('test/files/non_hadoop_lz4_compressed.parquet') const file = new Uint8Array(buffer).buffer const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString()