From 514ade147a73066841750152602370e7f09168a7 Mon Sep 17 00:00:00 2001
From: Kenny Daniel <platypii@gmail.com>
Date: Sun, 19 May 2024 17:54:01 -0700
Subject: [PATCH] Hadoop lz4 with header

---
 src/lz4.js                               |   6 +++++-
 test/files/hadoop_lz4_compressed.parquet | Bin 0 -> 702 bytes
 test/lz4.test.js                         |  13 ++++++++++++-
 3 files changed, 17 insertions(+), 2 deletions(-)
 create mode 100644 test/files/hadoop_lz4_compressed.parquet
diff --git a/src/lz4.js b/src/lz4.js
index 7844118..2b4c344 100644
--- a/src/lz4.js
+++ b/src/lz4.js
@@ -10,6 +10,10 @@ export function LZ4(input, outputLength) {
   let len = 0 // output position
   for (let i = 0; i < input.length;) {
     const token = input[i++]
+    if (!token) {
+      i += 7 // zero token: presumably byte 1 of Hadoop's 8-byte length header; skip the other 7 (heuristic, TODO confirm)
+      continue
+    }
 
     let literals = token >> 4
     if (literals) {
@@ -24,7 +28,7 @@ export function LZ4(input, outputLength) {
     }
 
     const offset = input[i++] | input[i++] << 8
-    if (!offset || offset > len) throw new Error('lz4 offset out of range')
+    if (!offset || offset > len) throw new Error(`lz4 offset out of range ${offset}`)
     // match length
     let matchLength = (token & 0xf) + 4
     let byte = matchLength + 240
diff --git a/test/files/hadoop_lz4_compressed.parquet b/test/files/hadoop_lz4_compressed.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..b5fadcd49c332450a97efd144b3cd7bcd0d6f27f
GIT binary patch
literal 702
zcmb7CJ4?hs5T4D3V~K|^oFxlE(p)$}kF2NBK#EuhB3Fn%K)76TAox5Vh(Ex}-yr@D
zEA6$nw6wIcvT!zM5Fa28+05=Y-+VhW$;Qg2MjL%x=#aC)*f_1p7j}F8wq=fYA07c~
z=+2@bqCnxfqKdep;!0@#f*KV7PRkA%uT#E<t<U>LDKPeA!riQATwe6QUlu`=6YO%L
z5QD6eb4V`4q(j9G%XcX#CxAh^nm*h!xSmc8AI4WxQk}wJ*g~yrp^CWT;ojJEYFhY-
zmIyfHMy@S-^D&F(fR^g4(O3ZO2Dnx)1gw`E4d^s1m!E14QYcR)f$-2)4Ph(8Lq`gE
z!{ieSMDmY8s#)`u&;shjIDp`pd04N6bt!x<uNdLUm!bWVdci$()S|F!sw}lkeq{2M
z1s9ykDx0RU{W+YU+Vawt0-Eaw-NE5WaXhy>@K1}u(f0nHuUk57X}_7BZ13cLcR9}c
t%l$ZR&u3B5kD_)v52G-Oi^Z@PXG`%?m=#&BXH?<=3?DdS02}n@z5@p&dN}|9

literal 0
HcmV?d00001

diff --git a/test/lz4.test.js b/test/lz4.test.js
index d2e9556..5a7eac1 100644
--- a/test/lz4.test.js
+++ b/test/lz4.test.js
@@ -4,7 +4,18 @@ import { describe, expect, it } from 'vitest'
 import { compressors } from '../src/index.js'
 
 describe('lz4 compressor', () => {
-  it('should read lz4 compressed parquet file', async () => {
+  it('should read lz4 compressed parquet file hadoop_lz4_compressed', async () => {
+    const buffer = fs.readFileSync('test/files/hadoop_lz4_compressed.parquet')
+    const file = new Uint8Array(buffer).buffer
+    const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString()
+
+    await parquetRead({ file, compressors, onComplete: data => {
+      expect(data.length).toBe(4)
+      expect(toJson(data)).toEqual(JSON.parse(expected))
+    } })
+  })
+
+  it('should read lz4 compressed parquet file non_hadoop_lz4_compressed', async () => {
     const buffer = fs.readFileSync('test/files/non_hadoop_lz4_compressed.parquet')
     const file = new Uint8Array(buffer).buffer
     const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString()