Hadoop lz4 with header

This commit is contained in:
Kenny Daniel 2024-05-19 17:54:01 -07:00
parent 915f0254d3
commit 514ade147a
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
3 changed files with 17 additions and 2 deletions

@ -10,6 +10,10 @@ export function LZ4(input, outputLength) {
let len = 0 // output position
for (let i = 0; i < input.length;) {
const token = input[i++]
if (!token) {
i += 7 // leading length
continue
}
let literals = token >> 4
if (literals) {
@ -24,7 +28,7 @@ export function LZ4(input, outputLength) {
}
const offset = input[i++] | input[i++] << 8
if (!offset || offset > len) throw new Error('lz4 offset out of range')
if (!offset || offset > len) throw new Error(`lz4 offset out of range ${offset}`)
// match length
let matchLength = (token & 0xf) + 4
let byte = matchLength + 240

Binary file not shown.

@ -4,7 +4,18 @@ import { describe, expect, it } from 'vitest'
import { compressors } from '../src/index.js'
describe('lz4 compressor', () => {
it('should read lz4 compressed parquet file', async () => {
it('should read lz4 compressed parquet file hadoop_lz4_compressed', async () => {
// Load the hadoop_lz4_compressed parquet fixture from disk.
const buffer = fs.readFileSync('test/files/hadoop_lz4_compressed.parquet')
// parquetRead is given the ArrayBuffer behind a Uint8Array built from the bytes read above.
const file = new Uint8Array(buffer).buffer
// NOTE(review): the expected rows are read from the non_hadoop JSON fixture, not a
// hadoop-specific one — presumably both parquet files encode the same data; confirm.
const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString()
// Decode using the supplied compressors; the assertions run inside onComplete.
// NOTE(review): if onComplete were never invoked, no expectation would execute — verify
// that parquetRead always calls it (or rejects) so the test cannot pass vacuously.
await parquetRead({ file, compressors, onComplete: data => {
// The decoded result must have length 4 and, via toJson, equal the parsed JSON fixture.
expect(data.length).toBe(4)
expect(toJson(data)).toEqual(JSON.parse(expected))
} })
})
it('should read lz4 compressed parquet file non_hadoop_lz4_compressed', async () => {
const buffer = fs.readFileSync('test/files/non_hadoop_lz4_compressed.parquet')
const file = new Uint8Array(buffer).buffer
const expected = fs.readFileSync('test/files/non_hadoop_lz4_compressed.json').toString()