hyparquet/test/encoding.test.js

115 lines
3.6 KiB
JavaScript
Raw Normal View History

2024-01-05 10:48:55 +00:00
import { describe, expect, it } from 'vitest'
2024-05-22 09:34:42 +00:00
import { bitWidth, readRleBitPackedHybrid } from '../src/encoding.js'
2024-01-05 11:06:27 +00:00
2024-01-15 03:40:39 +00:00
describe('readRleBitPackedHybrid', () => {
  // Header bytes below follow the Parquet RLE/bit-packed hybrid layout:
  //   even header -> RLE run of (header >> 1) repeated values
  //   odd header  -> bit-packed run of (header >> 1) groups of 8 values
  // Every test calls readRleBitPackedHybrid(reader, width, length, values)
  // and then checks how far reader.offset advanced and what was written
  // into the pre-sized `values` array.

  it('reads RLE values with explicit length', () => {
    const buffer = new ArrayBuffer(4)
    const view = new DataView(buffer)
    // RLE 3x true
    view.setUint8(0, 0b00000110)
    view.setUint8(1, 1)
    // RLE 3x 100
    view.setUint8(2, 0b00000110)
    view.setUint8(3, 100)
    const reader = { view, offset: 0 }

    const values = new Array(6)
    readRleBitPackedHybrid(reader, 1, 4, values)
    // Both runs (header byte + value byte each) must be consumed.
    expect(reader.offset).toBe(4)
    expect(values).toEqual([1, 1, 1, 100, 100, 100])
  })

  it('reads RLE values with bitwidth=16', () => {
    const buffer = new ArrayBuffer(6)
    const view = new DataView(buffer)
    // RLE 3x 65535
    // NOTE(review): the header is written at byte 3, so bytes 0-2 stay zero.
    // Presumably the reader first consumes a zero-length run (header 0 plus
    // a 2-byte value) before reaching this one - confirm this is intentional.
    view.setUint8(3, 0b00000110)
    view.setUint16(4, 65535, true) // little-endian 2-byte run value
    const reader = { view, offset: 0 }

    const values = new Array(3)
    readRleBitPackedHybrid(reader, 16, 6, values)
    expect(reader.offset).toBe(6)
    expect(values).toEqual([65535, 65535, 65535])
  })

  it('reads RLE values with bitwidth=32', () => {
    const buffer = new ArrayBuffer(5)
    const view = new DataView(buffer)
    // RLE 3x 234000
    view.setUint8(0, 0b00000110)
    view.setUint32(1, 234000, true) // little-endian 4-byte run value
    const reader = { view, offset: 0 }

    const values = new Array(3)
    readRleBitPackedHybrid(reader, 32, 5, values)
    expect(reader.offset).toBe(5)
    expect(values).toEqual([234000, 234000, 234000])
  })

  it('throws for invalid bitwidth', () => {
    const buffer = new ArrayBuffer(1)
    const view = new DataView(buffer)
    view.setUint8(0, 0b00000110) // RLE header only, no value bytes
    const reader = { view, offset: 0 }

    const values = new Array(3)
    // A 24-bit width corresponds to the 3-byte value width named in the error.
    expect(() => readRleBitPackedHybrid(reader, 24, 3, values))
      .toThrow('parquet invalid rle width 3')
  })

  it('reads bit-packed values with implicit length', () => {
    // Bit-packed values: false, false, true
    const buffer = new ArrayBuffer(8)
    const view = new DataView(buffer)
    view.setInt32(0, 2, true) // length 2 little-endian
    view.setUint8(4, 0b00000011) // Bit-packed header for 1-8 values
    view.setUint8(5, 0b00000100) // Bit-packed values (false, false, true)
    const reader = { view, offset: 0 }

    const values = new Array(3)
    // length = 0: the data carries its own 4-byte length prefix (written above).
    readRleBitPackedHybrid(reader, 1, 0, values)
    // 4-byte length prefix + 1 header byte + 1 data byte
    expect(reader.offset).toBe(6)
    expect(values).toEqual([0, 0, 1])
  })

  it('reads multi-byte bit-packed values', () => {
    // Bit-packed 9x true
    const buffer = new ArrayBuffer(3)
    const view = new DataView(buffer)
    view.setUint8(0, 0b00000101) // Bit-packed header for 9-16 values
    view.setUint8(1, 0b11111111)
    view.setUint8(2, 0b00000001)
    const reader = { view, offset: 0 }

    const values = new Array(9)
    readRleBitPackedHybrid(reader, 1, 3, values)
    expect(reader.offset).toBe(3)
    expect(values).toEqual([1, 1, 1, 1, 1, 1, 1, 1, 1])
  })

  it('throws for invalid bit-packed offset', () => {
    const buffer = new ArrayBuffer(1)
    const view = new DataView(buffer)
    // Bit-packed header for one group of up to 8 values; the buffer ends
    // right after it, so there are no packed data bytes to read.
    view.setUint8(0, 0b00000011)
    const reader = { view, offset: 0 }

    const values = new Array(3)
    expect(() => readRleBitPackedHybrid(reader, 1, 3, values))
      .toThrow('parquet bitpack offset 1 out of range')
  })
})
2024-05-22 09:34:42 +00:00
describe('bitWidth', () => {
  it('calculates bit widths', () => {
    // Each expected result is the number of bits needed to represent the
    // input, with 0 needing 0 bits. Cases sit on both sides of
    // power-of-two boundaries (7/8, 255/256).
    expect(bitWidth(0)).toBe(0)
    expect(bitWidth(1)).toBe(1)
    expect(bitWidth(7)).toBe(3)
    expect(bitWidth(8)).toBe(4)
    expect(bitWidth(255)).toBe(8)
    expect(bitWidth(256)).toBe(9)
    expect(bitWidth(1023)).toBe(10)
    expect(bitWidth(1048575)).toBe(20)
  })
})