mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-02-22 04:11:32 +00:00
Encoding tests
This commit is contained in:
parent
938076b3bc
commit
bded3fb331
@ -93,12 +93,11 @@ function readBitPacked(reader, header, bitWidth, values, seen) {
|
||||
// mask for bitWidth number of bits
|
||||
const mask = (1 << bitWidth) - 1
|
||||
|
||||
// Sometimes it tries to read outside of available memory, but it will be masked out anyway
|
||||
let data = 0
|
||||
if (reader.offset < reader.view.byteLength) {
|
||||
data = reader.view.getUint8(reader.offset)
|
||||
reader.offset++
|
||||
data = reader.view.getUint8(reader.offset++)
|
||||
} else if (mask) {
|
||||
// sometimes out-of-bounds reads are masked out
|
||||
throw new Error(`parquet bitpack offset ${reader.offset} out of range`)
|
||||
}
|
||||
let left = 8
|
||||
|
||||
@ -1,39 +1,124 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { readRleBitPackedHybrid } from '../src/encoding.js'
|
||||
import { readRleBitPackedHybrid, widthFromMaxInt } from '../src/encoding.js'
|
||||
|
||||
describe('readRleBitPackedHybrid', () => {
|
||||
it('reads RLE bit-packed hybrid values with explicit length', () => {
|
||||
// Example buffer: 1 RLE group followed by 1 bit-packed group
|
||||
// RLE values: true x3
|
||||
// Bit-packed values: false, false, true
|
||||
it('reads RLE values with explicit length', () => {
|
||||
const buffer = new ArrayBuffer(4)
|
||||
const view = new DataView(buffer)
|
||||
view.setUint8(0, 0b00000110) // RLE header for 3 true values
|
||||
view.setUint8(1, 0b00000001) // RLE value (true)
|
||||
view.setUint8(2, 0b00000011) // Bit-packed header for 3 values
|
||||
view.setUint8(3, 0b00000100) // Bit-packed values (false, false, true)
|
||||
// RLE 3x true
|
||||
view.setUint8(0, 0b00000110)
|
||||
view.setUint8(1, 1)
|
||||
// RLE 3x 100
|
||||
view.setUint8(2, 0b00000110)
|
||||
view.setUint8(3, 100)
|
||||
const reader = { view, offset: 0 }
|
||||
|
||||
const values = new Array(6)
|
||||
readRleBitPackedHybrid(reader, 1, 3, values)
|
||||
readRleBitPackedHybrid(reader, 1, 6, values)
|
||||
expect(reader.offset).toBe(4)
|
||||
expect(values).toEqual([1, 1, 1, 0, 0, 1])
|
||||
expect(values).toEqual([1, 1, 1, 100, 100, 100])
|
||||
})
|
||||
|
||||
it('reads RLE bit-packed hybrid values with implicit length', () => {
|
||||
// Example buffer: same as previous test, but with implicit length
|
||||
it('reads RLE values with bitwidth=16', () => {
|
||||
const buffer = new ArrayBuffer(6)
|
||||
const view = new DataView(buffer)
|
||||
// RLE 3x 65535
|
||||
view.setUint8(3, 0b00000110)
|
||||
view.setUint16(4, 65535, true)
|
||||
const reader = { view, offset: 0 }
|
||||
|
||||
const values = new Array(3)
|
||||
readRleBitPackedHybrid(reader, 16, 6, values)
|
||||
expect(reader.offset).toBe(6)
|
||||
expect(values).toEqual([65535, 65535, 65535])
|
||||
})
|
||||
|
||||
it('reads RLE values with bitwidth=32', () => {
|
||||
const buffer = new ArrayBuffer(5)
|
||||
const view = new DataView(buffer)
|
||||
// RLE 3x 234000
|
||||
view.setUint8(0, 0b00000110)
|
||||
view.setUint32(1, 234000, true)
|
||||
const reader = { view, offset: 0 }
|
||||
|
||||
const values = new Array(3)
|
||||
readRleBitPackedHybrid(reader, 32, 3, values)
|
||||
expect(reader.offset).toBe(5)
|
||||
expect(values).toEqual([234000, 234000, 234000])
|
||||
})
|
||||
|
||||
it('throws for invalid bitwidth', () => {
|
||||
const buffer = new ArrayBuffer(1)
|
||||
const view = new DataView(buffer)
|
||||
view.setUint8(0, 0b00000110)
|
||||
const reader = { view, offset: 0 }
|
||||
|
||||
const values = new Array(3)
|
||||
expect(() => readRleBitPackedHybrid(reader, 24, 3, values))
|
||||
.toThrow('parquet invalid rle width 3')
|
||||
})
|
||||
|
||||
it('reads bit-packed values with implicit length', () => {
|
||||
// Bit-packed values: false, false, true
|
||||
const buffer = new ArrayBuffer(8)
|
||||
const view = new DataView(buffer)
|
||||
view.setInt32(0, 3, true) // length 3 little-endian
|
||||
view.setUint8(4, 0b00000110) // RLE header for 3 true values
|
||||
view.setUint8(5, 0b00000001) // RLE value (true)
|
||||
view.setUint8(6, 0b00000011) // Bit-packed header for 3 values
|
||||
view.setUint8(7, 0b00000100) // Bit-packed values (false, false, true)
|
||||
view.setUint8(4, 0b00000011) // Bit-packed header for 1-8 values
|
||||
view.setUint8(5, 0b00000100) // Bit-packed values (false, false, true)
|
||||
const reader = { view, offset: 0 }
|
||||
|
||||
const values = new Array(6)
|
||||
const values = new Array(3)
|
||||
readRleBitPackedHybrid(reader, 1, 0, values)
|
||||
expect(reader.offset).toBe(8)
|
||||
expect(values).toEqual([1, 1, 1, 0, 0, 1])
|
||||
expect(reader.offset).toBe(6)
|
||||
expect(values).toEqual([0, 0, 1])
|
||||
})
|
||||
|
||||
it('reads multi-byte bit-packed values', () => {
|
||||
// Bit-packed 9x true
|
||||
const buffer = new ArrayBuffer(3)
|
||||
const view = new DataView(buffer)
|
||||
view.setUint8(0, 0b00000101) // Bit-packed header for 9-16 values
|
||||
view.setUint8(1, 0b11111111)
|
||||
view.setUint8(2, 0b00000001)
|
||||
const reader = { view, offset: 0 }
|
||||
|
||||
const values = new Array(9)
|
||||
readRleBitPackedHybrid(reader, 1, 9, values)
|
||||
expect(reader.offset).toBe(3)
|
||||
expect(values).toEqual([1, 1, 1, 1, 1, 1, 1, 1, 1])
|
||||
})
|
||||
|
||||
it('throws for invalid bit-packed offset', () => {
|
||||
const buffer = new ArrayBuffer(1)
|
||||
const view = new DataView(buffer)
|
||||
view.setUint8(0, 0b00000011) // Bit-packed header for 3 values
|
||||
const reader = { view, offset: 0 }
|
||||
|
||||
const values = new Array(3)
|
||||
expect(() => readRleBitPackedHybrid(reader, 1, 3, values))
|
||||
.toThrow('parquet bitpack offset 1 out of range')
|
||||
})
|
||||
|
||||
it('throws for negative implicit length', () => {
|
||||
const buffer = new ArrayBuffer(4)
|
||||
const view = new DataView(buffer)
|
||||
view.setInt32(0, -1, true) // negative length
|
||||
const reader = { view, offset: 0 }
|
||||
|
||||
const values = new Array(3)
|
||||
expect(() => readRleBitPackedHybrid(reader, 1, 0, values))
|
||||
.toThrow('parquet invalid rle/bitpack length -1')
|
||||
})
|
||||
})
|
||||
|
||||
describe('widthFromMaxInt', () => {
|
||||
it('calculates bit widths', () => {
|
||||
// Test a range of inputs and their expected outputs
|
||||
expect(widthFromMaxInt(0)).toBe(0)
|
||||
expect(widthFromMaxInt(1)).toBe(1)
|
||||
expect(widthFromMaxInt(255)).toBe(8)
|
||||
expect(widthFromMaxInt(256)).toBe(9)
|
||||
expect(widthFromMaxInt(1023)).toBe(10)
|
||||
expect(widthFromMaxInt(1048575)).toBe(20)
|
||||
})
|
||||
})
|
||||
|
||||
@ -3,16 +3,16 @@ import { readPlain } from '../src/plain.js'
|
||||
|
||||
describe('readPlain', () => {
|
||||
|
||||
it('reads BOOLEAN values correctly', () => {
|
||||
it('reads BOOLEAN values', () => {
|
||||
const view = new DataView(new ArrayBuffer(1))
|
||||
view.setUint8(0, 0b00000001) // Set the first bit to 1
|
||||
view.setUint8(0, 0b00000101) // true, false, true
|
||||
const reader = { view, offset: 0 }
|
||||
const result = readPlain(reader, 'BOOLEAN', 1, false)
|
||||
expect(result).toEqual([true])
|
||||
const result = readPlain(reader, 'BOOLEAN', 3, false)
|
||||
expect(result).toEqual([true, false, true])
|
||||
expect(reader.offset).toBe(1)
|
||||
})
|
||||
|
||||
it('reads INT32 values correctly', () => {
|
||||
it('reads INT32 values', () => {
|
||||
const view = new DataView(new ArrayBuffer(4))
|
||||
view.setInt32(0, 123456789, true) // little-endian
|
||||
const reader = { view, offset: 0 }
|
||||
@ -21,7 +21,7 @@ describe('readPlain', () => {
|
||||
expect(reader.offset).toBe(4)
|
||||
})
|
||||
|
||||
it('reads INT64 values correctly', () => {
|
||||
it('reads INT64 values', () => {
|
||||
const view = new DataView(new ArrayBuffer(8))
|
||||
view.setBigInt64(0, BigInt('1234567890123456789'), true)
|
||||
const reader = { view, offset: 0 }
|
||||
@ -30,11 +30,11 @@ describe('readPlain', () => {
|
||||
expect(reader.offset).toBe(8)
|
||||
})
|
||||
|
||||
it('reads INT96 values correctly', () => {
|
||||
it('reads INT96 values', () => {
|
||||
const buffer = new ArrayBuffer(12)
|
||||
const view = new DataView(buffer)
|
||||
|
||||
// Example INT96 value split into 64-bit low part and 32-bit high part
|
||||
// INT96 value split into 64-bit low part and 32-bit high part
|
||||
const low = BigInt('0x0123456789ABCDEF')
|
||||
const high = 0x02345678
|
||||
view.setBigInt64(0, low, true)
|
||||
@ -46,7 +46,7 @@ describe('readPlain', () => {
|
||||
expect(reader.offset).toBe(12)
|
||||
})
|
||||
|
||||
it('reads FLOAT values correctly', () => {
|
||||
it('reads FLOAT values', () => {
|
||||
const view = new DataView(new ArrayBuffer(4))
|
||||
view.setFloat32(0, 1234.5, true) // little-endian
|
||||
const reader = { view, offset: 0 }
|
||||
@ -55,7 +55,7 @@ describe('readPlain', () => {
|
||||
expect(reader.offset).toBe(4)
|
||||
})
|
||||
|
||||
it('reads DOUBLE values correctly', () => {
|
||||
it('reads DOUBLE values', () => {
|
||||
const view = new DataView(new ArrayBuffer(8))
|
||||
view.setFloat64(0, 12345.6789, true) // little-endian
|
||||
const reader = { view, offset: 0 }
|
||||
@ -64,10 +64,10 @@ describe('readPlain', () => {
|
||||
expect(reader.offset).toBe(8)
|
||||
})
|
||||
|
||||
it('reads BYTE_ARRAY values correctly', () => {
|
||||
it('reads BYTE_ARRAY values', () => {
|
||||
const view = new DataView(new ArrayBuffer(10))
|
||||
view.setInt32(0, 3, true) // length of the first byte array
|
||||
view.setUint8(4, 1) // first byte array data
|
||||
view.setInt32(0, 3, true) // length 3
|
||||
view.setUint8(4, 1)
|
||||
view.setUint8(5, 2)
|
||||
view.setUint8(6, 3)
|
||||
const reader = { view, offset: 0 }
|
||||
@ -76,7 +76,19 @@ describe('readPlain', () => {
|
||||
expect(reader.offset).toBe(7)
|
||||
})
|
||||
|
||||
it('reads FIXED_LEN_BYTE_ARRAY values correctly', () => {
|
||||
it('reads BYTE_ARRAY values as strings', () => {
|
||||
const view = new DataView(new ArrayBuffer(10))
|
||||
view.setInt32(0, 3, true) // length 3
|
||||
view.setUint8(4, 65)
|
||||
view.setUint8(5, 66)
|
||||
view.setUint8(6, 67)
|
||||
const reader = { view, offset: 0 }
|
||||
const result = readPlain(reader, 'BYTE_ARRAY', 1, true)
|
||||
expect(result).toEqual(['ABC'])
|
||||
expect(reader.offset).toBe(7)
|
||||
})
|
||||
|
||||
it('reads FIXED_LEN_BYTE_ARRAY values', () => {
|
||||
const fixedLength = 3
|
||||
const view = new DataView(new ArrayBuffer(fixedLength))
|
||||
view.setUint8(0, 4)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user