mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2026-01-05 19:16:37 +00:00
72 lines
1.8 KiB
JavaScript
72 lines
1.8 KiB
JavaScript
|
|
|
||
|
|
/**
 * Write values in RLE/bit-packed hybrid form.
 *
 * The bit width is derived from the largest value and the whole input is
 * currently always emitted as a single bit-packed run.
 *
 * @import {Writer} from './writer.js'
 * @param {Writer} writer destination for the encoded bytes
 * @param {number[]} values non-negative integers to encode
 */
export function writeRleBitPackedHybrid(writer, values) {
  // find max bitwidth
  // Scan with a loop instead of Math.max(...values): spreading a large array
  // into call arguments can exceed the engine's argument limit and throw a
  // RangeError, and Math.max() of an empty array is -Infinity (=> NaN width).
  let max = 0
  for (const value of values) {
    if (value > max) max = value
  }
  const bitWidth = Math.ceil(Math.log2(max + 1))

  // TODO: Try both RLE and bit-packed and choose the best
  writeBitPacked(writer, values, bitWidth)
}
|
||
|
|
|
||
|
|
/**
 * Write values as a single bit-packed run: a varint header followed by the
 * values packed LSB-first, `bitWidth` bits each, in groups of 8 values.
 *
 * The last group is padded with zero values, so the payload is always
 * exactly `numGroups * bitWidth` bytes. Values are treated as unsigned
 * 32-bit integers; bits above `bitWidth` are discarded.
 *
 * @param {Writer} writer
 * @param {number[]} values
 * @param {number} bitWidth number of bits per value (0 to 32)
 */
function writeBitPacked(writer, values, bitWidth) {
  // Number of 8-value groups
  const numGroups = Math.ceil(values.length / 8)

  // The parquet bitpack header: lower bit = 1 => "bit-packed mode"
  // upper bits = number of groups
  const header = numGroups << 1 | 1

  // Write the header as a varint
  writer.appendVarInt(header)

  // If bitWidth = 0, no data is actually needed
  if (bitWidth === 0 || values.length === 0) {
    return
  }

  // Pad the final partial group with zero values
  const paddedLength = numGroups * 8

  let buffer = 0 // bits of the byte currently being assembled
  let bitsUsed = 0 // how many bits of 'buffer' are filled (always < 8)

  for (let i = 0; i < paddedLength; i++) {
    let value = i < values.length ? values[i] >>> 0 : 0
    let remaining = bitWidth

    // Move at most one byte's worth of bits at a time. Shifting a whole
    // value into a 32-bit accumulator loses high bits once
    // bitWidth + bitsUsed > 32, and (1 << 32) - 1 is 0 in JavaScript.
    while (remaining > 0) {
      const take = Math.min(8 - bitsUsed, remaining)
      buffer |= (value & (1 << take) - 1) << bitsUsed
      value >>>= take
      bitsUsed += take
      remaining -= take

      // Flush full bytes
      if (bitsUsed === 8) {
        writer.appendUint8(buffer)
        buffer = 0
        bitsUsed = 0
      }
    }
  }
  // No trailing flush: 8 values * bitWidth bits is a whole number of bytes.
}
|