Fix RLE encoding length (#18)

This commit is contained in:
Kenny Daniel 2025-12-03 20:04:21 -08:00 committed by GitHub
parent dec0864455
commit 390a86fe07
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 7 additions and 5 deletions

@@ -57,10 +57,10 @@
 "devDependencies": {
 "@babel/eslint-parser": "7.28.5",
 "@types/node": "24.10.1",
-"@vitest/coverage-v8": "4.0.14",
+"@vitest/coverage-v8": "4.0.15",
 "eslint": "9.39.1",
 "eslint-plugin-jsdoc": "61.4.1",
 "typescript": "5.9.3",
-"vitest": "4.0.14"
+"vitest": "4.0.15"
 }
 }

@@ -39,8 +39,10 @@ export function writeDataPageV2(writer, values, column, encoding, listValues) {
 writePlain(page, nonnull, type, type_length)
 } else if (encoding === 'RLE') {
 if (type !== 'BOOLEAN') throw new Error('RLE encoding only supported for BOOLEAN type')
-page.appendUint32(nonnull.length) // prepend length
-writeRleBitPackedHybrid(page, nonnull, 1)
+const rleData = new ByteWriter()
+writeRleBitPackedHybrid(rleData, nonnull, 1)
+page.appendUint32(rleData.offset) // prepend byte length
+page.appendBuffer(rleData.getBuffer())
 } else if (encoding === 'PLAIN_DICTIONARY' || encoding === 'RLE_DICTIONARY') {
 // find max bitwidth
 let maxValue = 0

@@ -37,7 +37,7 @@ export function ParquetWriter({ writer, schema, compressed = true, statistics =
 * @param {ColumnSource[]} options.columnData
 * @param {number | number[]} [options.rowGroupSize]
 */
-ParquetWriter.prototype.write = function({ columnData, rowGroupSize = 100000 }) {
+ParquetWriter.prototype.write = function({ columnData, rowGroupSize = 10000 }) {
 const columnDataRows = columnData[0]?.data?.length || 0
 for (const { groupStartIndex, groupSize } of groupIterator({ columnDataRows, rowGroupSize })) {
 const groupStartOffset = this.writer.offset