From 390a86fe07219e2b90b82f377f764d5770374704 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Wed, 3 Dec 2025 20:04:21 -0800 Subject: [PATCH] Fix RLE encoding length (#18) --- package.json | 4 ++-- src/datapage.js | 6 ++++-- src/parquet-writer.js | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/package.json b/package.json index 0a051a0..e3f33f8 100644 --- a/package.json +++ b/package.json @@ -57,10 +57,10 @@ "devDependencies": { "@babel/eslint-parser": "7.28.5", "@types/node": "24.10.1", - "@vitest/coverage-v8": "4.0.14", + "@vitest/coverage-v8": "4.0.15", "eslint": "9.39.1", "eslint-plugin-jsdoc": "61.4.1", "typescript": "5.9.3", - "vitest": "4.0.14" + "vitest": "4.0.15" } } diff --git a/src/datapage.js b/src/datapage.js index 2654ded..4986a53 100644 --- a/src/datapage.js +++ b/src/datapage.js @@ -39,8 +39,10 @@ export function writeDataPageV2(writer, values, column, encoding, listValues) { writePlain(page, nonnull, type, type_length) } else if (encoding === 'RLE') { if (type !== 'BOOLEAN') throw new Error('RLE encoding only supported for BOOLEAN type') - page.appendUint32(nonnull.length) // prepend length - writeRleBitPackedHybrid(page, nonnull, 1) + const rleData = new ByteWriter() + writeRleBitPackedHybrid(rleData, nonnull, 1) + page.appendUint32(rleData.offset) // prepend byte length + page.appendBuffer(rleData.getBuffer()) } else if (encoding === 'PLAIN_DICTIONARY' || encoding === 'RLE_DICTIONARY') { // find max bitwidth let maxValue = 0 diff --git a/src/parquet-writer.js b/src/parquet-writer.js index fb241af..aca0638 100644 --- a/src/parquet-writer.js +++ b/src/parquet-writer.js @@ -37,7 +37,7 @@ export function ParquetWriter({ writer, schema, compressed = true, statistics = * @param {ColumnSource[]} options.columnData * @param {number | number[]} [options.rowGroupSize] */ -ParquetWriter.prototype.write = function({ columnData, rowGroupSize = 100000 }) { +ParquetWriter.prototype.write = function({ columnData, rowGroupSize = 10000 }) { const columnDataRows = columnData[0]?.data?.length || 0 for (const { groupStartIndex, groupSize } of groupIterator({ columnDataRows, rowGroupSize })) { const groupStartOffset = this.writer.offset