mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Fix RLE encoding length (#18)
This commit is contained in:
parent
dec0864455
commit
390a86fe07
@@ -57,10 +57,10 @@
 "devDependencies": {
 "@babel/eslint-parser": "7.28.5",
 "@types/node": "24.10.1",
-"@vitest/coverage-v8": "4.0.14",
+"@vitest/coverage-v8": "4.0.15",
 "eslint": "9.39.1",
 "eslint-plugin-jsdoc": "61.4.1",
 "typescript": "5.9.3",
-"vitest": "4.0.14"
+"vitest": "4.0.15"
 }
 }
@@ -39,8 +39,10 @@ export function writeDataPageV2(writer, values, column, encoding, listValues) {
 writePlain(page, nonnull, type, type_length)
 } else if (encoding === 'RLE') {
 if (type !== 'BOOLEAN') throw new Error('RLE encoding only supported for BOOLEAN type')
-page.appendUint32(nonnull.length) // prepend length
-writeRleBitPackedHybrid(page, nonnull, 1)
+const rleData = new ByteWriter()
+writeRleBitPackedHybrid(rleData, nonnull, 1)
+page.appendUint32(rleData.offset) // prepend byte length
+page.appendBuffer(rleData.getBuffer())
 } else if (encoding === 'PLAIN_DICTIONARY' || encoding === 'RLE_DICTIONARY') {
 // find max bitwidth
 let maxValue = 0
@@ -37,7 +37,7 @@ export function ParquetWriter({ writer, schema, compressed = true, statistics =
 * @param {ColumnSource[]} options.columnData
 * @param {number | number[]} [options.rowGroupSize]
 */
-ParquetWriter.prototype.write = function({ columnData, rowGroupSize = 100000 }) {
+ParquetWriter.prototype.write = function({ columnData, rowGroupSize = 10000 }) {
 const columnDataRows = columnData[0]?.data?.length || 0
 for (const { groupStartIndex, groupSize } of groupIterator({ columnDataRows, rowGroupSize })) {
 const groupStartOffset = this.writer.offset
Loading…
Reference in New Issue
Block a user