Fix plain_dictionary encoding

This commit is contained in:
Kenny Daniel 2024-02-11 20:43:54 -08:00
parent bc896bc528
commit 69d2e725dd
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
2 changed files with 11 additions and 8 deletions

@ -57,12 +57,11 @@ export function readDataPage(bytes, daph, schema, columnMetadata) {
const plainObj = readPlain(dataView, columnMetadata.type, nval, offset)
values = plainObj.value
offset += plainObj.byteLength
} else if (daph.encoding === Encoding.PLAIN_DICTIONARY) {
const plainObj = readPlain(dataView, columnMetadata.type, nval, offset)
values = plainObj.value
offset += plainObj.byteLength
// TODO: dictionary decoding
} else if (daph.encoding === Encoding.RLE_DICTIONARY) {
} else if (
daph.encoding === Encoding.PLAIN_DICTIONARY ||
daph.encoding === Encoding.RLE_DICTIONARY ||
daph.encoding === Encoding.RLE
) {
// bit width is stored as single byte
let bitWidth
// TODO: RLE encoding uses bitWidth = schemaElement.type_length
@ -178,7 +177,9 @@ function readDefinitionLevels(dataView, offset, daph, schema, columnMetadata) {
* @param {number} prevIndex 1 + index where the last row in the previous page was inserted (0 if first page)
* @returns {any[]} array of values
*/
export function assembleObjects(definitionLevels, repetitionLevels, value, isNull, nullValue, maxDefinitionLevel, prevIndex) {
export function assembleObjects(
definitionLevels, repetitionLevels, value, isNull, nullValue, maxDefinitionLevel, prevIndex
) {
let vali = 0
let started = false
let haveNull = false

@ -132,7 +132,9 @@ async function readRowGroup(options, rowGroup) {
// read column data async
promises.push(buffer.then(arrayBuffer => {
// TODO: extract SchemaElement for this column
const columnData = readColumn(arrayBuffer, bufferOffset, rowGroup, columnMetadata, metadata.schema)
const columnData = readColumn(
arrayBuffer, bufferOffset, rowGroup, columnMetadata, metadata.schema
)
if (columnData.length !== Number(rowGroup.num_rows)) {
throw new Error('parquet column length does not match row group length')
}