Fix complex.parquet

This commit is contained in:
Kenny Daniel 2024-05-23 22:11:47 -07:00
parent 0926bfc2a0
commit 10b9b299d8
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
4 changed files with 10 additions and 11 deletions

@@ -37,7 +37,7 @@ export function assembleLists(
const rep = repetitionLevels[i]
// Pop up to start of rep level
while (currentDepth && (rep < currentRepLevel || repetitionPath[currentDepth] === 'OPTIONAL')) {
while (currentDepth && (rep < currentRepLevel || repetitionPath[currentDepth] !== 'REPEATED')) {
if (repetitionPath[currentDepth] !== 'REQUIRED') {
containerStack.pop()
currentDefLevel--

@@ -3,7 +3,6 @@ import { deltaBinaryUnpack, deltaByteArray, deltaLengthByteArray } from './delta
import { bitWidth, byteStreamSplit, readRleBitPackedHybrid } from './encoding.js'
import { readPlain } from './plain.js'
import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
import { readVarInt } from './thrift.js'
/**
* Read a data page from the given Uint8Array.
@@ -29,14 +28,9 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata,
// repetition levels
const repetitionLevels = readRepetitionLevelsV2(reader, daph2, schemaPath)
// assert(reader.offset === daph2.repetition_levels_byte_length)
reader.offset = daph2.repetition_levels_byte_length // readVarInt() => len for boolean v2?
// definition levels
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
if (columnMetadata.type === 'BOOLEAN' && maxDefinitionLevel) {
// special case for boolean data page v2
readVarInt(reader) // assert(=== num_values)
}
const definitionLevels = readDefinitionLevelsV2(reader, daph2, schemaPath)
// assert(reader.offset === daph2.repetition_levels_byte_length + daph2.definition_levels_byte_length)

@@ -140,7 +140,5 @@ export function snappyUncompress(input, output) {
}
}
if (outPos !== outputLength) {
throw new Error('premature end of input')
}
if (outPos !== outputLength) throw new Error('premature end of input')
}

@@ -189,4 +189,11 @@ describe('assembleLists', () => {
const result = assembleLists([], repetitionLevels, values, nullable, 3, 1)
expect(result).toEqual([[['a', 'b', 'c']], [['d', 'e', 'f']]])
})
it('handle complex.parquet with nested require', () => {
const definitionLevels = [1, 1]
const values = ['a', 'b']
const result = assembleLists(definitionLevels, [], values, [undefined, 'OPTIONAL', 'REQUIRED', 'REQUIRED'], 1, 0)
expect(result).toEqual([['a'], ['b']])
})
})