From 10b9b299d842bd63cbe8902c6a3837c661d13afa Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Thu, 23 May 2024 22:11:47 -0700 Subject: [PATCH] Fix complex.parquet --- src/assemble.js | 2 +- src/datapageV2.js | 8 +------- src/snappy.js | 4 +--- test/assemble.test.js | 7 +++++++ 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/assemble.js b/src/assemble.js index 2218a9b..d1865cb 100644 --- a/src/assemble.js +++ b/src/assemble.js @@ -37,7 +37,7 @@ export function assembleLists( const rep = repetitionLevels[i] // Pop up to start of rep level - while (currentDepth && (rep < currentRepLevel || repetitionPath[currentDepth] === 'OPTIONAL')) { + while (currentDepth && (rep < currentRepLevel || repetitionPath[currentDepth] !== 'REPEATED')) { if (repetitionPath[currentDepth] !== 'REQUIRED') { containerStack.pop() currentDefLevel-- diff --git a/src/datapageV2.js b/src/datapageV2.js index 978b54b..38a45f3 100644 --- a/src/datapageV2.js +++ b/src/datapageV2.js @@ -3,7 +3,6 @@ import { deltaBinaryUnpack, deltaByteArray, deltaLengthByteArray } from './delta import { bitWidth, byteStreamSplit, readRleBitPackedHybrid } from './encoding.js' import { readPlain } from './plain.js' import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js' -import { readVarInt } from './thrift.js' /** * Read a data page from the given Uint8Array. @@ -29,14 +28,9 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata, // repetition levels const repetitionLevels = readRepetitionLevelsV2(reader, daph2, schemaPath) - // assert(reader.offset === daph2.repetition_levels_byte_length) + reader.offset = daph2.repetition_levels_byte_length // readVarInt() => len for boolean v2? // definition levels - const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath) - if (columnMetadata.type === 'BOOLEAN' && maxDefinitionLevel) { - // special case for boolean data page v2 - readVarInt(reader) // assert(=== num_values) - } const definitionLevels = readDefinitionLevelsV2(reader, daph2, schemaPath) // assert(reader.offset === daph2.repetition_levels_byte_length + daph2.definition_levels_byte_length) diff --git a/src/snappy.js b/src/snappy.js index 6e95383..5b85102 100644 --- a/src/snappy.js +++ b/src/snappy.js @@ -140,7 +140,5 @@ export function snappyUncompress(input, output) { } } - if (outPos !== outputLength) { - throw new Error('premature end of input') - } + if (outPos !== outputLength) throw new Error('premature end of input') } diff --git a/test/assemble.test.js b/test/assemble.test.js index d7ac460..aba4f96 100644 --- a/test/assemble.test.js +++ b/test/assemble.test.js @@ -189,4 +189,11 @@ describe('assembleLists', () => { const result = assembleLists([], repetitionLevels, values, nullable, 3, 1) expect(result).toEqual([[['a', 'b', 'c']], [['d', 'e', 'f']]]) }) + + it('handle complex.parquet with nested require', () => { + const definitionLevels = [1, 1] + const values = ['a', 'b'] + const result = assembleLists(definitionLevels, [], values, [undefined, 'OPTIONAL', 'REQUIRED', 'REQUIRED'], 1, 0) + expect(result).toEqual([['a'], ['b']]) + }) })