diff --git a/src/assemble.js b/src/assemble.js index 253d6a9..acf3a04 100644 --- a/src/assemble.js +++ b/src/assemble.js @@ -1,3 +1,5 @@ +import { isListLike, isMapLike } from './schema.js' + /** * Dremel-assembly of arrays of values into lists * @@ -5,16 +7,17 @@ * according to Dremel encoding. * * @typedef {import('./types.d.ts').DecodedArray} DecodedArray + * @typedef {import('./types.d.ts').FieldRepetitionType} FieldRepetitionType * @param {number[] | undefined} definitionLevels * @param {number[]} repetitionLevels * @param {DecodedArray} values - * @param {boolean} isNullable can entries be null? + * @param {(FieldRepetitionType | undefined)[]} repetitionPath * @param {number} maxDefinitionLevel definition level that corresponds to non-null * @param {number} maxRepetitionLevel repetition level that corresponds to a new row * @returns {DecodedArray} array of values */ export function assembleLists( - definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel + definitionLevels, repetitionLevels, values, repetitionPath, maxDefinitionLevel, maxRepetitionLevel ) { const n = definitionLevels?.length || repetitionLevels.length let valueIndex = 0 @@ -24,49 +27,56 @@ export function assembleLists( // Track state of nested structures const containerStack = [output] let currentContainer = output + let currentDepth = 0 // schema depth + let currentDefLevel = 0 // list depth + let currentRepLevel = 0 for (let i = 0; i < n; i++) { + // assert(currentDefLevel === containerStack.length - 1) const def = definitionLevels?.length ? definitionLevels[i] : maxDefinitionLevel const rep = repetitionLevels[i] - if (rep !== maxRepetitionLevel) { - // Move back to the parent container - while (rep < containerStack.length - 1) { + // Pop up to start of rep level + while (currentDepth && (rep < currentRepLevel || repetitionPath[currentDepth] === 'OPTIONAL')) { + if (repetitionPath[currentDepth] !== 'REQUIRED') { containerStack.pop() + currentDefLevel-- } - // Construct new lists up to max repetition level - // @ts-expect-error won't be empty - currentContainer = containerStack.at(-1) + if (repetitionPath[currentDepth] === 'REPEATED') currentRepLevel-- + currentDepth-- } + // @ts-expect-error won't be empty + currentContainer = containerStack.at(-1) - // Add lists up to definition level - const targetDepth = isNullable ? (def + 1) / 2 : maxRepetitionLevel + 1 - for (let j = containerStack.length; j < targetDepth; j++) { - /** @type {any[]} */ - const newList = [] - currentContainer.push(newList) - currentContainer = newList - containerStack.push(newList) + // Go deeper to end of definition level + while (currentDepth < repetitionPath.length - 2 && currentDefLevel < def) { + currentDepth++ + if (repetitionPath[currentDepth] !== 'REQUIRED') { + /** @type {any[]} */ + const newList = [] + currentContainer.push(newList) + currentContainer = newList + containerStack.push(newList) + currentDefLevel++ + } + if (repetitionPath[currentDepth] === 'REPEATED') currentRepLevel++ } // Add value or null based on definition level if (def === maxDefinitionLevel) { + // assert(currentDepth === maxDefinitionLevel || currentDepth === repetitionPath.length - 2) currentContainer.push(values[valueIndex++]) - } else if (isNullable) { - // TODO: actually depends on level required or not - if (def % 2 === 0) { - currentContainer.push(undefined) - } else { - currentContainer.push([]) - } + } else if (currentDepth === repetitionPath.length - 2) { + currentContainer.push(null) + } else { + currentContainer.push([]) } } // Handle edge cases for empty inputs or single-level data if (output.length === 0) { if (values.length > 0 && maxRepetitionLevel === 0) { - // All values belong to the same (root) list - return [values] + return values // flat list } // return max definition level of nested lists for (let i = 0; i < maxDefinitionLevel; i++) { @@ -80,4 +90,147 @@ export function assembleLists( return output } -// TODO: depends on prior def level +/** + * Assemble a nested structure from subcolumn data. + * https://github.com/apache/parquet-format/blob/apache-parquet-format-2.10.0/LogicalTypes.md#nested-types + * + * @typedef {import('./types.d.ts').SchemaTree} SchemaTree + * @param {Map} subcolumnData + * @param {SchemaTree} schema top-level schema element + * @param {number} [depth] depth of nested structure + */ +export function assembleNested(subcolumnData, schema, depth = 0) { + const path = schema.path.join('.') + const optional = schema.element.repetition_type === 'OPTIONAL' + const nextDepth = optional ? depth + 1 : depth + + if (isListLike(schema)) { + const sublist = schema.children[0].children[0] + assembleNested(subcolumnData, sublist, nextDepth + 1) + + const subcolumn = sublist.path.join('.') + const values = subcolumnData.get(subcolumn) + if (!values) throw new Error('parquet list-like column missing values') + if (optional) flattenAtDepth(values, depth) + subcolumnData.set(path, values) + subcolumnData.delete(subcolumn) + return + } + + if (isMapLike(schema)) { + const mapName = schema.children[0].element.name + + // Assemble keys and values + assembleNested(subcolumnData, schema.children[0].children[0], nextDepth + 1) + assembleNested(subcolumnData, schema.children[0].children[1], nextDepth + 1) + + const keys = subcolumnData.get(`${path}.${mapName}.key`) + const values = subcolumnData.get(`${path}.${mapName}.value`) + + if (!keys) throw new Error('parquet map-like column missing keys') + if (!values) throw new Error('parquet map-like column missing values') + if (keys.length !== values.length) { + throw new Error('parquet map-like column key/value length mismatch') + } + + const out = assembleMaps(keys, values, nextDepth) + if (optional) flattenAtDepth(out, depth) + + subcolumnData.delete(`${path}.${mapName}.key`) + subcolumnData.delete(`${path}.${mapName}.value`) + subcolumnData.set(path, out) + return + } + + // Struct-like column + if (schema.children.length) { + // construct a meta struct and then invert + /** @type {Record} */ + const struct = {} + for (const child of schema.children) { + assembleNested(subcolumnData, child, nextDepth) + const childData = subcolumnData.get(child.path.join('.')) + if (!childData) throw new Error('parquet struct-like column missing child data') + if (child.element.repetition_type === 'OPTIONAL') { + flattenAtDepth(childData, depth) + } + struct[child.element.name] = childData + } + // remove children + for (const child of schema.children) { + subcolumnData.delete(child.path.join('.')) + } + // invert struct by depth + subcolumnData.set(path, invertStruct(struct, depth)) + return + } + // assert(schema.element.repetition_type !== 'REPEATED') +} + +/** + * @param {any[]} arr + * @param {number} depth + */ +function flattenAtDepth(arr, depth) { + for (let i = 0; i < arr.length; i++) { + if (depth) { + flattenAtDepth(arr[i], depth - 1) + } else { + arr[i] = arr[i][0] + } + } +} + +/** + * @param {any[]} keys + * @param {any[]} values + * @param {number} depth + * @returns {any[]} + */ +function assembleMaps(keys, values, depth) { + const out = [] + for (let i = 0; i < keys.length; i++) { + if (depth) { + out.push(assembleMaps(keys[i], values[i], depth - 1)) // go deeper + } else { + if (keys[i]) { + /** @type {Record} */ + const obj = {} + for (let j = 0; j < keys[i].length; j++) { + const value = values[i][j] + obj[keys[i][j]] = value === undefined ? null : value + } + out.push(obj) + } else { + out.push(undefined) + } + } + } + return out +} + +/** + * Invert a struct-like object by depth. + * + * @param {Record} struct + * @param {number} depth + * @returns {any[]} + */ +function invertStruct(struct, depth) { + const keys = Object.keys(struct) + const length = struct[keys[0]]?.length + const out = [] + for (let i = 0; i < length; i++) { + /** @type {Record} */ + const obj = {} + for (const key of keys) { + obj[key] = struct[key][i] + } + if (depth) { + out.push(invertStruct(obj, depth - 1)) // deeper + } else { + out.push(obj) + } + } + return out +} diff --git a/src/column.js b/src/column.js index 4b9f4c0..408a17b 100644 --- a/src/column.js +++ b/src/column.js @@ -3,7 +3,7 @@ import { convert } from './convert.js' import { readDataPage, readDictionaryPage } from './datapage.js' import { readDataPageV2 } from './datapageV2.js' import { parquetHeader } from './header.js' -import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired } from './schema.js' +import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js' import { snappyUncompress } from './snappy.js' import { concat } from './utils.js' @@ -39,7 +39,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, // parse column header const header = parquetHeader(reader) if (header.compressed_page_size === undefined) { - throw new Error(`parquet compressed page size is undefined in column '${columnMetadata.path_in_schema}'`) + throw new Error('parquet compressed page size is undefined') } // read compressed_page_size bytes starting at offset @@ -68,9 +68,9 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, // Use repetition levels to construct lists const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath) const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath) - const isNullable = columnMetadata && !isRequired(schemaPath.slice(0, 2)) + const repetitionPath = schemaPath.map(({ element }) => element.repetition_type) values = assembleLists( - definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel + definitionLevels, repetitionLevels, values, repetitionPath, maxDefinitionLevel, maxRepetitionLevel ) } else { // wrap nested flat data by depth @@ -95,11 +95,11 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, values = convert(dataPage, element) if (repetitionLevels.length || definitionLevels?.length) { // Use repetition levels to construct lists - const isNullable = columnMetadata && !isRequired(schemaPath.slice(0, 2)) const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath) const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath) + const repetitionPath = schemaPath.map(({ element }) => element.repetition_type) values = assembleLists( - definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel + definitionLevels, repetitionLevels, values, repetitionPath, maxDefinitionLevel, maxRepetitionLevel ) } concat(rowData, values) diff --git a/src/read.js b/src/read.js index e6c989e..66558f8 100644 --- a/src/read.js +++ b/src/read.js @@ -1,7 +1,8 @@ +import { assembleNested } from './assemble.js' import { getColumnOffset, readColumn } from './column.js' import { parquetMetadataAsync } from './metadata.js' -import { getSchemaPath, isMapLike } from './schema.js' +import { getSchemaPath } from './schema.js' import { concat } from './utils.js' /** @@ -111,7 +112,10 @@ async function readRowGroup(options, rowGroup, groupStart) { /** @type {any[][]} */ const groupColumnData = [] const promises = [] - const maps = new Map() + // Top-level columns to assemble + const { children } = getSchemaPath(metadata.schema, [])[0] + const subcolumnNames = new Map(children.map(child => [child.element.name, getSubcolumns(child)])) + const subcolumnData = new Map() // columns to assemble as maps // read column data for (let columnIndex = 0; columnIndex < rowGroup.columns.length; columnIndex++) { const columnMetadata = rowGroup.columns[columnIndex].meta_data @@ -152,48 +156,21 @@ async function readRowGroup(options, rowGroup, groupStart) { let columnData = readColumn( arrayBuffer, bufferOffset, rowGroup, columnMetadata, schemaPath, compressors ) - if (columnData.length !== Number(rowGroup.num_rows)) { - throw new Error(`parquet column length ${columnData.length} does not match row group length ${rowGroup.num_rows}`) - } + // assert(columnData.length === Number(rowGroup.num_rows) - if (isMapLike(schemaPath[schemaPath.length - 3])) { - const name = columnMetadata.path_in_schema.slice(0, -2).join('.') - if (!maps.has(name)) { - maps.set(name, columnData) - columnData = undefined // do not emit column data until both key and value are read - } else { - if (columnMetadata.path_in_schema[0] === 'key') { - throw new Error('parquet map-like column key is not first') // TODO: support value-first - } else { - const values = columnData - const keys = maps.get(name) - const out = [] - if (keys.length !== values.length) { - throw new Error('parquet map-like column key/value length mismatch') - } - // assemble map-like column data - for (let i = 0; i < keys.length; i++) { - // keys will be empty for {} and undefined for null - if (keys[i]) { - /** @type {Record} */ - const obj = {} - for (let j = 0; j < keys[i].length; j++) { - if (Array.isArray(keys[i][j])) { - // TODO: key should not be an array, this is an assemble bug? - keys[i][j] = keys[i][j][0] - values[i][j] = values[i][j][0] - } - if (!keys[i][j]) continue - obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j] - } - out.push(obj) - } else { - out.push(undefined) - } - } - columnData = out - } - maps.delete(name) + // TODO: fast path for non-nested columns + // Save column data for assembly + const subcolumn = columnMetadata.path_in_schema.join('.') + subcolumnData.set(subcolumn, columnData) + columnData = undefined + + const subcolumns = subcolumnNames.get(columnName) + if (subcolumns?.every(name => subcolumnData.has(name))) { + // We have all data needed to assemble a top level column + assembleNested(subcolumnData, schemaPath[1]) + columnData = subcolumnData.get(columnName) + if (!columnData) { + throw new Error(`parquet column data not assembled: ${columnName}`) } } @@ -217,3 +194,22 @@ async function readRowGroup(options, rowGroup, groupStart) { } return [] } + + +/** + * Return a list of sub-columns needed to construct a top-level column. + * + * @param {import('./types.js').SchemaTree} schema + * @param {string[]} output + * @returns {string[]} + */ +function getSubcolumns(schema, output = []) { + if (schema.children.length) { + for (const child of schema.children) { + getSubcolumns(child, output) + } + } else { + output.push(schema.path.join('.')) + } + return output +} diff --git a/test/assemble.test.js b/test/assemble.test.js index 23d24f1..d7ac460 100644 --- a/test/assemble.test.js +++ b/test/assemble.test.js @@ -1,11 +1,34 @@ import { describe, expect, it } from 'vitest' import { assembleLists } from '../src/assemble.js' -describe('assembleObjects', () => { +/** @typedef {import('../src/types.js').FieldRepetitionType | undefined} FieldRepetitionType */ + +describe('assembleLists', () => { + /** @type {FieldRepetitionType[]} */ + const nonnullable = [undefined, 'REQUIRED', 'REPEATED', 'REQUIRED'] + /** @type {FieldRepetitionType[]} */ + const nullable = [undefined, 'OPTIONAL', 'REPEATED', 'OPTIONAL'] + /** @type {FieldRepetitionType[]} */ + const nestedRequired = [undefined, 'REQUIRED', 'REPEATED', 'REQUIRED', 'REPEATED', 'REQUIRED'] + /** @type {FieldRepetitionType[]} */ + const nestedOptional = [undefined, 'OPTIONAL', 'REPEATED', 'OPTIONAL', 'REPEATED', 'OPTIONAL'] + + it('should not change flat objects', () => { + const values = ['a', 'b'] + const result = assembleLists([], [], values, [undefined, 'REQUIRED'], 0, 0) + expect(result).toEqual(['a', 'b']) + }) + + it('should not change nested required objects', () => { + const values = ['a', 'b'] + const result = assembleLists([], [], values, [undefined, 'REQUIRED', 'REQUIRED'], 0, 0) + expect(result).toEqual(['a', 'b']) + }) + it('should assemble objects with non-null values', () => { const repetitionLevels = [0, 1] const values = ['a', 'b'] - const result = assembleLists([], repetitionLevels, values, false, 1, 1) + const result = assembleLists([], repetitionLevels, values, nonnullable, 1, 1) expect(result).toEqual([['a', 'b']]) }) @@ -13,26 +36,26 @@ describe('assembleObjects', () => { const definitionLevels = [3, 0, 3] const repetitionLevels = [0, 1, 1] const values = ['a', 'c'] - const result = assembleLists(definitionLevels, repetitionLevels, values, true, 3, 1) - expect(result).toEqual([['a', undefined, 'c']]) + const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 3, 1) + expect(result).toEqual([[['a', null, 'c']]]) }) it('should handle empty lists', () => { - expect(assembleLists([], [], [], false, 0, 0)).toEqual([]) - expect(assembleLists([], [], [], false, 1, 0)).toEqual([[]]) + expect(assembleLists([], [], [], nonnullable, 0, 0)).toEqual([]) + expect(assembleLists([], [], [], nonnullable, 1, 0)).toEqual([[]]) }) it('should handle multiple lists', () => { const repetitionLevels = [0, 0] const values = [22, 33] - const result = assembleLists([], repetitionLevels, values, false, 1, 1) + const result = assembleLists([], repetitionLevels, values, nonnullable, 1, 1) expect(result).toEqual([[22], [33]]) }) it('should handle multiple lists (6)', () => { const repetitionLevels = [0, 1, 1, 0, 1, 1] const values = [1, 2, 3, 4, 5, 6] - const result = assembleLists([], repetitionLevels, values, false, 1, 1) + const result = assembleLists([], repetitionLevels, values, nonnullable, 1, 1) expect(result).toEqual([[1, 2, 3], [4, 5, 6]]) }) @@ -40,15 +63,15 @@ describe('assembleObjects', () => { const definitionLevels = [3, 3, 0, 3, 3] const repetitionLevels = [0, 1, 0, 0, 1] const values = ['a', 'b', 'd', 'e'] - const result = assembleLists(definitionLevels, repetitionLevels, values, true, 3, 1) - expect(result).toEqual([['a', 'b'], undefined, ['d', 'e']]) + const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 3, 1) + expect(result).toEqual([[['a', 'b']], [], [['d', 'e']]]) }) // it('should handle continuing a row from the previous page', () => { // const definitionLevels = [3, 3, 3, 1] // const repetitionLevels = [1, 0, 1, 0] // const values = ['a', 'b', 'c', 'd'] - // const result = assembleObjects(definitionLevels, repetitionLevels, values, false, 3, 1) + // const result = assembleObjects(definitionLevels, repetitionLevels, values, nullable, 3, 1) // expect(result).toEqual([['b', 'c'], [undefined]]) // }) @@ -56,7 +79,7 @@ describe('assembleObjects', () => { // from nullable.impala.parquet const repetitionLevels = [0, 2, 1, 2] const values = [1, 2, 3, 4] - const result = assembleLists([], repetitionLevels, values, false, 2, 2) + const result = assembleLists([], repetitionLevels, values, nestedRequired, 2, 2) expect(result).toEqual([[[1, 2], [3, 4]]]) }) @@ -65,26 +88,28 @@ describe('assembleObjects', () => { const definitionLevels = [2, 2, 2, 2, 1, 1, 1, 0, 2, 2] const repetitionLevels = [0, 1, 0, 1, 0, 0, 0, 0, 0, 1] const values = ['k1', 'k2', 'k1', 'k2', 'k1', 'k3'] - const result = assembleLists(definitionLevels, repetitionLevels, values, true, 2, 1) + /** @type {FieldRepetitionType[]} */ + const repetitionPath = ['REQUIRED', 'OPTIONAL', 'REPEATED', 'REQUIRED'] // map key required + const result = assembleLists(definitionLevels, repetitionLevels, values, repetitionPath, 2, 1) expect(result).toEqual([ - ['k1', 'k2'], - ['k1', 'k2'], + [['k1', 'k2']], + [['k1', 'k2']], + [[]], + [[]], + [[]], [], - [], - [], - undefined, - ['k1', 'k3'], + [['k1', 'k3']], ]) }) it('should handle empty lists with definition level', () => { // from nonnullable.impala.parquet - expect(assembleLists([0], [0], [], false, 1, 2)).toEqual([[[]]]) + expect(assembleLists([0], [0], [], nonnullable, 1, 2)).toEqual([[]]) }) it('should handle nonnullable lists', () => { // from nonnullable.impala.parquet - expect(assembleLists([1], [0], [-1], false, 1, 2)).toEqual([[[-1]]]) + expect(assembleLists([1], [0], [-1], nonnullable, 1, 2)).toEqual([[-1]]) }) it('should handle nullable int_array', () => { @@ -93,13 +118,13 @@ describe('assembleObjects', () => { const definitionLevels = [3, 3, 3, 2, 3, 3, 2, 3, 2, 1, 0, 0] const repetitionLevels = [0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0] const values = [1, 2, 3, 1, 2, 3] - const result = assembleLists(definitionLevels, repetitionLevels, values, true, 3, 1) + const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 3, 1) expect(result).toEqual([ - [1, 2, 3], - [undefined, 1, 2, undefined, 3, undefined], + [[1, 2, 3]], + [[null, 1, 2, null, 3, null]], + [[]], + [], [], - undefined, - undefined, ]) }) @@ -109,15 +134,15 @@ describe('assembleObjects', () => { const definitionLevels = [5, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 3, 2, 2, 1, 0, 0, 2, 5, 5] const repetitionLevels = [0, 2, 1, 2, 0, 2, 2, 2, 1, 2, 2, 1, 1, 0, 0, 0, 0, 0, 1, 2] const values = [1, 2, 3, 4, 1, 2, 3, 4, 5, 6] - const result = assembleLists(definitionLevels, repetitionLevels, values, true, 5, 2) + const result = assembleLists(definitionLevels, repetitionLevels, values, nestedOptional, 5, 2) expect(result).toEqual([ - [[1, 2], [3, 4]], - [[undefined, 1, 2, undefined], [3, undefined, 4], [], undefined], - [undefined], + [[[[1, 2]], [[3, 4]]]], + [[[[null, 1, 2, null]], [[3, null, 4]], [[]], []]], + [[[]]], + [[]], [], - undefined, - undefined, - [undefined, [5, 6]], + [], + [[[], [[5, 6]]]], ]) }) @@ -125,16 +150,16 @@ describe('assembleObjects', () => { const definitionLevels = [3, 4, 3, 3] const repetitionLevels = [0, 1, 1, 1] const values = ['k1'] - const result = assembleLists(definitionLevels, repetitionLevels, values, true, 4, 2) - expect(result).toEqual([[[], ['k1'], [], []]]) + const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 4, 2) + expect(result).toEqual([[[null, 'k1', null, null]]]) }) it('should handle nonnullable int_map_array values', () => { const definitionLevels = [3, 5, 3, 3] const repetitionLevels = [0, 1, 1, 1] const values = ['v1'] - const result = assembleLists(definitionLevels, repetitionLevels, values, true, 5, 2) - expect(result).toEqual([[[], ['v1'], [], []]]) + const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 5, 2) + expect(result).toEqual([[[null, 'v1', null, null]]]) }) it('should handle mixed optional and required', () => { @@ -142,15 +167,26 @@ describe('assembleObjects', () => { const definitionLevels = [2, 2, 2, 0, 0, 2, 2, 2, 2, 2] const repetitionLevels = [0, 1, 1, 0, 0, 0, 1, 1, 0, 1] const values = [1, 2, 3, 1, 2, 3, 1, 2] - const result = assembleLists(definitionLevels, repetitionLevels, values, true, 2, 1) - expect(result).toEqual([[1, 2, 3], undefined, undefined, [1, 2, 3], [1, 2]]) + /** @type {FieldRepetitionType[]} */ + const repetitionPath = [undefined, 'OPTIONAL', 'REPEATED', 'REQUIRED'] + const result = assembleLists(definitionLevels, repetitionLevels, values, repetitionPath, 2, 1) + expect(result).toEqual([[[1, 2, 3]], [], [], [[1, 2, 3]], [[1, 2]]]) }) it('should handle nested required', () => { // from nonnullable.impala.parquet nested_Struct i const definitionLevels = [0] const repetitionLevels = [0] - const result = assembleLists(definitionLevels, repetitionLevels, [], false, 2, 2) - expect(result).toEqual([[[]]]) + /** @type {FieldRepetitionType[]} */ + const repetitionPath = [undefined, 'REQUIRED', 'REQUIRED', 'REPEATED', 'REQUIRED', 'REQUIRED', 'REPEATED', 'REQUIRED'] + const result = assembleLists(definitionLevels, repetitionLevels, [], repetitionPath, 2, 2) + expect(result).toEqual([[]]) + }) + + it('should handle dzenilee', () => { + const repetitionLevels = [0, 1, 1, 0, 1, 1] + const values = ['a', 'b', 'c', 'd', 'e', 'f'] + const result = assembleLists([], repetitionLevels, values, nullable, 3, 1) + expect(result).toEqual([[['a', 'b', 'c']], [['d', 'e', 'f']]]) }) }) diff --git a/test/files/nonnullable.impala.json b/test/files/nonnullable.impala.json index 62a560b..bf21a0d 100644 --- a/test/files/nonnullable.impala.json +++ b/test/files/nonnullable.impala.json @@ -4,11 +4,17 @@ [-1], [[-1, -2], []], { "k1": -1 }, - { "k1": 1 }, - -1, - [-1], - [[-1]], - [["nonnullable"]], - [[]] + [{}, { "k1": 1 }, {}, {}], + { + "a": -1, + "B": [-1], + "c": { + "D": [[{ + "e": -1, + "f": "nonnullable" + }]] + }, + "G": {} + } ] ]