mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Assembly of nested column types (#11)
This commit is contained in:
parent
3f958ed25d
commit
cf4c4ba04d
205
src/assemble.js
205
src/assemble.js
@ -1,3 +1,5 @@
|
||||
import { isListLike, isMapLike } from './schema.js'
|
||||
|
||||
/**
|
||||
* Dremel-assembly of arrays of values into lists
|
||||
*
|
||||
@ -5,16 +7,17 @@
|
||||
* according to Dremel encoding.
|
||||
*
|
||||
* @typedef {import('./types.d.ts').DecodedArray} DecodedArray
|
||||
* @typedef {import('./types.d.ts').FieldRepetitionType} FieldRepetitionType
|
||||
* @param {number[] | undefined} definitionLevels
|
||||
* @param {number[]} repetitionLevels
|
||||
* @param {DecodedArray} values
|
||||
* @param {boolean} isNullable can entries be null?
|
||||
* @param {(FieldRepetitionType | undefined)[]} repetitionPath
|
||||
* @param {number} maxDefinitionLevel definition level that corresponds to non-null
|
||||
* @param {number} maxRepetitionLevel repetition level that corresponds to a new row
|
||||
* @returns {DecodedArray} array of values
|
||||
*/
|
||||
export function assembleLists(
|
||||
definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel
|
||||
definitionLevels, repetitionLevels, values, repetitionPath, maxDefinitionLevel, maxRepetitionLevel
|
||||
) {
|
||||
const n = definitionLevels?.length || repetitionLevels.length
|
||||
let valueIndex = 0
|
||||
@ -24,49 +27,56 @@ export function assembleLists(
|
||||
// Track state of nested structures
|
||||
const containerStack = [output]
|
||||
let currentContainer = output
|
||||
let currentDepth = 0 // schema depth
|
||||
let currentDefLevel = 0 // list depth
|
||||
let currentRepLevel = 0
|
||||
|
||||
for (let i = 0; i < n; i++) {
|
||||
// assert(currentDefLevel === containerStack.length - 1)
|
||||
const def = definitionLevels?.length ? definitionLevels[i] : maxDefinitionLevel
|
||||
const rep = repetitionLevels[i]
|
||||
|
||||
if (rep !== maxRepetitionLevel) {
|
||||
// Move back to the parent container
|
||||
while (rep < containerStack.length - 1) {
|
||||
// Pop up to start of rep level
|
||||
while (currentDepth && (rep < currentRepLevel || repetitionPath[currentDepth] === 'OPTIONAL')) {
|
||||
if (repetitionPath[currentDepth] !== 'REQUIRED') {
|
||||
containerStack.pop()
|
||||
currentDefLevel--
|
||||
}
|
||||
// Construct new lists up to max repetition level
|
||||
// @ts-expect-error won't be empty
|
||||
currentContainer = containerStack.at(-1)
|
||||
if (repetitionPath[currentDepth] === 'REPEATED') currentRepLevel--
|
||||
currentDepth--
|
||||
}
|
||||
// @ts-expect-error won't be empty
|
||||
currentContainer = containerStack.at(-1)
|
||||
|
||||
// Add lists up to definition level
|
||||
const targetDepth = isNullable ? (def + 1) / 2 : maxRepetitionLevel + 1
|
||||
for (let j = containerStack.length; j < targetDepth; j++) {
|
||||
/** @type {any[]} */
|
||||
const newList = []
|
||||
currentContainer.push(newList)
|
||||
currentContainer = newList
|
||||
containerStack.push(newList)
|
||||
// Go deeper to end of definition level
|
||||
while (currentDepth < repetitionPath.length - 2 && currentDefLevel < def) {
|
||||
currentDepth++
|
||||
if (repetitionPath[currentDepth] !== 'REQUIRED') {
|
||||
/** @type {any[]} */
|
||||
const newList = []
|
||||
currentContainer.push(newList)
|
||||
currentContainer = newList
|
||||
containerStack.push(newList)
|
||||
currentDefLevel++
|
||||
}
|
||||
if (repetitionPath[currentDepth] === 'REPEATED') currentRepLevel++
|
||||
}
|
||||
|
||||
// Add value or null based on definition level
|
||||
if (def === maxDefinitionLevel) {
|
||||
// assert(currentDepth === maxDefinitionLevel || currentDepth === repetitionPath.length - 2)
|
||||
currentContainer.push(values[valueIndex++])
|
||||
} else if (isNullable) {
|
||||
// TODO: actually depends on level required or not
|
||||
if (def % 2 === 0) {
|
||||
currentContainer.push(undefined)
|
||||
} else {
|
||||
currentContainer.push([])
|
||||
}
|
||||
} else if (currentDepth === repetitionPath.length - 2) {
|
||||
currentContainer.push(null)
|
||||
} else {
|
||||
currentContainer.push([])
|
||||
}
|
||||
}
|
||||
|
||||
// Handle edge cases for empty inputs or single-level data
|
||||
if (output.length === 0) {
|
||||
if (values.length > 0 && maxRepetitionLevel === 0) {
|
||||
// All values belong to the same (root) list
|
||||
return [values]
|
||||
return values // flat list
|
||||
}
|
||||
// return max definition level of nested lists
|
||||
for (let i = 0; i < maxDefinitionLevel; i++) {
|
||||
@ -80,4 +90,147 @@ export function assembleLists(
|
||||
return output
|
||||
}
|
||||
|
||||
// TODO: depends on prior def level
|
||||
/**
 * Assemble a nested structure from subcolumn data.
 * https://github.com/apache/parquet-format/blob/apache-parquet-format-2.10.0/LogicalTypes.md#nested-types
 *
 * Works in place on subcolumnData: the entries of the children of `schema`
 * are consumed (deleted) and replaced by a single entry keyed by the
 * dot-joined path of `schema`. A schema element without children is left
 * untouched.
 *
 * @typedef {import('./types.d.ts').SchemaTree} SchemaTree
 * @param {Map<string, any[]>} subcolumnData
 * @param {SchemaTree} schema top-level schema element
 * @param {number} [depth] depth of nested structure
 * @throws {Error} if data for a required subcolumn is missing, or if the
 *   key and value arrays of a map-like column differ in length
 */
export function assembleNested(subcolumnData, schema, depth = 0) {
  const path = schema.path.join('.')
  const optional = schema.element.repetition_type === 'OPTIONAL'
  // an optional element contributes one extra level of nesting below it
  const nextDepth = optional ? depth + 1 : depth

  if (isListLike(schema)) {
    // the list element is the grandchild: schema -> repeated group -> element
    const sublist = schema.children[0].children[0]
    assembleNested(subcolumnData, sublist, nextDepth + 1)

    const subcolumn = sublist.path.join('.')
    const values = subcolumnData.get(subcolumn)
    if (!values) throw new Error('parquet list-like column missing values')
    if (optional) flattenAtDepth(values, depth)
    // re-key the assembled element data under the list's own path
    subcolumnData.set(path, values)
    subcolumnData.delete(subcolumn)
    return
  }

  if (isMapLike(schema)) {
    const mapName = schema.children[0].element.name

    // Assemble keys and values
    assembleNested(subcolumnData, schema.children[0].children[0], nextDepth + 1)
    assembleNested(subcolumnData, schema.children[0].children[1], nextDepth + 1)

    const keys = subcolumnData.get(`${path}.${mapName}.key`)
    const values = subcolumnData.get(`${path}.${mapName}.value`)

    if (!keys) throw new Error('parquet map-like column missing keys')
    if (!values) throw new Error('parquet map-like column missing values')
    if (keys.length !== values.length) {
      throw new Error('parquet map-like column key/value length mismatch')
    }

    const out = assembleMaps(keys, values, nextDepth)
    if (optional) flattenAtDepth(out, depth)

    subcolumnData.delete(`${path}.${mapName}.key`)
    subcolumnData.delete(`${path}.${mapName}.value`)
    subcolumnData.set(path, out)
    return
  }

  // Struct-like column
  if (schema.children.length) {
    // construct a meta struct (field name -> column data) and then invert
    /** @type {Record<string, any>} */
    const struct = {}
    for (const child of schema.children) {
      assembleNested(subcolumnData, child, nextDepth)
      const childData = subcolumnData.get(child.path.join('.'))
      if (!childData) throw new Error('parquet struct-like column missing child data')
      if (child.element.repetition_type === 'OPTIONAL') {
        flattenAtDepth(childData, depth)
      }
      struct[child.element.name] = childData
    }
    // remove children
    for (const child of schema.children) {
      subcolumnData.delete(child.path.join('.'))
    }
    // invert struct by depth
    subcolumnData.set(path, invertStruct(struct, depth))
    return
  }
  // assert(schema.element.repetition_type !== 'REPEATED')
}
|
||||
|
||||
/**
 * Remove one level of array wrapping at the given nesting depth, in place.
 *
 * At depth 0 every entry of `arr` is replaced by its first element
 * (`arr[i][0]`); at a greater depth the same operation is applied
 * recursively to each entry.
 *
 * @param {any[]} arr array to modify in place
 * @param {number} depth number of array levels to descend before unwrapping
 */
function flattenAtDepth(arr, depth) {
  for (let i = 0; i < arr.length; i++) {
    if (depth) {
      flattenAtDepth(arr[i], depth - 1)
    } else {
      arr[i] = arr[i][0]
    }
  }
}
|
||||
|
||||
/**
 * Zip parallel key and value arrays into plain objects at the given depth.
 *
 * At depth 0 each `keys[i]` / `values[i]` pair of arrays becomes one object
 * mapping `keys[i][j]` to `values[i][j]`; an undefined value is stored as
 * null, and a falsy `keys[i]` produces `undefined` in the output. At a
 * greater depth the function recurses into each entry.
 *
 * @param {any[]} keys
 * @param {any[]} values
 * @param {number} depth
 * @returns {any[]}
 */
function assembleMaps(keys, values, depth) {
  const out = []
  for (let i = 0; i < keys.length; i++) {
    if (depth) {
      out.push(assembleMaps(keys[i], values[i], depth - 1)) // go deeper
    } else if (keys[i]) {
      /** @type {Record<string, any>} */
      const obj = {}
      for (let j = 0; j < keys[i].length; j++) {
        obj[keys[i][j]] = values[i][j] ?? null
      }
      out.push(obj)
    } else {
      out.push(undefined)
    }
  }
  return out
}
|
||||
|
||||
/**
 * Invert a struct-like object by depth.
 *
 * Turns an object of parallel arrays (`{ a: [1, 2], b: [3, 4] }`) into an
 * array of objects (`[{ a: 1, b: 3 }, { a: 2, b: 4 }]`). With depth > 0 the
 * inversion is applied one array level deeper for each level of depth.
 * The row count is taken from the first field; if that field is missing
 * the result is an empty array.
 *
 * @param {Record<string, any[]>} struct
 * @param {number} depth
 * @returns {any[]}
 * @throws {Error} if the fields do not all have the same number of rows
 */
function invertStruct(struct, depth) {
  const keys = Object.keys(struct)
  const length = struct[keys[0]]?.length
  // Mismatched field lengths would otherwise silently yield rows with
  // undefined members or drop trailing data, so fail loudly instead.
  if (length > 0) {
    for (const key of keys) {
      if (struct[key].length !== length) {
        throw new Error('parquet struct-like column child length mismatch')
      }
    }
  }
  const out = []
  for (let i = 0; i < length; i++) {
    /** @type {Record<string, any>} */
    const obj = {}
    for (const key of keys) {
      obj[key] = struct[key][i]
    }
    if (depth) {
      out.push(invertStruct(obj, depth - 1)) // deeper
    } else {
      out.push(obj)
    }
  }
  return out
}
|
||||
|
||||
@ -3,7 +3,7 @@ import { convert } from './convert.js'
|
||||
import { readDataPage, readDictionaryPage } from './datapage.js'
|
||||
import { readDataPageV2 } from './datapageV2.js'
|
||||
import { parquetHeader } from './header.js'
|
||||
import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired } from './schema.js'
|
||||
import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
|
||||
import { snappyUncompress } from './snappy.js'
|
||||
import { concat } from './utils.js'
|
||||
|
||||
@ -39,7 +39,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
// parse column header
|
||||
const header = parquetHeader(reader)
|
||||
if (header.compressed_page_size === undefined) {
|
||||
throw new Error(`parquet compressed page size is undefined in column '${columnMetadata.path_in_schema}'`)
|
||||
throw new Error('parquet compressed page size is undefined')
|
||||
}
|
||||
|
||||
// read compressed_page_size bytes starting at offset
|
||||
@ -68,9 +68,9 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
// Use repetition levels to construct lists
|
||||
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
|
||||
const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath)
|
||||
const isNullable = columnMetadata && !isRequired(schemaPath.slice(0, 2))
|
||||
const repetitionPath = schemaPath.map(({ element }) => element.repetition_type)
|
||||
values = assembleLists(
|
||||
definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel
|
||||
definitionLevels, repetitionLevels, values, repetitionPath, maxDefinitionLevel, maxRepetitionLevel
|
||||
)
|
||||
} else {
|
||||
// wrap nested flat data by depth
|
||||
@ -95,11 +95,11 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
values = convert(dataPage, element)
|
||||
if (repetitionLevels.length || definitionLevels?.length) {
|
||||
// Use repetition levels to construct lists
|
||||
const isNullable = columnMetadata && !isRequired(schemaPath.slice(0, 2))
|
||||
const maxDefinitionLevel = getMaxDefinitionLevel(schemaPath)
|
||||
const maxRepetitionLevel = getMaxRepetitionLevel(schemaPath)
|
||||
const repetitionPath = schemaPath.map(({ element }) => element.repetition_type)
|
||||
values = assembleLists(
|
||||
definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel
|
||||
definitionLevels, repetitionLevels, values, repetitionPath, maxDefinitionLevel, maxRepetitionLevel
|
||||
)
|
||||
}
|
||||
concat(rowData, values)
|
||||
|
||||
82
src/read.js
82
src/read.js
@ -1,7 +1,8 @@
|
||||
|
||||
import { assembleNested } from './assemble.js'
|
||||
import { getColumnOffset, readColumn } from './column.js'
|
||||
import { parquetMetadataAsync } from './metadata.js'
|
||||
import { getSchemaPath, isMapLike } from './schema.js'
|
||||
import { getSchemaPath } from './schema.js'
|
||||
import { concat } from './utils.js'
|
||||
|
||||
/**
|
||||
@ -111,7 +112,10 @@ async function readRowGroup(options, rowGroup, groupStart) {
|
||||
/** @type {any[][]} */
|
||||
const groupColumnData = []
|
||||
const promises = []
|
||||
const maps = new Map()
|
||||
// Top-level columns to assemble
|
||||
const { children } = getSchemaPath(metadata.schema, [])[0]
|
||||
const subcolumnNames = new Map(children.map(child => [child.element.name, getSubcolumns(child)]))
|
||||
const subcolumnData = new Map() // columns to assemble as maps
|
||||
// read column data
|
||||
for (let columnIndex = 0; columnIndex < rowGroup.columns.length; columnIndex++) {
|
||||
const columnMetadata = rowGroup.columns[columnIndex].meta_data
|
||||
@ -152,48 +156,21 @@ async function readRowGroup(options, rowGroup, groupStart) {
|
||||
let columnData = readColumn(
|
||||
arrayBuffer, bufferOffset, rowGroup, columnMetadata, schemaPath, compressors
|
||||
)
|
||||
if (columnData.length !== Number(rowGroup.num_rows)) {
|
||||
throw new Error(`parquet column length ${columnData.length} does not match row group length ${rowGroup.num_rows}`)
|
||||
}
|
||||
// assert(columnData.length === Number(rowGroup.num_rows)
|
||||
|
||||
if (isMapLike(schemaPath[schemaPath.length - 3])) {
|
||||
const name = columnMetadata.path_in_schema.slice(0, -2).join('.')
|
||||
if (!maps.has(name)) {
|
||||
maps.set(name, columnData)
|
||||
columnData = undefined // do not emit column data until both key and value are read
|
||||
} else {
|
||||
if (columnMetadata.path_in_schema[0] === 'key') {
|
||||
throw new Error('parquet map-like column key is not first') // TODO: support value-first
|
||||
} else {
|
||||
const values = columnData
|
||||
const keys = maps.get(name)
|
||||
const out = []
|
||||
if (keys.length !== values.length) {
|
||||
throw new Error('parquet map-like column key/value length mismatch')
|
||||
}
|
||||
// assemble map-like column data
|
||||
for (let i = 0; i < keys.length; i++) {
|
||||
// keys will be empty for {} and undefined for null
|
||||
if (keys[i]) {
|
||||
/** @type {Record<string, any>} */
|
||||
const obj = {}
|
||||
for (let j = 0; j < keys[i].length; j++) {
|
||||
if (Array.isArray(keys[i][j])) {
|
||||
// TODO: key should not be an array, this is an assemble bug?
|
||||
keys[i][j] = keys[i][j][0]
|
||||
values[i][j] = values[i][j][0]
|
||||
}
|
||||
if (!keys[i][j]) continue
|
||||
obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j]
|
||||
}
|
||||
out.push(obj)
|
||||
} else {
|
||||
out.push(undefined)
|
||||
}
|
||||
}
|
||||
columnData = out
|
||||
}
|
||||
maps.delete(name)
|
||||
// TODO: fast path for non-nested columns
|
||||
// Save column data for assembly
|
||||
const subcolumn = columnMetadata.path_in_schema.join('.')
|
||||
subcolumnData.set(subcolumn, columnData)
|
||||
columnData = undefined
|
||||
|
||||
const subcolumns = subcolumnNames.get(columnName)
|
||||
if (subcolumns?.every(name => subcolumnData.has(name))) {
|
||||
// We have all data needed to assemble a top level column
|
||||
assembleNested(subcolumnData, schemaPath[1])
|
||||
columnData = subcolumnData.get(columnName)
|
||||
if (!columnData) {
|
||||
throw new Error(`parquet column data not assembled: ${columnName}`)
|
||||
}
|
||||
}
|
||||
|
||||
@ -217,3 +194,22 @@ async function readRowGroup(options, rowGroup, groupStart) {
|
||||
}
|
||||
return []
|
||||
}
|
||||
|
||||
|
||||
/**
 * Return a list of sub-columns needed to construct a top-level column.
 *
 * Walks the schema tree depth-first and collects the dot-joined path of
 * every element that has no children.
 *
 * @param {import('./types.js').SchemaTree} schema
 * @param {string[]} output accumulator; paths are appended to it
 * @returns {string[]} the same array that was passed as `output`
 */
function getSubcolumns(schema, output = []) {
  if (schema.children.length) {
    for (const child of schema.children) {
      getSubcolumns(child, output)
    }
  } else {
    output.push(schema.path.join('.'))
  }
  return output
}
|
||||
|
||||
@ -1,11 +1,34 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { assembleLists } from '../src/assemble.js'
|
||||
|
||||
describe('assembleObjects', () => {
|
||||
/** @typedef {import('../src/types.js').FieldRepetitionType | undefined} FieldRepetitionType */
|
||||
|
||||
describe('assembleLists', () => {
|
||||
/** @type {FieldRepetitionType[]} */
|
||||
const nonnullable = [undefined, 'REQUIRED', 'REPEATED', 'REQUIRED']
|
||||
/** @type {FieldRepetitionType[]} */
|
||||
const nullable = [undefined, 'OPTIONAL', 'REPEATED', 'OPTIONAL']
|
||||
/** @type {FieldRepetitionType[]} */
|
||||
const nestedRequired = [undefined, 'REQUIRED', 'REPEATED', 'REQUIRED', 'REPEATED', 'REQUIRED']
|
||||
/** @type {FieldRepetitionType[]} */
|
||||
const nestedOptional = [undefined, 'OPTIONAL', 'REPEATED', 'OPTIONAL', 'REPEATED', 'OPTIONAL']
|
||||
|
||||
it('should not change flat objects', () => {
|
||||
const values = ['a', 'b']
|
||||
const result = assembleLists([], [], values, [undefined, 'REQUIRED'], 0, 0)
|
||||
expect(result).toEqual(['a', 'b'])
|
||||
})
|
||||
|
||||
it('should not change nested required objects', () => {
|
||||
const values = ['a', 'b']
|
||||
const result = assembleLists([], [], values, [undefined, 'REQUIRED', 'REQUIRED'], 0, 0)
|
||||
expect(result).toEqual(['a', 'b'])
|
||||
})
|
||||
|
||||
it('should assemble objects with non-null values', () => {
|
||||
const repetitionLevels = [0, 1]
|
||||
const values = ['a', 'b']
|
||||
const result = assembleLists([], repetitionLevels, values, false, 1, 1)
|
||||
const result = assembleLists([], repetitionLevels, values, nonnullable, 1, 1)
|
||||
expect(result).toEqual([['a', 'b']])
|
||||
})
|
||||
|
||||
@ -13,26 +36,26 @@ describe('assembleObjects', () => {
|
||||
const definitionLevels = [3, 0, 3]
|
||||
const repetitionLevels = [0, 1, 1]
|
||||
const values = ['a', 'c']
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, true, 3, 1)
|
||||
expect(result).toEqual([['a', undefined, 'c']])
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 3, 1)
|
||||
expect(result).toEqual([[['a', null, 'c']]])
|
||||
})
|
||||
|
||||
it('should handle empty lists', () => {
|
||||
expect(assembleLists([], [], [], false, 0, 0)).toEqual([])
|
||||
expect(assembleLists([], [], [], false, 1, 0)).toEqual([[]])
|
||||
expect(assembleLists([], [], [], nonnullable, 0, 0)).toEqual([])
|
||||
expect(assembleLists([], [], [], nonnullable, 1, 0)).toEqual([[]])
|
||||
})
|
||||
|
||||
it('should handle multiple lists', () => {
|
||||
const repetitionLevels = [0, 0]
|
||||
const values = [22, 33]
|
||||
const result = assembleLists([], repetitionLevels, values, false, 1, 1)
|
||||
const result = assembleLists([], repetitionLevels, values, nonnullable, 1, 1)
|
||||
expect(result).toEqual([[22], [33]])
|
||||
})
|
||||
|
||||
it('should handle multiple lists (6)', () => {
|
||||
const repetitionLevels = [0, 1, 1, 0, 1, 1]
|
||||
const values = [1, 2, 3, 4, 5, 6]
|
||||
const result = assembleLists([], repetitionLevels, values, false, 1, 1)
|
||||
const result = assembleLists([], repetitionLevels, values, nonnullable, 1, 1)
|
||||
expect(result).toEqual([[1, 2, 3], [4, 5, 6]])
|
||||
})
|
||||
|
||||
@ -40,15 +63,15 @@ describe('assembleObjects', () => {
|
||||
const definitionLevels = [3, 3, 0, 3, 3]
|
||||
const repetitionLevels = [0, 1, 0, 0, 1]
|
||||
const values = ['a', 'b', 'd', 'e']
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, true, 3, 1)
|
||||
expect(result).toEqual([['a', 'b'], undefined, ['d', 'e']])
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 3, 1)
|
||||
expect(result).toEqual([[['a', 'b']], [], [['d', 'e']]])
|
||||
})
|
||||
|
||||
// it('should handle continuing a row from the previous page', () => {
|
||||
// const definitionLevels = [3, 3, 3, 1]
|
||||
// const repetitionLevels = [1, 0, 1, 0]
|
||||
// const values = ['a', 'b', 'c', 'd']
|
||||
// const result = assembleObjects(definitionLevels, repetitionLevels, values, false, 3, 1)
|
||||
// const result = assembleObjects(definitionLevels, repetitionLevels, values, nullable, 3, 1)
|
||||
// expect(result).toEqual([['b', 'c'], [undefined]])
|
||||
// })
|
||||
|
||||
@ -56,7 +79,7 @@ describe('assembleObjects', () => {
|
||||
// from nullable.impala.parquet
|
||||
const repetitionLevels = [0, 2, 1, 2]
|
||||
const values = [1, 2, 3, 4]
|
||||
const result = assembleLists([], repetitionLevels, values, false, 2, 2)
|
||||
const result = assembleLists([], repetitionLevels, values, nestedRequired, 2, 2)
|
||||
expect(result).toEqual([[[1, 2], [3, 4]]])
|
||||
})
|
||||
|
||||
@ -65,26 +88,28 @@ describe('assembleObjects', () => {
|
||||
const definitionLevels = [2, 2, 2, 2, 1, 1, 1, 0, 2, 2]
|
||||
const repetitionLevels = [0, 1, 0, 1, 0, 0, 0, 0, 0, 1]
|
||||
const values = ['k1', 'k2', 'k1', 'k2', 'k1', 'k3']
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, true, 2, 1)
|
||||
/** @type {FieldRepetitionType[]} */
|
||||
const repetitionPath = ['REQUIRED', 'OPTIONAL', 'REPEATED', 'REQUIRED'] // map key required
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, repetitionPath, 2, 1)
|
||||
expect(result).toEqual([
|
||||
['k1', 'k2'],
|
||||
['k1', 'k2'],
|
||||
[['k1', 'k2']],
|
||||
[['k1', 'k2']],
|
||||
[[]],
|
||||
[[]],
|
||||
[[]],
|
||||
[],
|
||||
[],
|
||||
[],
|
||||
undefined,
|
||||
['k1', 'k3'],
|
||||
[['k1', 'k3']],
|
||||
])
|
||||
})
|
||||
|
||||
it('should handle empty lists with definition level', () => {
|
||||
// from nonnullable.impala.parquet
|
||||
expect(assembleLists([0], [0], [], false, 1, 2)).toEqual([[[]]])
|
||||
expect(assembleLists([0], [0], [], nonnullable, 1, 2)).toEqual([[]])
|
||||
})
|
||||
|
||||
it('should handle nonnullable lists', () => {
|
||||
// from nonnullable.impala.parquet
|
||||
expect(assembleLists([1], [0], [-1], false, 1, 2)).toEqual([[[-1]]])
|
||||
expect(assembleLists([1], [0], [-1], nonnullable, 1, 2)).toEqual([[-1]])
|
||||
})
|
||||
|
||||
it('should handle nullable int_array', () => {
|
||||
@ -93,13 +118,13 @@ describe('assembleObjects', () => {
|
||||
const definitionLevels = [3, 3, 3, 2, 3, 3, 2, 3, 2, 1, 0, 0]
|
||||
const repetitionLevels = [0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
|
||||
const values = [1, 2, 3, 1, 2, 3]
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, true, 3, 1)
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 3, 1)
|
||||
expect(result).toEqual([
|
||||
[1, 2, 3],
|
||||
[undefined, 1, 2, undefined, 3, undefined],
|
||||
[[1, 2, 3]],
|
||||
[[null, 1, 2, null, 3, null]],
|
||||
[[]],
|
||||
[],
|
||||
[],
|
||||
undefined,
|
||||
undefined,
|
||||
])
|
||||
})
|
||||
|
||||
@ -109,15 +134,15 @@ describe('assembleObjects', () => {
|
||||
const definitionLevels = [5, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 3, 2, 2, 1, 0, 0, 2, 5, 5]
|
||||
const repetitionLevels = [0, 2, 1, 2, 0, 2, 2, 2, 1, 2, 2, 1, 1, 0, 0, 0, 0, 0, 1, 2]
|
||||
const values = [1, 2, 3, 4, 1, 2, 3, 4, 5, 6]
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, true, 5, 2)
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, nestedOptional, 5, 2)
|
||||
expect(result).toEqual([
|
||||
[[1, 2], [3, 4]],
|
||||
[[undefined, 1, 2, undefined], [3, undefined, 4], [], undefined],
|
||||
[undefined],
|
||||
[[[[1, 2]], [[3, 4]]]],
|
||||
[[[[null, 1, 2, null]], [[3, null, 4]], [[]], []]],
|
||||
[[[]]],
|
||||
[[]],
|
||||
[],
|
||||
undefined,
|
||||
undefined,
|
||||
[undefined, [5, 6]],
|
||||
[],
|
||||
[[[], [[5, 6]]]],
|
||||
])
|
||||
})
|
||||
|
||||
@ -125,16 +150,16 @@ describe('assembleObjects', () => {
|
||||
const definitionLevels = [3, 4, 3, 3]
|
||||
const repetitionLevels = [0, 1, 1, 1]
|
||||
const values = ['k1']
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, true, 4, 2)
|
||||
expect(result).toEqual([[[], ['k1'], [], []]])
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 4, 2)
|
||||
expect(result).toEqual([[[null, 'k1', null, null]]])
|
||||
})
|
||||
|
||||
it('should handle nonnullable int_map_array values', () => {
|
||||
const definitionLevels = [3, 5, 3, 3]
|
||||
const repetitionLevels = [0, 1, 1, 1]
|
||||
const values = ['v1']
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, true, 5, 2)
|
||||
expect(result).toEqual([[[], ['v1'], [], []]])
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, nullable, 5, 2)
|
||||
expect(result).toEqual([[[null, 'v1', null, null]]])
|
||||
})
|
||||
|
||||
it('should handle mixed optional and required', () => {
|
||||
@ -142,15 +167,26 @@ describe('assembleObjects', () => {
|
||||
const definitionLevels = [2, 2, 2, 0, 0, 2, 2, 2, 2, 2]
|
||||
const repetitionLevels = [0, 1, 1, 0, 0, 0, 1, 1, 0, 1]
|
||||
const values = [1, 2, 3, 1, 2, 3, 1, 2]
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, true, 2, 1)
|
||||
expect(result).toEqual([[1, 2, 3], undefined, undefined, [1, 2, 3], [1, 2]])
|
||||
/** @type {FieldRepetitionType[]} */
|
||||
const repetitionPath = [undefined, 'OPTIONAL', 'REPEATED', 'REQUIRED']
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, values, repetitionPath, 2, 1)
|
||||
expect(result).toEqual([[[1, 2, 3]], [], [], [[1, 2, 3]], [[1, 2]]])
|
||||
})
|
||||
|
||||
it('should handle nested required', () => {
|
||||
// from nonnullable.impala.parquet nested_Struct i
|
||||
const definitionLevels = [0]
|
||||
const repetitionLevels = [0]
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, [], false, 2, 2)
|
||||
expect(result).toEqual([[[]]])
|
||||
/** @type {FieldRepetitionType[]} */
|
||||
const repetitionPath = [undefined, 'REQUIRED', 'REQUIRED', 'REPEATED', 'REQUIRED', 'REQUIRED', 'REPEATED', 'REQUIRED']
|
||||
const result = assembleLists(definitionLevels, repetitionLevels, [], repetitionPath, 2, 2)
|
||||
expect(result).toEqual([[]])
|
||||
})
|
||||
|
||||
it('should handle dzenilee', () => {
|
||||
const repetitionLevels = [0, 1, 1, 0, 1, 1]
|
||||
const values = ['a', 'b', 'c', 'd', 'e', 'f']
|
||||
const result = assembleLists([], repetitionLevels, values, nullable, 3, 1)
|
||||
expect(result).toEqual([[['a', 'b', 'c']], [['d', 'e', 'f']]])
|
||||
})
|
||||
})
|
||||
|
||||
@ -4,11 +4,17 @@
|
||||
[-1],
|
||||
[[-1, -2], []],
|
||||
{ "k1": -1 },
|
||||
{ "k1": 1 },
|
||||
-1,
|
||||
[-1],
|
||||
[[-1]],
|
||||
[["nonnullable"]],
|
||||
[[]]
|
||||
[{}, { "k1": 1 }, {}, {}],
|
||||
{
|
||||
"a": -1,
|
||||
"B": [-1],
|
||||
"c": {
|
||||
"D": [[{
|
||||
"e": -1,
|
||||
"f": "nonnullable"
|
||||
}]]
|
||||
},
|
||||
"G": {}
|
||||
}
|
||||
]
|
||||
]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user