hyparquet/src/assemble.js

93 lines
3.0 KiB
JavaScript
Raw Normal View History

2024-03-18 23:36:16 +00:00
/**
 * Dremel-assembly of arrays of values into lists
 *
 * Rebuilds nested lists from a flat array of values plus parallel arrays of
 * definition and repetition levels. This simplified version only produces
 * arrays and scalar values, with optional `undefined` placeholders for nulls.
 *
 * One entry is processed per element of `repetitionLevels`:
 * - a repetition level equal to `maxRepetitionLevel` appends to the list that
 *   is currently open; any other level first unwinds to the container at that
 *   depth and, when the definition level is non-zero, opens new nested lists
 *   down to `maxRepetitionLevel`.
 * - a definition level equal to `maxDefinitionLevel` consumes the next element
 *   of `values`; lower levels add `undefined` or nothing (see inline comments).
 *
 * @param {number[] | undefined} definitionLevels definition level per entry;
 *   when missing or empty every entry is treated as fully defined
 * @param {number[]} repetitionLevels repetition level per entry
 * @param {ArrayLike<any>} values values to process
 * @param {boolean} isNullable can entries be null?
 * @param {number} maxDefinitionLevel definition level that corresponds to non-null
 * @param {number} maxRepetitionLevel repetition level at which an entry is
 *   appended to the currently open list
 * @returns {any[]} array of assembled values; when nothing was assembled and
 *   `maxRepetitionLevel` is 0, `values` itself is returned wrapped in an array
 * @throws {Error} if the container stack was unwound past the root and a
 *   container is needed to append to
 */
export function assembleObjects(
  definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel
) {
  let valueIndex = 0
  /** @type {any[]} */
  const output = []
  /** @type {any[] | undefined} */
  let currentContainer = output

  // Stack of open containers, root first. Index in the stack == nesting depth.
  /** @type {any[][]} */
  const containerStack = [output]

  /**
   * Return the container to append to, failing loudly if the stack was
   * unwound past the root (possible in the null-handling loop below).
   *
   * @returns {any[]}
   */
  function requireContainer() {
    if (!currentContainer) {
      throw new Error('parquet assembleObjects: currentContainer is undefined')
    }
    return currentContainer
  }

  for (let i = 0; i < repetitionLevels.length; i++) {
    // Without definition levels every entry counts as fully defined
    const def = definitionLevels?.length ? definitionLevels[i] : maxDefinitionLevel
    const rep = repetitionLevels[i]

    if (rep !== maxRepetitionLevel) {
      // Move back to the parent container at depth `rep`
      while (rep < containerStack.length - 1) {
        containerStack.pop()
      }
      currentContainer = containerStack.at(-1)
      // Construct new lists up to max repetition level.
      // A definition level of 0 opens nothing.
      if (def) {
        for (let j = rep; j < maxRepetitionLevel; j++) {
          /** @type {any[]} */
          const newList = []
          requireContainer().push(newList)
          currentContainer = newList
          containerStack.push(newList)
        }
      }
    }

    // Add value or null based on definition level
    if (def === maxDefinitionLevel) {
      requireContainer().push(values[valueIndex++])
    } else if (isNullable) {
      if (def) {
        // TODO: Go up maxDefinitionLevel - def - 1 levels to add null
        for (let j = def; j < maxDefinitionLevel - 1; j++) {
          containerStack.pop()
          currentContainer = containerStack.at(-1)
        }
        // Entries with def === 1 add no placeholder here
        if (def > 1) {
          requireContainer().push(undefined)
        }
      } else {
        requireContainer().push(undefined)
      }
    }
  }

  // Handle edge cases for empty inputs or single-level data
  if (output.length === 0) {
    if (values.length > 0 && maxRepetitionLevel === 0) {
      // All values belong to the same (root) list
      return [values]
    }
    // return max definition level of nested lists
    for (let i = 0; i < maxDefinitionLevel; i++) {
      /** @type {any[]} */
      const newList = []
      requireContainer().push(newList)
      currentContainer = newList
    }
  }
  return output
}
// TODO: depends on prior def level