2024-03-18 23:36:16 +00:00
|
|
|
/**
 * Dremel-assembly of arrays of values into lists
 *
 * Reconstructs a nested list structure from flat arrays of definition and
 * repetition levels, according to Dremel encoding. This simplified version
 * focuses on arrays and scalar values, with optional support for null values.
 *
 * One entry is processed per element of `repetitionLevels`:
 * - A repetition level below `maxRepetitionLevel` closes open lists down to
 *   that depth and, when the definition level is non-zero, opens new lists
 *   back up to `maxRepetitionLevel`.
 * - A definition level equal to `maxDefinitionLevel` consumes the next entry
 *   of `values`; any other definition level is treated as missing and, when
 *   `isNull` is set, may emit `undefined` instead.
 *
 * @param {number[] | undefined} definitionLevels definition level per entry;
 *   when undefined or empty every entry is treated as fully defined
 * @param {number[]} repetitionLevels repetition level per entry
 * @param {ArrayLike<any>} values non-null values, consumed in order
 * @param {boolean} isNull can an entry be null?
 * @param {number} maxDefinitionLevel definition level that corresponds to non-null
 * @param {number} maxRepetitionLevel repetition level at which an entry is
 *   appended to the current innermost list; lower levels start new lists
 * @returns {any[]} array of values
 * @throws {Error} if there is no current container to append a value to
 */
export function assembleObjects(
  definitionLevels, repetitionLevels, values, isNull, maxDefinitionLevel, maxRepetitionLevel
) {
  let valueIndex = 0
  /** @type {any[]} */
  const output = []
  let currentContainer = output

  // Stack of currently open lists; index 0 is always the root output list.
  const containerStack = [output]

  for (let i = 0; i < repetitionLevels.length; i++) {
    // Without definition levels every entry is considered fully defined
    const def = definitionLevels?.length ? definitionLevels[i] : maxDefinitionLevel
    const rep = repetitionLevels[i]

    if (rep !== maxRepetitionLevel) {
      // Move back to the parent container at depth `rep`
      while (rep < containerStack.length - 1) {
        containerStack.pop()
      }
      // @ts-expect-error won't be empty
      currentContainer = containerStack.at(-1)
      // Construct new lists up to max repetition level
      if (def) {
        for (let j = rep; j < maxRepetitionLevel; j++) {
          /** @type {any[]} */
          const newList = []
          currentContainer.push(newList)
          currentContainer = newList
          containerStack.push(newList)
        }
      }
    }

    // Add value or null based on definition level
    if (def === maxDefinitionLevel) {
      if (!currentContainer) {
        throw new Error('parquet assembleObjects: currentContainer is undefined')
      }
      currentContainer.push(values[valueIndex++])
    } else if (isNull) {
      if (def) {
        // Go up maxDefinitionLevel - def - 1 levels to add null
        for (let j = def; j < maxDefinitionLevel - 1; j++) {
          // Never pop the root: an empty stack would leave currentContainer
          // undefined and make every later entry throw.
          if (containerStack.length > 1) {
            containerStack.pop()
          }
          // @ts-expect-error won't be empty
          currentContainer = containerStack.at(-1)
        }
        // Entries defined only to level 1 emit nothing
        if (def > 1) {
          currentContainer.push(undefined)
        }
      } else {
        currentContainer.push(undefined)
      }
    }
  }

  // Handle edge cases for empty inputs or single-level data
  if (output.length === 0) {
    if (values.length > 0 && maxRepetitionLevel === 0) {
      // All values belong to the same (root) list
      return [values]
    }
    // return max definition level of nested lists
    for (let i = 0; i < maxDefinitionLevel; i++) {
      /** @type {any[]} */
      const newList = []
      currentContainer.push(newList)
      currentContainer = newList
    }
  }

  return output
}
|