// Source: hyparquet/src/assemble.js (83 lines, 2.7 KiB, JavaScript)

/**
 * Dremel-assembly of arrays of values into lists
 *
 * Reconstructs a nested list structure from flat arrays of definition and
 * repetition levels, according to Dremel encoding. This simplified version
 * focuses on arrays and scalar values, with optional support for null values.
 *
 * One entry is processed per repetition level:
 * - a repetition level other than maxRepetitionLevel unwinds to the list at
 *   that depth and, when the definition level is non-zero, opens new nested
 *   lists down to maxRepetitionLevel
 * - a definition level equal to maxDefinitionLevel consumes the next value
 * - when isNull is set, a definition level of 0 appends undefined, and a level
 *   strictly between 0 and maxDefinitionLevel steps back out of the current list
 *
 * If nothing was assembled, the result is `[values]` when there are values and
 * maxRepetitionLevel is 0; otherwise maxDefinitionLevel empty lists are nested
 * inside the returned array.
 *
 * @param {number[] | undefined} definitionLevels definition level per entry; when missing or empty every entry is treated as fully defined
 * @param {number[]} repetitionLevels repetition level per entry
 * @param {ArrayLike<any>} values values to process
 * @param {boolean} isNull can an entry be null?
 * @param {number} maxDefinitionLevel definition level that corresponds to non-null
 * @param {number} maxRepetitionLevel deepest repetition level; entries at this level are appended to the current innermost list
 * @returns {any[]} array of values
 */
export function assembleObjects(
  definitionLevels, repetitionLevels, values, isNull, maxDefinitionLevel, maxRepetitionLevel
) {
  let valueIndex = 0
  /** @type {any[]} */
  const output = []
  let currentContainer = output

  // Stack of currently open lists; index 0 is always the root output.
  const containerStack = [output]

  for (let i = 0; i < repetitionLevels.length; i++) {
    const def = definitionLevels?.length ? definitionLevels[i] : maxDefinitionLevel
    const rep = repetitionLevels[i]
    if (rep !== maxRepetitionLevel) {
      // Move back to the container at depth `rep`
      while (rep < containerStack.length - 1) {
        containerStack.pop()
      }
      // @ts-expect-error won't be empty
      currentContainer = containerStack.at(-1)
      // Construct new lists up to max repetition level (skipped for def 0)
      if (def) {
        for (let j = rep; j < maxRepetitionLevel; j++) {
          /** @type {any[]} */
          const newList = []
          currentContainer.push(newList)
          currentContainer = newList
          containerStack.push(newList)
        }
      }
    }

    // Add value or null based on definition level
    if (def === maxDefinitionLevel) {
      currentContainer.push(values[valueIndex++])
    } else if (isNull && def < maxDefinitionLevel) {
      if (def) {
        // Go up one level, but never drop the root: popping it would leave
        // currentContainer undefined and make the next push throw.
        if (containerStack.length > 1) {
          containerStack.pop()
        }
        // @ts-expect-error won't be empty
        currentContainer = containerStack.at(-1)
      } else {
        currentContainer.push(undefined)
      }
    }
  }

  // Handle edge cases for empty inputs or single-level data
  if (output.length === 0) {
    if (values.length > 0 && maxRepetitionLevel === 0) {
      // All values belong to the same (root) list
      return [values]
    }
    // return max definition level of nested lists
    for (let i = 0; i < maxDefinitionLevel; i++) {
      /** @type {any[]} */
      const newList = []
      currentContainer.push(newList)
      currentContainer = newList
    }
  }

  return output
}