hyparquet/src/assemble.js

83 lines
2.6 KiB
JavaScript
Raw Normal View History

2024-03-18 23:36:16 +00:00
/**
 * Dremel-assembly of arrays of values into lists
 *
 * Reconstructs a complex nested structure from flat arrays of definition and
 * repetition levels, according to Dremel encoding.
 *
 * One entry is processed per element of `repetitionLevels`. For each entry the
 * repetition level selects which open list to continue, the definition level
 * decides how many nested lists must exist, and then either the next value,
 * `undefined`, or an empty list is appended.
 *
 * @param {number[] | undefined} definitionLevels definition levels; when missing
 *   or empty, every entry is treated as being at maxDefinitionLevel
 * @param {number[]} repetitionLevels repetition levels
 * @param {ArrayLike<any>} values values to process
 * @param {boolean} isNullable can entries be null?
 * @param {number} maxDefinitionLevel definition level that corresponds to non-null
 * @param {number} maxRepetitionLevel repetition level at which an entry continues
 *   the current innermost list; any other level unwinds to an outer list first
 * @returns {any[]} array of values
 */
export function assembleObjects(
  definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel
) {
  let valueIndex = 0
  /** @type {any[]} */
  const output = []
  let currentContainer = output

  // Stack of currently open lists, outermost (the output itself) first.
  const containerStack = [output]

  // Loop-invariant: without definition levels every entry counts as fully defined.
  const hasDefinitionLevels = Boolean(definitionLevels?.length)

  for (let i = 0; i < repetitionLevels.length; i++) {
    const def = hasDefinitionLevels ? definitionLevels[i] : maxDefinitionLevel
    const rep = repetitionLevels[i]

    if (rep !== maxRepetitionLevel) {
      // Close lists nested deeper than the repetition level
      while (rep < containerStack.length - 1) {
        containerStack.pop()
      }
      // Continue in the deepest list that is still open
      // @ts-expect-error won't be empty
      currentContainer = containerStack.at(-1)
    }

    // Open nested lists until the depth required by this entry is reached.
    // For nullable data the depth may be fractional; the `<` comparison
    // effectively rounds it up.
    const targetDepth = isNullable ? (def + 1) / 2 : maxRepetitionLevel + 1
    for (let depth = containerStack.length; depth < targetDepth; depth++) {
      /** @type {any[]} */
      const newList = []
      currentContainer.push(newList)
      currentContainer = newList
      containerStack.push(newList)
    }

    // Add value or null based on definition level
    if (def === maxDefinitionLevel) {
      currentContainer.push(values[valueIndex++])
    } else if (isNullable) {
      // TODO: actually depends on level required or not
      if (def % 2 === 0) {
        // even definition level: missing entry
        currentContainer.push(undefined)
      } else {
        // odd definition level: empty list
        currentContainer.push([])
      }
    }
  }

  // Handle edge cases for empty inputs or single-level data
  if (output.length === 0) {
    if (values.length > 0 && maxRepetitionLevel === 0) {
      // All values belong to the same (root) list
      return [values]
    }
    // Nothing was assembled: nest one empty list per definition level
    for (let i = 0; i < maxDefinitionLevel; i++) {
      /** @type {any[]} */
      const newList = []
      currentContainer.push(newList)
      currentContainer = newList
    }
  }

  return output
}
// TODO: depends on prior def level