/**
 * Build a tree from the schema elements.
 *
 * The flat schema list is read in pre-order: the children of an element are
 * taken from the entries that directly follow it, and each child consumes as
 * many entries as its own subtree contains.
 *
 * @typedef {import('./types.js').SchemaElement} SchemaElement
 * @typedef {import('./types.js').SchemaTree} SchemaTree
 * @param {SchemaElement[]} schema flat list of schema elements
 * @param {number} rootIndex index of the root element
 * @param {string[]} path path to the element
 * @returns {SchemaTree} tree of schema elements
 * @throws {Error} if there is no element at rootIndex, or the list ends before
 *   all declared children have been read
 */
function schemaTree(schema, rootIndex, path) {
  const element = schema[rootIndex]
  if (!element) {
    throw new Error(`parquet schema element missing at index ${rootIndex}`)
  }
  const children = []
  // Number of schema entries consumed by this subtree, including this element
  let count = 1

  // Read the specified number of children
  if (element.num_children) {
    while (children.length < element.num_children) {
      const childIndex = rootIndex + count
      const childElement = schema[childIndex]
      if (!childElement) {
        // Fail with a clear message instead of dereferencing undefined
        throw new Error(`parquet schema element "${element.name}" expects ${element.num_children} children but schema ends at index ${childIndex}`)
      }
      const child = schemaTree(schema, childIndex, [...path, childElement.name])
      // Skip past the whole subtree of the child just read
      count += child.count
      children.push(child)
    }
  }

  return { count, element, children, path }
}
/**
 * Get schema elements from the root to the given element name.
 *
 * The returned list always starts with the root tree node, followed by one
 * node per entry of `name`, each found among the children of the previous one.
 *
 * @param {SchemaElement[]} schema
 * @param {string[]} name path to the element
 * @returns {SchemaTree[]} list of schema elements
 * @throws {Error} if any part of the path has no matching child
 */
export function getSchemaPath(schema, name) {
  let tree = schemaTree(schema, 0, [])
  const path = [tree]
  for (const part of name) {
    const child = tree.children.find(candidate => candidate.element.name === part)
    if (!child) throw new Error(`parquet schema element not found: ${name}`)
    path.push(child)
    // Descend: the next part is looked up among this child's children
    tree = child
  }
  return path
}
/**
 * Check if the schema path and all its ancestors are required.
 *
 * The first entry of the path is skipped; every remaining entry must have
 * repetition_type 'REQUIRED'.
 *
 * @param {SchemaTree[]} schemaPath
 * @returns {boolean} true if the element is required
 */
export function isRequired(schemaPath) {
  // slice(1) drops the first path entry, which is not tested
  for (const { element } of schemaPath.slice(1)) {
    if (element.repetition_type !== 'REQUIRED') {
      return false
    }
  }
  return true
}
/**
 * Get the max repetition level for a given schema path.
 *
 * Counts every entry of the path (including the first) whose repetition_type
 * is 'REPEATED'.
 *
 * @param {SchemaTree[]} schemaPath
 * @returns {number} max repetition level
 */
export function getMaxRepetitionLevel(schemaPath) {
  let maxLevel = 0
  for (const { element } of schemaPath) {
    if (element.repetition_type === 'REPEATED') {
      maxLevel++
    }
  }
  return maxLevel
}
/**
 * Get the max definition level for a given schema path.
 *
 * Counts every entry after the first whose repetition_type is anything other
 * than 'REQUIRED' (an undefined repetition_type therefore counts too).
 *
 * @param {SchemaTree[]} schemaPath
 * @returns {number} max definition level
 */
export function getMaxDefinitionLevel(schemaPath) {
  let maxLevel = 0
  // slice(1) drops the first path entry, which never contributes a level
  for (const { element } of schemaPath.slice(1)) {
    if (element.repetition_type !== 'REQUIRED') {
      maxLevel++
    }
  }
  return maxLevel
}
/**
 * Check if a column is list-like.
 *
 * Looks at the entry three from the end of the path and requires this shape:
 * an element with converted_type 'LIST' that has exactly one child, which is
 * 'REPEATED' and itself has exactly one child, which is 'REQUIRED'.
 *
 * @param {SchemaTree[]} schemaPath
 * @returns {boolean} true if list-like
 */
export function isListLike(schemaPath) {
  const schema = schemaPath.at(-3)
  if (!schema) return false
  if (schema.element.converted_type !== 'LIST') return false
  // Exactly one child: a node with no children is not list-like (and must not crash)
  if (schema.children.length !== 1) return false

  const firstChild = schema.children[0]
  if (firstChild.children.length !== 1) return false
  if (firstChild.element.repetition_type !== 'REPEATED') return false

  const secondChild = firstChild.children[0]
  if (secondChild.element.repetition_type !== 'REQUIRED') return false

  return true
}
/**
 * Check if a column is map-like.
 *
 * Looks at the entry three from the end of the path and requires this shape:
 * an element with converted_type 'MAP' that has exactly one child, which is
 * 'REPEATED' and has exactly two children. One of those must be named 'key'
 * and be 'REQUIRED'; a child named 'value', if present, must not be 'REPEATED'.
 *
 * @param {SchemaTree[]} schemaPath
 * @returns {boolean} true if map-like
 */
export function isMapLike(schemaPath) {
  const schema = schemaPath.at(-3)
  if (!schema) return false
  if (schema.element.converted_type !== 'MAP') return false
  // Exactly one child: a node with no children is not map-like (and must not crash)
  if (schema.children.length !== 1) return false

  const firstChild = schema.children[0]
  if (firstChild.children.length !== 2) return false
  if (firstChild.element.repetition_type !== 'REPEATED') return false

  // A missing 'key' child yields undefined here, which also fails the check
  const keyChild = firstChild.children.find(child => child.element.name === 'key')
  if (keyChild?.element.repetition_type !== 'REQUIRED') return false

  // A missing 'value' child yields undefined here, which passes the check
  const valueChild = firstChild.children.find(child => child.element.name === 'value')
  if (valueChild?.element.repetition_type === 'REPEATED') return false

  return true
}