2024-01-14 23:36:58 +00:00
|
|
|
import { describe, expect, it } from 'vitest'
|
|
|
|
|
import {
|
|
|
|
|
getMaxDefinitionLevel,
|
|
|
|
|
getMaxRepetitionLevel,
|
2024-04-30 00:38:26 +00:00
|
|
|
getSchemaPath,
|
2024-04-29 23:47:52 +00:00
|
|
|
isListLike,
|
|
|
|
|
isMapLike,
|
2024-01-14 23:36:58 +00:00
|
|
|
} from '../src/schema.js'
|
|
|
|
|
|
|
|
|
|
describe('Parquet schema utils', () => {
|
2024-02-11 22:33:56 +00:00
|
|
|
/**
 * Flattened schema fixture shared by all tests in this suite.
 * The root declares seven children; the groups below are listed in the
 * order in which they appear under the root.
 *
 * @import {SchemaElement} from '../src/types.js'
 * @type {SchemaElement[]}
 */
const schema = [
  { name: 'root', num_children: 7 },

  // 1. leaf column without children
  { name: 'flat', repetition_type: 'OPTIONAL' },

  // 2. LIST group: single REPEATED 'list' child wrapping one 'element'
  { name: 'listy', repetition_type: 'OPTIONAL', num_children: 1, converted_type: 'LIST' },
  { name: 'list', repetition_type: 'REPEATED', num_children: 1 },
  { name: 'element', repetition_type: 'REQUIRED' },

  // 3. MAP group: single REPEATED 'map' child holding 'key' and 'value'
  { name: 'mappy', repetition_type: 'OPTIONAL', num_children: 1, converted_type: 'MAP' },
  { name: 'map', repetition_type: 'REPEATED', num_children: 2 },
  { name: 'key', repetition_type: 'REQUIRED' },
  { name: 'value', repetition_type: 'OPTIONAL' },

  // 4. LIST-annotated group with two REPEATED children
  { name: 'invalid_list', repetition_type: 'OPTIONAL', num_children: 2, converted_type: 'LIST' },
  { name: 'list1', repetition_type: 'REPEATED' },
  { name: 'list2', repetition_type: 'REPEATED' },

  // 5. LIST-annotated group with two REQUIRED children and no REPEATED layer
  { name: 'structy', repetition_type: 'OPTIONAL', num_children: 2, converted_type: 'LIST' },
  { name: 'element1', repetition_type: 'REQUIRED' },
  { name: 'element2', repetition_type: 'REQUIRED' },

  // 6. LIST group whose REPEATED 'list' child has two children
  { name: 'list_structy', repetition_type: 'OPTIONAL', num_children: 1, converted_type: 'LIST' },
  { name: 'list', repetition_type: 'REPEATED', num_children: 2 },
  { name: 'element1', repetition_type: 'REQUIRED' },
  { name: 'element2', repetition_type: 'REQUIRED' },

  // 7. second top-level element named 'invalid_list': its 'list' child is OPTIONAL, not REPEATED
  { name: 'invalid_list', repetition_type: 'OPTIONAL', num_children: 1, converted_type: 'LIST' },
  { name: 'list', repetition_type: 'OPTIONAL', num_children: 1 },
  { name: 'element', repetition_type: 'OPTIONAL' },
]
|
|
|
|
|
|
2024-04-30 00:38:26 +00:00
|
|
|
describe('getSchemaPath', () => {
  it('return the root schema path', () => {
    // An empty path resolves to the root, which is the last entry of the result
    const resolved = getSchemaPath(schema, [])
    const rootNode = resolved.at(-1)
    const expectedSubset = {
      count: 22,
      element: { name: 'root', num_children: 7 },
      path: [],
    }

    expect(rootNode?.children.length).toEqual(7)
    expect(rootNode).containSubset(expectedSubset)
  })

  it('return the schema path', () => {
    // The last entry of the resolved path describes the requested column
    const flatNode = getSchemaPath(schema, ['flat']).at(-1)
    const expectedNode = {
      children: [],
      count: 1,
      element: { name: 'flat', repetition_type: 'OPTIONAL' },
      path: ['flat'],
    }

    expect(flatNode).toEqual(expectedNode)
  })

  it('throw an error if element not found', () => {
    // Wrapped in a function so that expect can observe the throw
    const lookupMissing = () => getSchemaPath(schema, ['nonexistent'])

    expect(lookupMissing).toThrow('parquet schema element not found: nonexistent')
  })
})
|
|
|
|
|
|
2024-04-29 23:47:52 +00:00
|
|
|
it('getMaxRepetitionLevel', () => {
  // Pairs of [column path, expected max repetition level], checked in order
  /** @type {[string[], number][]} */
  const expectations = [
    [['flat'], 0],
    [['listy'], 0],
    [['listy', 'list', 'element'], 1],
    [['mappy'], 0],
    [['mappy', 'map', 'key'], 1],
  ]

  for (const [columnPath, maxLevel] of expectations) {
    const schemaPath = getSchemaPath(schema, columnPath)
    expect(getMaxRepetitionLevel(schemaPath)).toBe(maxLevel)
  }
})
|
|
|
|
|
|
2024-04-29 23:47:52 +00:00
|
|
|
it('getMaxDefinitionLevel', () => {
  // Pairs of [column path, expected max definition level], checked in order
  /** @type {[string[], number][]} */
  const expectations = [
    [['flat'], 1],
    [['listy'], 1],
    [['mappy'], 1],
  ]

  for (const [columnPath, maxLevel] of expectations) {
    const schemaPath = getSchemaPath(schema, columnPath)
    expect(getMaxDefinitionLevel(schemaPath)).toBe(maxLevel)
  }
})
|
|
|
|
|
|
2024-04-29 23:47:52 +00:00
|
|
|
it('isListLike', () => {
  // Every lookup hands entry [1] of the resolved path to isListLike
  // (presumably the top-level column: deeper paths below expect the same
  // result as their first segment).
  expect(isListLike(getSchemaPath(schema, [])[1])).toBe(false)
  expect(isListLike(getSchemaPath(schema, ['flat'])[1])).toBe(false)
  expect(isListLike(getSchemaPath(schema, ['listy'])[1])).toBe(true)
  expect(isListLike(getSchemaPath(schema, ['listy', 'list', 'element'])[1])).toBe(true)
  expect(isListLike(getSchemaPath(schema, ['mappy'])[1])).toBe(false)
  expect(isListLike(getSchemaPath(schema, ['mappy', 'map', 'key'])[1])).toBe(false)
  expect(isListLike(getSchemaPath(schema, ['invalid_list'])[1])).toBe(false)
  expect(isListLike(getSchemaPath(schema, ['invalid_list', 'list1'])[1])).toBe(false)
  expect(isListLike(getSchemaPath(schema, ['invalid_list', 'list2'])[1])).toBe(false)
  expect(isListLike(getSchemaPath(schema, ['structy'])[1])).toBe(false)
  expect(isListLike(getSchemaPath(schema, ['list_structy'])[1])).toBe(false)

  // The fixture contains two top-level elements named 'invalid_list', so a
  // lookup by name cannot tell them apart; the former last assertion merely
  // repeated the 'invalid_list' lookup above. Select the second one (whose
  // inner 'list' is OPTIONAL) from the root's children so it is actually checked.
  const root = getSchemaPath(schema, []).at(-1)
  const invalidLists = (root?.children ?? [])
    .filter(child => child.element.name === 'invalid_list')
  expect(invalidLists.length).toBe(2)
  expect(isListLike(invalidLists[1])).toBe(false)
})
|
|
|
|
|
|
|
|
|
|
it('isMapLike', () => {
  // Every lookup hands entry [1] of the resolved path to isMapLike
  // (presumably the top-level column: deeper paths below expect the same
  // result as their first segment).
  expect(isMapLike(getSchemaPath(schema, [])[1])).toBe(false)
  expect(isMapLike(getSchemaPath(schema, ['flat'])[1])).toBe(false)
  expect(isMapLike(getSchemaPath(schema, ['listy'])[1])).toBe(false)
  expect(isMapLike(getSchemaPath(schema, ['listy', 'list', 'element'])[1])).toBe(false)
  expect(isMapLike(getSchemaPath(schema, ['mappy'])[1])).toBe(true)
  expect(isMapLike(getSchemaPath(schema, ['mappy', 'map', 'key'])[1])).toBe(true)
  expect(isMapLike(getSchemaPath(schema, ['mappy', 'map', 'value'])[1])).toBe(true)
  expect(isMapLike(getSchemaPath(schema, ['invalid_list'])[1])).toBe(false)
  expect(isMapLike(getSchemaPath(schema, ['invalid_list', 'list1'])[1])).toBe(false)
  expect(isMapLike(getSchemaPath(schema, ['invalid_list', 'list2'])[1])).toBe(false)
  expect(isMapLike(getSchemaPath(schema, ['structy'])[1])).toBe(false)
  expect(isMapLike(getSchemaPath(schema, ['list_structy'])[1])).toBe(false)

  // The fixture contains two top-level elements named 'invalid_list', so a
  // lookup by name cannot tell them apart; the former last assertion merely
  // repeated the 'invalid_list' lookup above. Select the second one from the
  // root's children so it is actually checked.
  const root = getSchemaPath(schema, []).at(-1)
  const invalidLists = (root?.children ?? [])
    .filter(child => child.element.name === 'invalid_list')
  expect(invalidLists.length).toBe(2)
  expect(isMapLike(invalidLists[1])).toBe(false)
})
|
2024-01-14 23:36:58 +00:00
|
|
|
})
|