hyparquet/test/assemble.test.js

210 lines
8.0 KiB
JavaScript
Raw Permalink Normal View History

2024-03-18 23:36:16 +00:00
import { describe, expect, it } from 'vitest'
2024-05-18 02:41:40 +00:00
import { assembleLists } from '../src/assemble.js'
2024-03-18 23:36:16 +00:00
2024-05-18 05:44:03 +00:00
describe('assembleLists', () => {
// Schema paths shared by the tests below. Each entry is the repetition type
// of one node along the path; the first node carries no repetition type.

// Single list, every node required.
const nonnullable = toSchemaPath([
  undefined, 'REQUIRED', 'REPEATED', 'REQUIRED',
])
// Single list, optional container and optional element.
const nullable = toSchemaPath([
  undefined, 'OPTIONAL', 'REPEATED', 'OPTIONAL',
])
// Two repeated nodes, all other nodes required.
const nestedRequired = toSchemaPath([
  undefined, 'REQUIRED', 'REPEATED', 'REQUIRED', 'REPEATED', 'REQUIRED',
])
// Two repeated nodes, all other nodes optional.
const nestedOptional = toSchemaPath([
  undefined, 'OPTIONAL', 'REPEATED', 'OPTIONAL', 'REPEATED', 'OPTIONAL',
])
/**
 * Build a schema path of stub nodes, one node per entry of the given
 * repetition path. Every node is named 'name', has a count of 1, and gets
 * its own empty `children` and `path` arrays.
 *
 * @import {FieldRepetitionType, SchemaTree} from '../src/types.js'
 * @param {(FieldRepetitionType | undefined)[]} repetitionPath
 * @returns {SchemaTree[]}
 */
function toSchemaPath(repetitionPath) {
  /**
   * Create the stub node for a single repetition type.
   *
   * @param {FieldRepetitionType | undefined} repetition
   * @returns {SchemaTree}
   */
  const toNode = repetition => {
    const element = { name: 'name', repetition_type: repetition }
    return { element, count: 1, children: [], path: [] }
  }
  return repetitionPath.map(repetition => toNode(repetition))
}
2024-05-18 05:44:03 +00:00
2024-03-18 23:36:16 +00:00
it('should assemble objects with non-null values', () => {
  // No definition levels are supplied; the two values with repetition
  // levels [0, 1] are expected to end up in a single list.
  const repetitionLevels = [0, 1]
  const values = ['a', 'b']
  const result = assembleLists([], [], repetitionLevels, values, nonnullable)
  expect(result).toEqual([['a', 'b']])
})
it('should handle null values', () => {
  // Three level entries but only two values: the entry with definition
  // level 0 is expected to show up as null between 'a' and 'c'.
  const definitionLevels = [3, 0, 3]
  const repetitionLevels = [0, 1, 1]
  const values = ['a', 'c']
  const result = assembleLists([], definitionLevels, repetitionLevels, values, nullable)
  expect(result).toEqual([[['a', null, 'c']]])
})
it('should handle empty lists', () => {
  // With no levels and no values the output is expected to be empty for
  // both the required and the optional schema path.
  expect(assembleLists([], [], [], [], nonnullable)).toEqual([])
  expect(assembleLists([], [], [], [], nullable)).toEqual([])
})
it('should handle multiple lists', () => {
  // Both entries have repetition level 0, and each value is expected to
  // land in its own list.
  const repetitionLevels = [0, 0]
  const values = [22, 33]
  const result = assembleLists([], [], repetitionLevels, values, nonnullable)
  expect(result).toEqual([[22], [33]])
})
it('should handle multiple lists (6)', () => {
  // Six values; a new list is expected wherever the repetition level
  // returns to 0, giving two lists of three values.
  const repetitionLevels = [0, 1, 1, 0, 1, 1]
  const values = [1, 2, 3, 4, 5, 6]
  const result = assembleLists([], [], repetitionLevels, values, nonnullable)
  expect(result).toEqual([[1, 2, 3], [4, 5, 6]])
})
it('should assemble multiple lists with nulls', () => {
  // Five level entries but four values: the entry with definition level 0
  // consumes no value and is expected to produce an empty row.
  const definitionLevels = [3, 3, 0, 3, 3]
  const repetitionLevels = [0, 1, 0, 0, 1]
  const values = ['a', 'b', 'd', 'e']
  const result = assembleLists([], definitionLevels, repetitionLevels, values, nullable)
  expect(result).toEqual([[['a', 'b']], [], [['d', 'e']]])
})
2025-05-25 06:35:12 +00:00
it('should continue from the previous page', () => {
  // The first entry has repetition level 1, so 'b' is expected to be
  // appended to the row already present in `prev`. The last entry has
  // definition level 1 and is expected to yield [[]]; the trailing value
  // 'e' does not appear in the expected output.
  const definitionLevels = [3, 3, 3, 1]
  const repetitionLevels = [1, 0, 1, 0]
  const values = ['b', 'c', 'd', 'e']
  const prev = [[['a']]]
  const result = assembleLists(prev, definitionLevels, repetitionLevels, values, nullable)
  expect(result).toEqual([[['a', 'b']], [['c', 'd']], [[]]])
})
2024-03-21 00:24:25 +00:00
2025-05-25 06:35:12 +00:00
it('should continue from the previous page (depth 2)', () => {
  // Row carried over from the previous page; the first entry of this page
  // has repetition level 2 and is expected to extend its inner list.
  const carriedOver = [[['a']]]
  const levels = [2, 0, 2, 0]
  const pageValues = ['b', 'c', 'd', 'e']
  const expected = [[['a', 'b']], [['c', 'd']], [['e']]]
  const assembled = assembleLists(carriedOver, [], levels, pageValues, nestedRequired)
  expect(assembled).toEqual(expected)
})
2024-03-21 00:24:25 +00:00
it('should handle nested arrays', () => {
  // from nullable.impala.parquet
  // Repetition level 2 is expected to extend the current inner list and
  // level 1 to open a new inner list within the same row.
  const repetitionLevels = [0, 2, 1, 2]
  const values = [1, 2, 3, 4]
  const result = assembleLists([], [], repetitionLevels, values, nestedRequired)
  expect(result).toEqual([[[1, 2], [3, 4]]])
})
it('should handle top repetition level', () => {
  // from int_map.parquet
  // Unlike the shared fixtures, the first node of this schema path has an
  // explicit repetition type ('REQUIRED') instead of undefined.
  const definitionLevels = [2, 2, 2, 2, 1, 1, 1, 0, 2, 2]
  const repetitionLevels = [0, 1, 0, 1, 0, 0, 0, 0, 0, 1]
  const values = ['k1', 'k2', 'k1', 'k2', 'k1', 'k3']
  const schemaPath = toSchemaPath(['REQUIRED', 'OPTIONAL', 'REPEATED', 'REQUIRED'])
  const result = assembleLists([], definitionLevels, repetitionLevels, values, schemaPath)
  expect(result).toEqual([
    [['k1', 'k2']],
    [['k1', 'k2']],
    [[]],
    [[]],
    [[]],
    [],
    [['k1', 'k3']],
  ])
})
it('should handle empty lists with definition level', () => {
  // from nonnullable.impala.parquet
  // A single entry with definition level 0 and no values is expected to
  // produce one empty list.
  expect(assembleLists([], [0], [0], [], nonnullable)).toEqual([[]])
})
2024-04-29 02:03:39 +00:00
it('should handle nonnullable lists', () => {
  // from nonnullable.impala.parquet
  // A single entry with definition level 1 is expected to place the value
  // in a one-element list.
  expect(assembleLists([], [1], [0], [-1], nonnullable)).toEqual([[-1]])
})
2024-03-30 00:28:14 +00:00
it('should handle nullable int_array', () => {
  // from nullable.impala.parquet int_array
  // [1 2 3][N 1 2 N 3 N][ ] N N
  // Twelve level entries but six values: only entries with definition
  // level 3 are expected to consume a value.
  const definitionLevels = [3, 3, 3, 2, 3, 3, 2, 3, 2, 1, 0, 0]
  const repetitionLevels = [0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
  const values = [1, 2, 3, 1, 2, 3]
  const result = assembleLists([], definitionLevels, repetitionLevels, values, nullable)
  expect(result).toEqual([
    [[1, 2, 3]],
    [[null, 1, 2, null, 3, null]],
    [[]],
    [],
    [],
  ])
})
2024-04-29 02:03:39 +00:00
it('should handle nullable int_array_Array', () => {
  // from nullable.impala.parquet int_array_Array
  // [1 2][3 4][[N 1 2 N][3 N 4] [] N][N] [] N N [N 5 6]
  // Twenty level entries but ten values: only entries with definition
  // level 5 are expected to consume a value.
  const definitionLevels = [5, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 3, 2, 2, 1, 0, 0, 2, 5, 5]
  const repetitionLevels = [0, 2, 1, 2, 0, 2, 2, 2, 1, 2, 2, 1, 1, 0, 0, 0, 0, 0, 1, 2]
  const values = [1, 2, 3, 4, 1, 2, 3, 4, 5, 6]
  const result = assembleLists([], definitionLevels, repetitionLevels, values, nestedOptional)
  expect(result).toEqual([
    [[[[1, 2]], [[3, 4]]]],
    [[[[null, 1, 2, null]], [[3, null, 4]], [[]], []]],
    [[[]]],
    [[]],
    [],
    [],
    [[[], [[5, 6]]]],
  ])
})
2024-05-09 22:42:04 +00:00
it('should handle nonnullable int_map_array keys', () => {
  // The last node of this schema path is REQUIRED. Only the entry with
  // definition level 4 is expected to consume the single value; the
  // level-3 entries are expected to yield [[]].
  const definitionLevels = [3, 4, 3, 3]
  const repetitionLevels = [0, 1, 1, 1]
  const values = ['k1']
  const schemaPath = toSchemaPath([undefined, 'OPTIONAL', 'REPEATED', 'OPTIONAL', 'REPEATED', 'REQUIRED'])
  const result = assembleLists([], definitionLevels, repetitionLevels, values, schemaPath)
  expect(result).toEqual([[[[[]], [['k1']], [[]], [[]]]]])
})
it('should handle nonnullable int_map_array values', () => {
  // The last node of this schema path is OPTIONAL. Only the entry with
  // definition level 5 is expected to consume the single value; the
  // level-3 entries are expected to yield [[]].
  const definitionLevels = [3, 5, 3, 3]
  const repetitionLevels = [0, 1, 1, 1]
  const values = [1]
  const schemaPath = toSchemaPath([undefined, 'OPTIONAL', 'REPEATED', 'OPTIONAL', 'REPEATED', 'OPTIONAL'])
  const result = assembleLists([], definitionLevels, repetitionLevels, values, schemaPath)
  expect(result).toEqual([[[[[]], [[1]], [[]], [[]]]]])
})
it('should handle mixed optional and required', () => {
  // from datapage_v2.snappy.parquet e
  // Optional container with a required element: the two entries with
  // definition level 0 are expected to produce empty rows.
  const definitionLevels = [2, 2, 2, 0, 0, 2, 2, 2, 2, 2]
  const repetitionLevels = [0, 1, 1, 0, 0, 0, 1, 1, 0, 1]
  const values = [1, 2, 3, 1, 2, 3, 1, 2]
  const schemaPath = toSchemaPath([undefined, 'OPTIONAL', 'REPEATED', 'REQUIRED'])
  const result = assembleLists([], definitionLevels, repetitionLevels, values, schemaPath)
  expect(result).toEqual([[[1, 2, 3]], [], [], [[1, 2, 3]], [[1, 2]]])
})
it('should handle nested required', () => {
  // from nonnullable.impala.parquet nested_Struct i
  // Two repeated nodes separated by required nodes; a single entry with
  // definition level 0 and no values is expected to give one empty list.
  const definitionLevels = [0]
  const repetitionLevels = [0]
  const schemaPath = toSchemaPath([
    undefined, 'REQUIRED', 'REQUIRED', 'REPEATED', 'REQUIRED', 'REQUIRED', 'REPEATED', 'REQUIRED',
  ])
  const result = assembleLists([], definitionLevels, repetitionLevels, [], schemaPath)
  expect(result).toEqual([[]])
})
it('should handle dzenilee', () => {
  // The optional schema path is used without any definition levels; all
  // six values are still expected to be assembled into two rows.
  const repetitionLevels = [0, 1, 1, 0, 1, 1]
  const values = ['a', 'b', 'c', 'd', 'e', 'f']
  const result = assembleLists([], [], repetitionLevels, values, nullable)
  expect(result).toEqual([[['a', 'b', 'c']], [['d', 'e', 'f']]])
})
2024-05-24 05:11:47 +00:00
it('handle complex.parquet with nested require', () => {
  // The schema path has no REPEATED node and no repetition levels are
  // passed; each value is expected to come back wrapped in its own list.
  const definitionLevels = [1, 1]
  const values = ['a', 'b']
  const schemaPath = toSchemaPath([undefined, 'OPTIONAL', 'REQUIRED', 'REQUIRED'])
  const result = assembleLists([], definitionLevels, [], values, schemaPath)
  expect(result).toEqual([['a'], ['b']])
})
2024-03-18 23:36:16 +00:00
})