mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-01-05 10:56:38 +00:00
Split out assemble objects
This commit is contained in:
parent
4654c5eddf
commit
52721a3d30
@ -27,15 +27,15 @@
|
||||
"typecheck": "tsc"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.11.27",
|
||||
"@typescript-eslint/eslint-plugin": "7.2.0",
|
||||
"@vitest/coverage-v8": "1.3.1",
|
||||
"@types/node": "20.11.29",
|
||||
"@typescript-eslint/eslint-plugin": "7.3.1",
|
||||
"@vitest/coverage-v8": "1.4.0",
|
||||
"eslint": "8.57.0",
|
||||
"eslint-plugin-import": "2.29.1",
|
||||
"eslint-plugin-jsdoc": "48.2.1",
|
||||
"http-server": "14.1.1",
|
||||
"hysnappy": "0.3.0",
|
||||
"typescript": "5.4.2",
|
||||
"vitest": "1.3.1"
|
||||
"vitest": "1.4.0"
|
||||
}
|
||||
}
|
||||
|
||||
60
src/assemble.js
Normal file
60
src/assemble.js
Normal file
@ -0,0 +1,60 @@
|
||||
/**
 * Dremel-assembly of a flat run of values into per-row lists.
 *
 * Walks the repetition levels once. A repetition level of 0 starts a new row;
 * any other level appends to the row currently being built. For each entry the
 * definition level decides what is appended:
 *   - equal to maxDefinitionLevel: the next item of `value` is consumed
 *   - between 0 and maxDefinitionLevel: an `undefined` placeholder is appended
 *   - 0: nothing is appended
 *
 * A row is emitted as `undefined` instead of a list when `isNull` is true and
 * the last entry seen for that row had definition level 0.
 *
 * Entries that appear before the first repetition level of 0 are discarded:
 * this function only returns rows that start inside the given levels. Values
 * consumed by such entries still advance the position in `value`.
 *
 * @param {number[] | undefined} definitionLevels definition levels; when empty
 *   or undefined every entry is treated as having maxDefinitionLevel
 * @param {number[]} repetitionLevels repetition levels, one per entry
 * @param {ArrayLike<any>} value values to process
 * @param {boolean} isNull can an entry be null?
 * @param {number} maxDefinitionLevel definition level that corresponds to non-null
 * @returns {any[]} one element per row started: a list of values, or undefined
 */
export function assembleObjects(
  definitionLevels, repetitionLevels, value, isNull, maxDefinitionLevel
) {
  let vali = 0 // index of the next unread item in `value`
  let started = false // true once the first repetition level of 0 was seen
  let haveNull = false // did the most recent entry mark the row as null?
  let part = [] // elements collected for the row in progress
  /** @type {any[]} */
  const assign = []

  for (let counter = 0; counter < repetitionLevels.length; counter++) {
    const def = definitionLevels?.length ? definitionLevels[counter] : maxDefinitionLevel
    const rep = repetitionLevels[counter]

    if (!rep) {
      // new row - save what we have
      if (started) {
        assign.push(haveNull ? undefined : part)
      }
      // On the first row start there is no earlier row in `assign` to extend,
      // so whatever was collected so far is dropped. Writing it to
      // `assign[-1]` (as the index arithmetic used to do) only created a stray
      // "-1" property on the returned array.
      part = []
      started = true
    }

    if (def === maxDefinitionLevel) {
      // append real value to current item
      part.push(value[vali])
      vali++
    } else if (def > 0) {
      // append null to current item
      part.push(undefined)
    }

    haveNull = def === 0 && isNull
  }

  // flush the row that was still being built when the levels ran out
  if (started) {
    assign.push(haveNull ? undefined : part)
  }

  return assign
}
|
||||
@ -1,6 +1,7 @@
|
||||
import { assembleObjects } from './assemble.js'
|
||||
import { PageType } from './constants.js'
|
||||
import { convert } from './convert.js'
|
||||
import { assembleObjects, readDataPage, readDictionaryPage } from './datapage.js'
|
||||
import { readDataPage, readDictionaryPage } from './datapage.js'
|
||||
import { readDataPageV2 } from './datapageV2.js'
|
||||
import { parquetHeader } from './header.js'
|
||||
import { getMaxDefinitionLevel, isRequired, schemaElement } from './schema.js'
|
||||
@ -29,7 +30,6 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
let dictionary = undefined
|
||||
let valuesSeen = 0
|
||||
let byteOffset = 0 // byteOffset within the column
|
||||
const rowIndex = [0] // map/list object index
|
||||
const rowData = []
|
||||
|
||||
while (valuesSeen < rowGroup.num_rows) {
|
||||
@ -66,10 +66,9 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
dereferenceDictionary(dictionary, dataPage)
|
||||
// Use repetition levels to construct lists
|
||||
const isNull = columnMetadata && !isRequired(schema, [columnMetadata.path_in_schema[0]])
|
||||
const nullValue = false // TODO: unused?
|
||||
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)
|
||||
values = assembleObjects(
|
||||
definitionLevels, repetitionLevels, dataPage, isNull, nullValue, maxDefinitionLevel, rowIndex[0]
|
||||
definitionLevels, repetitionLevels, dataPage, isNull, maxDefinitionLevel
|
||||
)
|
||||
} else if (definitionLevels?.length) {
|
||||
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)
|
||||
@ -115,7 +114,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
dereferenceDictionary(dictionary, dataPage)
|
||||
// Use repetition levels to construct lists
|
||||
rowData.push(...assembleObjects(
|
||||
definitionLevels, repetitionLevels, dataPage, true, false, maxDefinitionLevel, rowIndex[0]
|
||||
definitionLevels, repetitionLevels, dataPage, true, maxDefinitionLevel
|
||||
))
|
||||
} else if (daph2.num_nulls) {
|
||||
// skip nulls
|
||||
|
||||
@ -168,68 +168,3 @@ function readDefinitionLevels(dataView, offset, daph, schema, path_in_schema) {
|
||||
}
|
||||
return { byteLength: 0, definitionLevels: [], numNulls: 0 }
|
||||
}
|
||||
|
||||
/**
 * Dremel-assembly of a flat run of values into per-row lists.
 *
 * A repetition level of 0 starts a new row; any other level appends to the row
 * in progress. A definition level equal to maxDefinitionLevel consumes the next
 * item of `value`, a level between 0 and maxDefinitionLevel appends an
 * `undefined` placeholder, and level 0 appends nothing. When `isNull` is true
 * and the last entry of a row had definition level 0, the row is stored as
 * `undefined` instead of a list.
 *
 * Rows are stored starting at index `prevIndex` of the returned array, so
 * indexes below `prevIndex` are not filled with rows.
 *
 * @param {number[] | undefined} definitionLevels definition levels; when empty
 *   or undefined every entry is treated as having maxDefinitionLevel
 * @param {number[]} repetitionLevels repetition levels, one per entry
 * @param {ArrayLike<any>} value values to process
 * @param {boolean} isNull can an entry be null?
 * @param {boolean} nullValue can list elements be null? (not read by this function)
 * @param {number} maxDefinitionLevel definition level that corresponds to non-null
 * @param {number} prevIndex 1 + index where the last row in the previous page was inserted (0 if first page)
 * @returns {any[]} array of values
 */
export function assembleObjects(
  definitionLevels, repetitionLevels, value, isNull, nullValue, maxDefinitionLevel, prevIndex
) {
  let vali = 0 // index of the next unread item in `value`
  let started = false // true once the first repetition level of 0 was seen
  let haveNull = false // did the most recent entry mark the row as null?
  let i = prevIndex // index in `assign` where the row in progress will be stored
  let part = [] // elements collected for the row in progress
  /** @type {any[]} */
  const assign = []

  for (let counter = 0; counter < repetitionLevels.length; counter++) {
    const def = definitionLevels?.length ? definitionLevels[counter] : maxDefinitionLevel
    const rep = repetitionLevels[counter]

    if (!rep) {
      // new row - save what we have
      if (started) {
        assign[i] = haveNull ? undefined : part
        part = []
        i++
      } else {
        // first time: no row to save yet, unless it's a row continued from previous page
        if (vali > 0) {
          // Only touch slot i - 1 when it is a real index: with i === 0 the
          // write would land on the property "-1" of the array.
          // NOTE(review): `assign` is created fresh in this call, so
          // assign[i - 1] is still unset here and the optional chain yields
          // undefined - the continuation is not actually merged anywhere.
          if (i > 0) {
            assign[i - 1] = assign[i - 1]?.concat(part) // add items to previous row
          }
          part = []
          // don't increment i since we only filled i-1
        }
        started = true
      }
    }

    if (def === maxDefinitionLevel) {
      // append real value to current item
      part.push(value[vali])
      vali++
    } else if (def > 0) {
      // append null to current item
      part.push(undefined)
    }

    haveNull = def === 0 && isNull
  }

  if (started) {
    // flush the row that was still being built when the levels ran out
    assign[i] = haveNull ? undefined : part
  } else if (vali > 0 && i > 0) {
    // no row started here: everything belongs to the previous row
    // (same index guard as above to avoid writing to "-1")
    assign[i - 1] = assign[i - 1]?.concat(part)
  }

  return assign
}
|
||||
|
||||
48
test/assemble.test.js
Normal file
48
test/assemble.test.js
Normal file
@ -0,0 +1,48 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { assembleObjects } from '../src/assemble.js'
|
||||
|
||||
// Unit tests for assembleObjects: each case feeds definition levels, repetition
// levels and flat values, then checks the assembled rows.
describe('assembleObjects', () => {
  it('should assemble objects with non-null values', () => {
    // no definition levels supplied, two entries forming a single row
    const reps = [0, 1]
    const flat = ['a', 'b']
    expect(assembleObjects([], reps, flat, false, 3)).toEqual([['a', 'b']])
  })

  it('should handle null values', () => {
    const defs = [3, 0, 3]
    const reps = [0, 0, 1]
    const flat = ['a', 'c']
    const rows = assembleObjects(defs, reps, flat, true, 3)
    expect(rows).toEqual([['a'], ['c']])
  })

  it('should handle empty lists', () => {
    // nothing in, nothing out
    expect(assembleObjects([], [], [], false, 3)).toEqual([])
  })

  it('should handle multiple lists', () => {
    const defs = [3, 3, 3, 3, 3, 3]
    const reps = [0, 1, 1, 0, 1, 1]
    const flat = [1, 2, 3, 4, 5, 6]
    const rows = assembleObjects(defs, reps, flat, false, 3)
    expect(rows).toEqual([[1, 2, 3], [4, 5, 6]])
  })

  it('should assemble multiple lists with nulls', () => {
    // the third entry has definition level 0 and yields an undefined row
    const defs = [3, 3, 0, 3, 3]
    const reps = [0, 1, 0, 0, 1]
    const flat = ['a', 'b', 'd', 'e']
    const rows = assembleObjects(defs, reps, flat, true, 3)
    expect(rows).toEqual([['a', 'b'], undefined, ['d', 'e']])
  })

  it('should handle continuing a row from the previous page', () => {
    // the first entry has a non-zero repetition level, i.e. it precedes the first row start
    const defs = [3, 3, 3, 1]
    const reps = [1, 0, 1, 0]
    const flat = ['a', 'b', 'c', 'd']
    const rows = assembleObjects(defs, reps, flat, false, 3)
    expect(rows).toEqual([['b', 'c'], [undefined]])
  })
})
|
||||
Loading…
Reference in New Issue
Block a user