Undefined int map

This commit is contained in:
Kenny Daniel 2024-03-18 23:54:58 -07:00
parent b7af8d3186
commit b0d653f342
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
7 changed files with 54 additions and 36 deletions

@ -27,7 +27,7 @@
"typecheck": "tsc"
},
"devDependencies": {
"@types/node": "20.11.29",
"@types/node": "20.11.30",
"@typescript-eslint/eslint-plugin": "7.3.1",
"@vitest/coverage-v8": "1.4.0",
"eslint": "8.57.0",

@ -3,21 +3,21 @@
*
* @param {number[] | undefined} definitionLevels definition levels, max 3
* @param {number[]} repetitionLevels repetition levels, max 1
* @param {ArrayLike<any>} value values to process
* @param {ArrayLike<any>} values values to process
* @param {boolean} isNull can an entry be null?
* @param {number} maxDefinitionLevel definition level that corresponds to non-null
* @returns {any[]} array of values
*/
export function assembleObjects(
definitionLevels, repetitionLevels, value, isNull, maxDefinitionLevel
definitionLevels, repetitionLevels, values, isNull, maxDefinitionLevel
) {
let vali = 0
let valueIndex = 0
let started = false
let haveNull = false
let i = 0
let outputIndex = 0
let part = []
/** @type {any[]} */
const assign = []
const output = []
for (let counter = 0; counter < repetitionLevels.length; counter++) {
const def = definitionLevels?.length ? definitionLevels[counter] : maxDefinitionLevel
@ -26,13 +26,13 @@ export function assembleObjects(
if (!rep) {
// new row - save what we have
if (started) {
assign[i] = haveNull ? undefined : part
output[outputIndex] = haveNull ? undefined : part
part = []
i++
outputIndex++
} else {
// first time: no row to save yet, unless it's a row continued from previous page
if (vali > 0) {
assign[i - 1] = assign[i - 1]?.concat(part) // add items to previous row
if (valueIndex > 0) {
output[outputIndex - 1] = output[outputIndex - 1]?.concat(part) // add items to previous row
part = []
// don't increment outputIndex since we only filled outputIndex - 1
}
@ -42,8 +42,8 @@ export function assembleObjects(
if (def === maxDefinitionLevel) {
// append real value to current item
part.push(value[vali])
vali++
part.push(values[valueIndex])
valueIndex++
} else if (def > 0) {
// append null to current item
part.push(undefined)
@ -53,8 +53,8 @@ export function assembleObjects(
}
if (started) {
assign[i] = haveNull ? undefined : part
output[outputIndex] = haveNull ? undefined : part
}
return assign
return output
}

@ -171,12 +171,18 @@ async function readRowGroup(options, rowGroup) {
}
// assemble map-like column data
for (let i = 0; i < keys.length; i++) {
/** @type {Record<string, any>} */
const obj = {}
for (let j = 0; j < keys[i].length; j++) {
obj[keys[i][j]] = values[i][j]
// keys will be empty for {} and undefined for null
if (keys[i] !== undefined) {
/** @type {Record<string, any>} */
const obj = {}
for (let j = 0; j < keys[i].length; j++) {
if (keys[i][j] === undefined) continue
obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j]
}
out.push(obj)
} else {
out.push(undefined)
}
out.push(obj)
}
columnData = out
}

@ -1,5 +1,9 @@
[
[{ "k1": 1, "k2": 100 }],
[{ "k1": 2 }],
[{ }]
[{ "k1": 2, "k2": null }],
[{ }],
[{ }],
[{ }],
[null],
[{ "k1": null, "k3": null }]
]

@ -1,8 +1,8 @@
{
"version": 1,
"created_by": "DuckDB",
"metadata_length": 241,
"num_rows": 3,
"metadata_length": 243,
"num_rows": 7,
"row_groups": [
{
"columns": [
@ -12,14 +12,14 @@
"codec": "SNAPPY",
"data_page_offset": 4,
"encodings": ["PLAIN"],
"num_values": 5,
"num_values": 10,
"path_in_schema": ["int_map", "key_value", "key"],
"statistics": {
"max": "k2",
"max": "k3",
"min": "k1"
},
"total_compressed_size": 60,
"total_uncompressed_size": 63,
"total_compressed_size": 64,
"total_uncompressed_size": 82,
"type": "BYTE_ARRAY"
}
},
@ -27,21 +27,21 @@
"file_offset": 0,
"meta_data": {
"codec": "SNAPPY",
"data_page_offset": 64,
"data_page_offset": 68,
"encodings": ["PLAIN"],
"num_values": 5,
"num_values": 10,
"path_in_schema": ["int_map", "key_value", "value"],
"statistics": {
"max": "d\u0000\u0000\u0000",
"min": "\u0001\u0000\u0000\u0000"
},
"total_compressed_size": 52,
"total_uncompressed_size": 53,
"total_compressed_size": 60,
"total_uncompressed_size": 59,
"type": "INT32"
}
}
],
"num_rows": 3,
"num_rows": 7,
"total_byte_size": 74752
}
],

Binary file not shown.

@ -96,19 +96,27 @@ describe('parquetRead', () => {
columnName: 'int_map',
columnData: [
{ k1: 1, k2: 100 },
{ k1: 2 },
{ k1: 2, k2: null },
{ },
{ },
{ },
null,
{ k1: null, k3: null },
],
rowStart: 0,
rowEnd: 3,
rowEnd: 7,
})
},
onComplete: (rows) => {
/* eslint-disable no-sparse-arrays */
expect(toJson(rows)).toEqual([
[{ k1: 1, k2: 100 }],
[{ k1: 2 }],
[{}],
[{ k1: 2, k2: null }],
[{ }],
[{ }],
[{ }],
[null],
[{ k1: null, k3: null }],
])
},
})