mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-06 06:51:54 +00:00
Undefined int map
This commit is contained in:
parent
b7af8d3186
commit
b0d653f342
@ -27,7 +27,7 @@
|
||||
"typecheck": "tsc"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.11.29",
|
||||
"@types/node": "20.11.30",
|
||||
"@typescript-eslint/eslint-plugin": "7.3.1",
|
||||
"@vitest/coverage-v8": "1.4.0",
|
||||
"eslint": "8.57.0",
|
||||
|
||||
@ -3,21 +3,21 @@
|
||||
*
|
||||
* @param {number[] | undefined} definitionLevels definition levels, max 3
|
||||
* @param {number[]} repetitionLevels repetition levels, max 1
|
||||
* @param {ArrayLike<any>} value values to process
|
||||
* @param {ArrayLike<any>} values values to process
|
||||
* @param {boolean} isNull can an entry be null?
|
||||
* @param {number} maxDefinitionLevel definition level that corresponds to non-null
|
||||
* @returns {any[]} array of values
|
||||
*/
|
||||
export function assembleObjects(
|
||||
definitionLevels, repetitionLevels, value, isNull, maxDefinitionLevel
|
||||
definitionLevels, repetitionLevels, values, isNull, maxDefinitionLevel
|
||||
) {
|
||||
let vali = 0
|
||||
let valueIndex = 0
|
||||
let started = false
|
||||
let haveNull = false
|
||||
let i = 0
|
||||
let outputIndex = 0
|
||||
let part = []
|
||||
/** @type {any[]} */
|
||||
const assign = []
|
||||
const output = []
|
||||
|
||||
for (let counter = 0; counter < repetitionLevels.length; counter++) {
|
||||
const def = definitionLevels?.length ? definitionLevels[counter] : maxDefinitionLevel
|
||||
@ -26,13 +26,13 @@ export function assembleObjects(
|
||||
if (!rep) {
|
||||
// new row - save what we have
|
||||
if (started) {
|
||||
assign[i] = haveNull ? undefined : part
|
||||
output[outputIndex] = haveNull ? undefined : part
|
||||
part = []
|
||||
i++
|
||||
outputIndex++
|
||||
} else {
|
||||
// first time: no row to save yet, unless it's a row continued from previous page
|
||||
if (vali > 0) {
|
||||
assign[i - 1] = assign[i - 1]?.concat(part) // add items to previous row
|
||||
if (valueIndex > 0) {
|
||||
output[outputIndex - 1] = output[outputIndex - 1]?.concat(part) // add items to previous row
|
||||
part = []
|
||||
// don't increment i since we only filled i-1
|
||||
}
|
||||
@ -42,8 +42,8 @@ export function assembleObjects(
|
||||
|
||||
if (def === maxDefinitionLevel) {
|
||||
// append real value to current item
|
||||
part.push(value[vali])
|
||||
vali++
|
||||
part.push(values[valueIndex])
|
||||
valueIndex++
|
||||
} else if (def > 0) {
|
||||
// append null to current item
|
||||
part.push(undefined)
|
||||
@ -53,8 +53,8 @@ export function assembleObjects(
|
||||
}
|
||||
|
||||
if (started) {
|
||||
assign[i] = haveNull ? undefined : part
|
||||
output[outputIndex] = haveNull ? undefined : part
|
||||
}
|
||||
|
||||
return assign
|
||||
return output
|
||||
}
|
||||
|
||||
16
src/read.js
16
src/read.js
@ -171,12 +171,18 @@ async function readRowGroup(options, rowGroup) {
|
||||
}
|
||||
// assemble map-like column data
|
||||
for (let i = 0; i < keys.length; i++) {
|
||||
/** @type {Record<string, any>} */
|
||||
const obj = {}
|
||||
for (let j = 0; j < keys[i].length; j++) {
|
||||
obj[keys[i][j]] = values[i][j]
|
||||
// keys will be empty for {} and undefined for null
|
||||
if (keys[i] !== undefined) {
|
||||
/** @type {Record<string, any>} */
|
||||
const obj = {}
|
||||
for (let j = 0; j < keys[i].length; j++) {
|
||||
if (keys[i][j] === undefined) continue
|
||||
obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j]
|
||||
}
|
||||
out.push(obj)
|
||||
} else {
|
||||
out.push(undefined)
|
||||
}
|
||||
out.push(obj)
|
||||
}
|
||||
columnData = out
|
||||
}
|
||||
|
||||
@ -1,5 +1,9 @@
|
||||
[
|
||||
[{ "k1": 1, "k2": 100 }],
|
||||
[{ "k1": 2 }],
|
||||
[{ }]
|
||||
[{ "k1": 2, "k2": null }],
|
||||
[{ }],
|
||||
[{ }],
|
||||
[{ }],
|
||||
[null],
|
||||
[{ "k1": null, "k3": null }]
|
||||
]
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
{
|
||||
"version": 1,
|
||||
"created_by": "DuckDB",
|
||||
"metadata_length": 241,
|
||||
"num_rows": 3,
|
||||
"metadata_length": 243,
|
||||
"num_rows": 7,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
@ -12,14 +12,14 @@
|
||||
"codec": "SNAPPY",
|
||||
"data_page_offset": 4,
|
||||
"encodings": ["PLAIN"],
|
||||
"num_values": 5,
|
||||
"num_values": 10,
|
||||
"path_in_schema": ["int_map", "key_value", "key"],
|
||||
"statistics": {
|
||||
"max": "k2",
|
||||
"max": "k3",
|
||||
"min": "k1"
|
||||
},
|
||||
"total_compressed_size": 60,
|
||||
"total_uncompressed_size": 63,
|
||||
"total_compressed_size": 64,
|
||||
"total_uncompressed_size": 82,
|
||||
"type": "BYTE_ARRAY"
|
||||
}
|
||||
},
|
||||
@ -27,21 +27,21 @@
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"codec": "SNAPPY",
|
||||
"data_page_offset": 64,
|
||||
"data_page_offset": 68,
|
||||
"encodings": ["PLAIN"],
|
||||
"num_values": 5,
|
||||
"num_values": 10,
|
||||
"path_in_schema": ["int_map", "key_value", "value"],
|
||||
"statistics": {
|
||||
"max": "d\u0000\u0000\u0000",
|
||||
"min": "\u0001\u0000\u0000\u0000"
|
||||
},
|
||||
"total_compressed_size": 52,
|
||||
"total_uncompressed_size": 53,
|
||||
"total_compressed_size": 60,
|
||||
"total_uncompressed_size": 59,
|
||||
"type": "INT32"
|
||||
}
|
||||
}
|
||||
],
|
||||
"num_rows": 3,
|
||||
"num_rows": 7,
|
||||
"total_byte_size": 74752
|
||||
}
|
||||
],
|
||||
|
||||
Binary file not shown.
@ -96,19 +96,27 @@ describe('parquetRead', () => {
|
||||
columnName: 'int_map',
|
||||
columnData: [
|
||||
{ k1: 1, k2: 100 },
|
||||
{ k1: 2 },
|
||||
{ k1: 2, k2: null },
|
||||
{ },
|
||||
{ },
|
||||
{ },
|
||||
null,
|
||||
{ k1: null, k3: null },
|
||||
],
|
||||
rowStart: 0,
|
||||
rowEnd: 3,
|
||||
rowEnd: 7,
|
||||
})
|
||||
},
|
||||
onComplete: (rows) => {
|
||||
/* eslint-disable no-sparse-arrays */
|
||||
expect(toJson(rows)).toEqual([
|
||||
[{ k1: 1, k2: 100 }],
|
||||
[{ k1: 2 }],
|
||||
[{}],
|
||||
[{ k1: 2, k2: null }],
|
||||
[{ }],
|
||||
[{ }],
|
||||
[{ }],
|
||||
[null],
|
||||
[{ k1: null, k3: null }],
|
||||
])
|
||||
},
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user