From b0d653f342fd1eefee7153fff50be6fcb29888dc Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Mon, 18 Mar 2024 23:54:58 -0700 Subject: [PATCH] Undefined int map --- package.json | 2 +- src/assemble.js | 26 +++++++++++++------------- src/read.js | 16 +++++++++++----- test/files/Int_Map.json | 8 ++++++-- test/files/Int_Map.metadata.json | 22 +++++++++++----------- test/files/Int_Map.parquet | Bin 365 -> 379 bytes test/read.test.js | 16 ++++++++++++---- 7 files changed, 54 insertions(+), 36 deletions(-) diff --git a/package.json b/package.json index 4064e29..c1c986b 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,7 @@ "typecheck": "tsc" }, "devDependencies": { - "@types/node": "20.11.29", + "@types/node": "20.11.30", "@typescript-eslint/eslint-plugin": "7.3.1", "@vitest/coverage-v8": "1.4.0", "eslint": "8.57.0", diff --git a/src/assemble.js b/src/assemble.js index 0892716..0b69670 100644 --- a/src/assemble.js +++ b/src/assemble.js @@ -3,21 +3,21 @@ * * @param {number[] | undefined} definitionLevels definition levels, max 3 * @param {number[]} repetitionLevels repetition levels, max 1 - * @param {ArrayLike} value values to process + * @param {ArrayLike} values values to process * @param {boolean} isNull can an entry be null? * @param {number} maxDefinitionLevel definition level that corresponds to non-null * @returns {any[]} array of values */ export function assembleObjects( - definitionLevels, repetitionLevels, value, isNull, maxDefinitionLevel + definitionLevels, repetitionLevels, values, isNull, maxDefinitionLevel ) { - let vali = 0 + let valueIndex = 0 let started = false let haveNull = false - let i = 0 + let outputIndex = 0 let part = [] /** @type {any[]} */ - const assign = [] + const output = [] for (let counter = 0; counter < repetitionLevels.length; counter++) { const def = definitionLevels?.length ? definitionLevels[counter] : maxDefinitionLevel @@ -26,13 +26,13 @@ export function assembleObjects( if (!rep) { // new row - save what we have if (started) { - assign[i] = haveNull ? undefined : part + output[outputIndex] = haveNull ? undefined : part part = [] - i++ + outputIndex++ } else { // first time: no row to save yet, unless it's a row continued from previous page - if (vali > 0) { - assign[i - 1] = assign[i - 1]?.concat(part) // add items to previous row + if (valueIndex > 0) { + output[outputIndex - 1] = output[outputIndex - 1]?.concat(part) // add items to previous row part = [] // don't increment i since we only filled i-1 } @@ -42,8 +42,8 @@ export function assembleObjects( if (def === maxDefinitionLevel) { // append real value to current item - part.push(value[vali]) - vali++ + part.push(values[valueIndex]) + valueIndex++ } else if (def > 0) { // append null to current item part.push(undefined) @@ -53,8 +53,8 @@ export function assembleObjects( } if (started) { - assign[i] = haveNull ? undefined : part + output[outputIndex] = haveNull ? undefined : part } - return assign + return output } diff --git a/src/read.js b/src/read.js index f95a9ee..867e3b5 100644 --- a/src/read.js +++ b/src/read.js @@ -171,12 +171,18 @@ async function readRowGroup(options, rowGroup) { } // assemble map-like column data for (let i = 0; i < keys.length; i++) { - /** @type {Record} */ - const obj = {} - for (let j = 0; j < keys[i].length; j++) { - obj[keys[i][j]] = values[i][j] + // keys will be empty for {} and undefined for null + if (keys[i] !== undefined) { + /** @type {Record} */ + const obj = {} + for (let j = 0; j < keys[i].length; j++) { + if (keys[i][j] === undefined) continue + obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j] + } + out.push(obj) + } else { + out.push(undefined) } - out.push(obj) } columnData = out } diff --git a/test/files/Int_Map.json b/test/files/Int_Map.json index d689f50..c9c37aa 100644 --- a/test/files/Int_Map.json +++ b/test/files/Int_Map.json @@ -1,5 +1,9 @@ [ [{ "k1": 1, "k2": 100 }], - [{ "k1": 2 }], - [{ }] + [{ "k1": 2, "k2": null }], + [{ }], + [{ }], + [{ }], + [null], + [{ "k1": null, "k3": null }] ] diff --git a/test/files/Int_Map.metadata.json b/test/files/Int_Map.metadata.json index 0a1d93e..4bf84aa 100644 --- a/test/files/Int_Map.metadata.json +++ b/test/files/Int_Map.metadata.json @@ -1,8 +1,8 @@ { "version": 1, "created_by": "DuckDB", - "metadata_length": 241, - "num_rows": 3, + "metadata_length": 243, + "num_rows": 7, "row_groups": [ { "columns": [ @@ -12,14 +12,14 @@ "codec": "SNAPPY", "data_page_offset": 4, "encodings": ["PLAIN"], - "num_values": 5, + "num_values": 10, "path_in_schema": ["int_map", "key_value", "key"], "statistics": { - "max": "k2", + "max": "k3", "min": "k1" }, - "total_compressed_size": 60, - "total_uncompressed_size": 63, + "total_compressed_size": 64, + "total_uncompressed_size": 82, "type": "BYTE_ARRAY" } }, @@ -27,21 +27,21 @@ "file_offset": 0, "meta_data": { "codec": "SNAPPY", - "data_page_offset": 64, + "data_page_offset": 68, "encodings": ["PLAIN"], - "num_values": 5, + "num_values": 10, "path_in_schema": ["int_map", "key_value", "value"], "statistics": { "max": "d\u0000\u0000\u0000", "min": "\u0001\u0000\u0000\u0000" }, - "total_compressed_size": 52, - "total_uncompressed_size": 53, + "total_compressed_size": 60, + "total_uncompressed_size": 59, "type": "INT32" } } ], - "num_rows": 3, + "num_rows": 7, "total_byte_size": 74752 } ], diff --git a/test/files/Int_Map.parquet b/test/files/Int_Map.parquet index 28e88250ce59e38327a5c8c3155dbeeb2d1db760..93d1fe9321db6af56119a8b9e47012ff554c9a54 100644 GIT binary patch delta 221 zcmaFM^qZ+Zz%j^BltHwCQ8Y$JR0POk6J=vyaFF3)U|?WkU}9us3E=`#96&Ay6B{EF z0}B%qkezMF%En;C%H{@CXAIO8A{vIIT?eUz%j^BltDB`G)za73&>y-Wn*B_li^}uU|?ckV&r5IVF5BZm>Bs47#Ok* znHbp&jF^BTKqiPZ0&4LPbwkptf}}SDXchw-GZPaNBO{QN0>mI)qD+!8rVJ9iDW%C1 zyFD1$CSFk&;S#G8t5D;xkzmR;0%Ai8AU#=-QHPyNEK4jytzohYqmn3_8V7?0n@eeO Owu=+PN1#a{*8>1LdmEAf diff --git a/test/read.test.js b/test/read.test.js index 5e5bd05..7f3a651 100644 --- a/test/read.test.js +++ b/test/read.test.js @@ -96,19 +96,27 @@ describe('parquetRead', () => { columnName: 'int_map', columnData: [ { k1: 1, k2: 100 }, - { k1: 2 }, + { k1: 2, k2: null }, { }, + { }, + { }, + null, + { k1: null, k3: null }, ], rowStart: 0, - rowEnd: 3, + rowEnd: 7, }) }, onComplete: (rows) => { /* eslint-disable no-sparse-arrays */ expect(toJson(rows)).toEqual([ [{ k1: 1, k2: 100 }], - [{ k1: 2 }], - [{}], + [{ k1: 2, k2: null }], + [{ }], + [{ }], + [{ }], + [null], + [{ k1: null, k3: null }], ]) }, })