diff --git a/package.json b/package.json index 4064e29..c1c986b 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,7 @@ "typecheck": "tsc" }, "devDependencies": { - "@types/node": "20.11.29", + "@types/node": "20.11.30", "@typescript-eslint/eslint-plugin": "7.3.1", "@vitest/coverage-v8": "1.4.0", "eslint": "8.57.0", diff --git a/src/assemble.js b/src/assemble.js index 0892716..0b69670 100644 --- a/src/assemble.js +++ b/src/assemble.js @@ -3,21 +3,21 @@ * * @param {number[] | undefined} definitionLevels definition levels, max 3 * @param {number[]} repetitionLevels repetition levels, max 1 - * @param {ArrayLike} value values to process + * @param {ArrayLike} values values to process * @param {boolean} isNull can an entry be null? * @param {number} maxDefinitionLevel definition level that corresponds to non-null * @returns {any[]} array of values */ export function assembleObjects( - definitionLevels, repetitionLevels, value, isNull, maxDefinitionLevel + definitionLevels, repetitionLevels, values, isNull, maxDefinitionLevel ) { - let vali = 0 + let valueIndex = 0 let started = false let haveNull = false - let i = 0 + let outputIndex = 0 let part = [] /** @type {any[]} */ - const assign = [] + const output = [] for (let counter = 0; counter < repetitionLevels.length; counter++) { const def = definitionLevels?.length ? definitionLevels[counter] : maxDefinitionLevel @@ -26,13 +26,13 @@ export function assembleObjects( if (!rep) { // new row - save what we have if (started) { - assign[i] = haveNull ? undefined : part + output[outputIndex] = haveNull ? undefined : part part = [] - i++ + outputIndex++ } else { // first time: no row to save yet, unless it's a row continued from previous page - if (vali > 0) { - assign[i - 1] = assign[i - 1]?.concat(part) // add items to previous row + if (valueIndex > 0) { + output[outputIndex - 1] = output[outputIndex - 1]?.concat(part) // add items to previous row part = [] // don't increment i since we only filled i-1 } @@ -42,8 +42,8 @@ export function assembleObjects( if (def === maxDefinitionLevel) { // append real value to current item - part.push(value[vali]) - vali++ + part.push(values[valueIndex]) + valueIndex++ } else if (def > 0) { // append null to current item part.push(undefined) @@ -53,8 +53,8 @@ export function assembleObjects( } if (started) { - assign[i] = haveNull ? undefined : part + output[outputIndex] = haveNull ? undefined : part } - return assign + return output } diff --git a/src/read.js b/src/read.js index f95a9ee..867e3b5 100644 --- a/src/read.js +++ b/src/read.js @@ -171,12 +171,18 @@ async function readRowGroup(options, rowGroup) { } // assemble map-like column data for (let i = 0; i < keys.length; i++) { - /** @type {Record} */ - const obj = {} - for (let j = 0; j < keys[i].length; j++) { - obj[keys[i][j]] = values[i][j] + // keys will be empty for {} and undefined for null + if (keys[i] !== undefined) { + /** @type {Record} */ + const obj = {} + for (let j = 0; j < keys[i].length; j++) { + if (keys[i][j] === undefined) continue + obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j] + } + out.push(obj) + } else { + out.push(undefined) } - out.push(obj) } columnData = out } diff --git a/test/files/Int_Map.json b/test/files/Int_Map.json index d689f50..c9c37aa 100644 --- a/test/files/Int_Map.json +++ b/test/files/Int_Map.json @@ -1,5 +1,9 @@ [ [{ "k1": 1, "k2": 100 }], - [{ "k1": 2 }], - [{ }] + [{ "k1": 2, "k2": null }], + [{ }], + [{ }], + [{ }], + [null], + [{ "k1": null, "k3": null }] ] diff --git a/test/files/Int_Map.metadata.json b/test/files/Int_Map.metadata.json index 0a1d93e..4bf84aa 100644 --- a/test/files/Int_Map.metadata.json +++ b/test/files/Int_Map.metadata.json @@ -1,8 +1,8 @@ { "version": 1, "created_by": "DuckDB", - "metadata_length": 241, - "num_rows": 3, + "metadata_length": 243, + "num_rows": 7, "row_groups": [ { "columns": [ @@ -12,14 +12,14 @@ "codec": "SNAPPY", "data_page_offset": 4, "encodings": ["PLAIN"], - "num_values": 5, + "num_values": 10, "path_in_schema": ["int_map", "key_value", "key"], "statistics": { - "max": "k2", + "max": "k3", "min": "k1" }, - "total_compressed_size": 60, - "total_uncompressed_size": 63, + "total_compressed_size": 64, + "total_uncompressed_size": 82, "type": "BYTE_ARRAY" } }, @@ -27,21 +27,21 @@ "file_offset": 0, "meta_data": { "codec": "SNAPPY", - "data_page_offset": 64, + "data_page_offset": 68, "encodings": ["PLAIN"], - "num_values": 5, + "num_values": 10, "path_in_schema": ["int_map", "key_value", "value"], "statistics": { "max": "d\u0000\u0000\u0000", "min": "\u0001\u0000\u0000\u0000" }, - "total_compressed_size": 52, - "total_uncompressed_size": 53, + "total_compressed_size": 60, + "total_uncompressed_size": 59, "type": "INT32" } } ], - "num_rows": 3, + "num_rows": 7, "total_byte_size": 74752 } ], diff --git a/test/files/Int_Map.parquet b/test/files/Int_Map.parquet index 28e8825..93d1fe9 100644 Binary files a/test/files/Int_Map.parquet and b/test/files/Int_Map.parquet differ diff --git a/test/read.test.js b/test/read.test.js index 5e5bd05..7f3a651 100644 --- a/test/read.test.js +++ b/test/read.test.js @@ -96,19 +96,27 @@ describe('parquetRead', () => { columnName: 'int_map', columnData: [ { k1: 1, k2: 100 }, - { k1: 2 }, + { k1: 2, k2: null }, { }, + { }, + { }, + null, + { k1: null, k3: null }, ], rowStart: 0, - rowEnd: 3, + rowEnd: 7, }) }, onComplete: (rows) => { /* eslint-disable no-sparse-arrays */ expect(toJson(rows)).toEqual([ [{ k1: 1, k2: 100 }], - [{ k1: 2 }], - [{}], + [{ k1: 2, k2: null }], + [{ }], + [{ }], + [{ }], + [null], + [{ k1: null, k3: null }], ]) }, })