From ed3b525a27e0abf8f4a170adb5a401ae4eff9759 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Thu, 23 May 2024 18:07:36 -0700 Subject: [PATCH] Fix nested optional from duckdb#3734 :duck: --- src/column.js | 2 +- test/files/duckdb3734.json | 102 ++++++++++++ test/files/duckdb3734.metadata.json | 231 ++++++++++++++++++++++++++++ test/files/duckdb3734.parquet | Bin 0 -> 1215 bytes 4 files changed, 334 insertions(+), 1 deletion(-) create mode 100644 test/files/duckdb3734.json create mode 100644 test/files/duckdb3734.metadata.json create mode 100644 test/files/duckdb3734.parquet diff --git a/src/column.js b/src/column.js index 4b1dbcb..e7b8592 100644 --- a/src/column.js +++ b/src/column.js @@ -64,7 +64,7 @@ export function readColumn(reader, rowGroup, columnMetadata, schemaPath, { compr // wrap nested flat data by depth for (let i = 2; i < schemaPath.length; i++) { if (schemaPath[i].element.repetition_type !== 'REQUIRED') { - values = [values] + values = Array.from(values, e => [e]) } } } diff --git a/test/files/duckdb3734.json b/test/files/duckdb3734.json new file mode 100644 index 0000000..f27184c --- /dev/null +++ b/test/files/duckdb3734.json @@ -0,0 +1,102 @@ +[ + [ + "tt0000001", + { + "category": "self", + "characters": ["[\"Self\"]"], + "job": "\\N", + "nconst": "nm1588970", + "ordering": 1 + } + ], + [ + "tt0000001", + { + "category": "director", + "characters": ["\\N"], + "job": "\\N", + "nconst": "nm0005690", + "ordering": 2 + } + ], + [ + "tt0000001", + { + "category": "cinematographer", + "characters": ["\\N"], + "job": "director of photography", + "nconst": "nm0374658", + "ordering": 3 + } + ], + [ + "tt0000002", + { + "category": "director", + "characters": ["\\N"], + "job": "\\N", + "nconst": "nm0721526", + "ordering": 1 + } + ], + [ + "tt0000002", + { + "category": "composer", + "characters": ["\\N"], + "job": "\\N", + "nconst": "nm1335271", + "ordering": 2 + } + ], + [ + "tt0000003", + { + "category": "director", + "characters": ["\\N"], + "job": "\\N", + "nconst": "nm0721526", + "ordering": 1 + } + ], + [ + "tt0000003", + { + "category": "producer", + "characters": ["\\N"], + "job": "producer", + "nconst": "nm1770680", + "ordering": 2 + } + ], + [ + "tt0000003", + { + "category": "composer", + "characters": ["\\N"], + "job": "\\N", + "nconst": "nm1335271", + "ordering": 3 + } + ], + [ + "tt0000003", + { + "category": "editor", + "characters": ["\\N"], + "job": "\\N", + "nconst": "nm5442200", + "ordering": 4 + } + ], + [ + "tt0000004", + { + "category": "director", + "characters": ["\\N"], + "job": "\\N", + "nconst": "nm0721526", + "ordering": 1 + } + ] +] diff --git a/test/files/duckdb3734.metadata.json b/test/files/duckdb3734.metadata.json new file mode 100644 index 0000000..bfaa7e9 --- /dev/null +++ b/test/files/duckdb3734.metadata.json @@ -0,0 +1,231 @@ +{ + "version": 1, + "schema": [ + { + "repetition_type": "REQUIRED", + "name": "duckdb_schema", + "num_children": 2 + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "tconst", + "num_children": 0, + "converted_type": "UTF8" + }, + { + "repetition_type": "OPTIONAL", + "name": "principals", + "num_children": 5 + }, + { + "type": "INT32", + "repetition_type": "OPTIONAL", + "name": "ordering", + "num_children": 0, + "converted_type": "INT_32" + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "nconst", + "num_children": 0, + "converted_type": "UTF8" + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "category", + "num_children": 0, + "converted_type": "UTF8" + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "job", + "num_children": 0, + "converted_type": "UTF8" + }, + { + "repetition_type": "OPTIONAL", + "name": "characters", + "num_children": 1, + "converted_type": "LIST" + }, + { + "repetition_type": "REPEATED", + "name": "list", + "num_children": 1 + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "element", + "num_children": 0, + "converted_type": "UTF8" + } + ], + "num_rows": 10, + "row_groups": [ + { + "columns": [ + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN", + "RLE_DICTIONARY" + ], + "path_in_schema": [ + "tconst" + ], + "codec": "SNAPPY", + "num_values": 10, + "total_uncompressed_size": 0, + "total_compressed_size": 83, + "data_page_offset": 4, + "statistics": { + "max": "tt0000004", + "min": "tt0000001", + "null_count": 0, + "max_value": "tt0000004", + "min_value": "tt0000001" + } + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "INT32", + "encodings": [ + "PLAIN" + ], + "path_in_schema": [ + "principals", + "ordering" + ], + "codec": "SNAPPY", + "num_values": 10, + "total_uncompressed_size": 0, + "total_compressed_size": 53, + "data_page_offset": 87, + "statistics": { + "max": 4, + "min": 1, + "null_count": 0, + "max_value": 4, + "min_value": 1 + } + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN" + ], + "path_in_schema": [ + "principals", + "nconst" + ], + "codec": "SNAPPY", + "num_values": 10, + "total_uncompressed_size": 0, + "total_compressed_size": 116, + "data_page_offset": 140, + "statistics": { + "max": "nm5442200", + "min": "nm0005690", + "null_count": 0, + "max_value": "nm5442200", + "min_value": "nm0005690" + } + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN" + ], + "path_in_schema": [ + "principals", + "category" + ], + "codec": "SNAPPY", + "num_values": 10, + "total_uncompressed_size": 0, + "total_compressed_size": 121, + "data_page_offset": 256, + "statistics": { + "max": "self", + "min": "cinematographer", + "null_count": 0, + "max_value": "self", + "min_value": "cinematographer" + } + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN" + ], + "path_in_schema": [ + "principals", + "job" + ], + "codec": "SNAPPY", + "num_values": 10, + "total_uncompressed_size": 0, + "total_compressed_size": 90, + "data_page_offset": 377, + "statistics": { + "max": "producer", + "min": "\\N", + "null_count": 0, + "max_value": "producer", + "min_value": "\\N" + } + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN" + ], + "path_in_schema": [ + "principals", + "characters", + "list", + "element" + ], + "codec": "SNAPPY", + "num_values": 10, + "total_uncompressed_size": 0, + "total_compressed_size": 50, + "data_page_offset": 467, + "statistics": { + "max": "\\N", + "min": "[\"Self\"]", + "max_value": "\\N", + "min_value": "[\"Self\"]" + } + } + } + ], + "total_byte_size": 0, + "num_rows": 10, + "file_offset": 4 + } + ], + "created_by": "DuckDB", + "metadata_length": 690 +} diff --git a/test/files/duckdb3734.parquet b/test/files/duckdb3734.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4a27b9fdd76b34c79f3666b0442cf45c4e8b5b19 GIT binary patch literal 1215 zcmZ{kzi-n}5XbMmIIbPVYJ>d3OI1OMTGOG5oF9%;DFa0vI#f{wY@|Y|<2ETt964s_ z1S1n-WMJ-u5JF5049xr?%>4rp_x#+%ty)XA-@Dv>?%v%uKHPm)WeOXx{R5^k0BCR> zpHXCI2~;ZvKn)*_dSbN2)nLFLu<|xD*$Pvc3b5Rw7-W)+3JTFE&7en@?F$c(9+i#Q zgA(*3xqJp+g?vHV77In#@J8Q{l>H{Du=nIZ#)5-~*JGwf!pGv$QoUBg=L6Y@ce^gOnTkvEmEQ3QF z-t8dyGht6sUF=9OWK~-cF)z!{@m%F2at{;zQi0LZ7YDe3HNWm3znXaiJRnnmiWkJv z+FVBo#}^9vYFRD9)_jw;V^0K*mKc<_QYT!TOrp=MucXZtp$x7F$s#bdOECGH7&YmJ zg#0DeCyE75#)%-sQp(vD1>R=Ai@a#7WM2~Noq`n#-SXudN)^T4`omrHctR5VgIJf@ zHrKA?#%Z^MyleY3W