From 06578a9419f7675ad16ea3dc3d4026e5022e6ceb Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Thu, 23 May 2024 02:09:04 -0700 Subject: [PATCH] struct_strings.parquet --- src/assemble.js | 2 +- test/files/struct_strings.json | 4 ++ test/files/struct_strings.metadata.json | 90 ++++++++++++++++++++++++ test/files/struct_strings.parquet | Bin 0 -> 282 bytes 4 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 test/files/struct_strings.json create mode 100644 test/files/struct_strings.metadata.json create mode 100644 test/files/struct_strings.parquet diff --git a/src/assemble.js b/src/assemble.js index 8c70148..7eebebf 100644 --- a/src/assemble.js +++ b/src/assemble.js @@ -49,7 +49,7 @@ export function assembleLists( currentContainer = containerStack.at(-1) // Go deeper to end of definition level - while (currentDepth < repetitionPath.length - 2 && currentDefLevel < def) { + while (currentDepth < repetitionPath.length - 2 && (currentDefLevel < def || repetitionPath[currentDepth + 1] === 'REQUIRED')) { currentDepth++ if (repetitionPath[currentDepth] !== 'REQUIRED') { /** @type {any[]} */ diff --git a/test/files/struct_strings.json b/test/files/struct_strings.json new file mode 100644 index 0000000..26785a4 --- /dev/null +++ b/test/files/struct_strings.json @@ -0,0 +1,4 @@ +[ + [{ "f64_field": null, "str_field": "hello" }], + [{ "f64_field": 1.23, "str_field": null }] +] diff --git a/test/files/struct_strings.metadata.json b/test/files/struct_strings.metadata.json new file mode 100644 index 0000000..b4ab6dd --- /dev/null +++ b/test/files/struct_strings.metadata.json @@ -0,0 +1,90 @@ +{ + "version": 1, + "schema": [ + { + "name": "test", + "num_children": 1 + }, + { + "repetition_type": "REQUIRED", + "name": "inner", + "num_children": 2 + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "str_field", + "converted_type": "UTF8", + "logical_type": { + "type": "STRING" + } + }, + { + "type": "DOUBLE", + "repetition_type": "OPTIONAL", + "name": "f64_field" + } + ], + "num_rows": 2, + "row_groups": [ + { + "columns": [ + { + "file_offset": 4, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "RLE", + "PLAIN" + ], + "path_in_schema": [ + "inner", + "str_field" + ], + "codec": "SNAPPY", + "num_values": 2, + "total_uncompressed_size": 32, + "total_compressed_size": 34, + "key_value_metadata": [], + "data_page_offset": 4, + "statistics": { + "null_count": 1, + "distinct_count": 1 + } + } + }, + { + "file_offset": 38, + "meta_data": { + "type": "DOUBLE", + "encodings": [ + "RLE", + "PLAIN" + ], + "path_in_schema": [ + "inner", + "f64_field" + ], + "codec": "SNAPPY", + "num_values": 2, + "total_uncompressed_size": 31, + "total_compressed_size": 33, + "key_value_metadata": [], + "data_page_offset": 38, + "statistics": { + "null_count": 1, + "distinct_count": 1, + "max_value": 1.23, + "min_value": 1.23 + } + } + } + ], + "total_byte_size": 0, + "num_rows": 2 + } + ], + "key_value_metadata": [], + "created_by": "parquet-go", + "metadata_length": 203 +} diff --git a/test/files/struct_strings.parquet b/test/files/struct_strings.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7a0da50fc85150fb3fe8fa96bf331fb0209f9b06 GIT binary patch literal 282 zcmWG=3^EjD5S0^E(h+3=GT21f7#R31m>3usm>F4tWJYREPCigVMpOYw1)m961=BkB zhgBl$KHG~jN&0xOl%y7yh%zylGDxsy=H;apiLx+=@~ARNa2A&o#iwPa=A@`H_{aba z73BhnrkR;QL>a_bBxNLZ)Hq~Bc_dZY7$h|y`Vsm>nZ#Jc9K>8CdBiwu%$USLwyXKb zh;pGQhZ_i1ZYSmhRPF;-F2MnFDS`zwLJVj#gCvgxS3zP?VQFfKZhAh$X`pxjFjN3$ C4meN% literal 0 HcmV?d00001