From 5f4e2ffe598acaf4bce9bfdafe17ba1eefe4318a Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Tue, 13 Feb 2024 21:25:40 -0800 Subject: [PATCH] parquet-testing byte_array_decimal --- README.md | 3 +- src/toJson.js | 1 + test/files/README.md | 7 ++++ test/files/byte_array_decimal.json | 26 ++++++++++++ test/files/byte_array_decimal.metadata.json | 43 ++++++++++++++++++++ test/files/byte_array_decimal.parquet | Bin 0 -> 324 bytes test/files/rowgroups.metadata.json | 6 ++- test/metadata.test.js | 4 +- 8 files changed, 85 insertions(+), 5 deletions(-) create mode 100644 test/files/README.md create mode 100644 test/files/byte_array_decimal.json create mode 100644 test/files/byte_array_decimal.metadata.json create mode 100644 test/files/byte_array_decimal.parquet diff --git a/README.md b/README.md index 9366d74..15c6907 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,8 @@ Contributions are welcome! ## References - https://github.com/apache/parquet-format - - https://github.com/dask/fastparquet + - https://github.com/apache/parquet-testing - https://github.com/apache/thrift + - https://github.com/dask/fastparquet - https://github.com/google/snappy - https://github.com/zhipeng-jia/snappyjs diff --git a/src/toJson.js b/src/toJson.js index bf81685..11c1295 100644 --- a/src/toJson.js +++ b/src/toJson.js @@ -14,6 +14,7 @@ export function toJson(obj) { /** @type {Record} */ const newObj = {} for (const key of Object.keys(obj)) { + if (obj[key] === undefined) continue newObj[key] = toJson(obj[key]) } return newObj diff --git a/test/files/README.md b/test/files/README.md new file mode 100644 index 0000000..beb0bcb --- /dev/null +++ b/test/files/README.md @@ -0,0 +1,7 @@ +# Test Files License + +This directory contains binary test files from [apache/parquet-testing](https://github.com/apache/parquet-testing), under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0). + +Copyright 2004 The Apache Software Foundation (http://www.apache.org/). + + diff --git a/test/files/byte_array_decimal.json b/test/files/byte_array_decimal.json new file mode 100644 index 0000000..7ce1ae6 --- /dev/null +++ b/test/files/byte_array_decimal.json @@ -0,0 +1,26 @@ +[ + [ 10000 ], + [ 20000 ], + [ 30000 ], + [ 40000 ], + [ 50000 ], + [ 60000 ], + [ 70000 ], + [ 80000 ], + [ 90000 ], + [ 100000 ], + [ 110000 ], + [ 120000 ], + [ 130000 ], + [ 140000 ], + [ 150000 ], + [ 160000 ], + [ 170000 ], + [ 180000 ], + [ 190000 ], + [ 200000 ], + [ 210000 ], + [ 220000 ], + [ 230000 ], + [ 240000 ] +] diff --git a/test/files/byte_array_decimal.metadata.json b/test/files/byte_array_decimal.metadata.json new file mode 100644 index 0000000..4abe4d0 --- /dev/null +++ b/test/files/byte_array_decimal.metadata.json @@ -0,0 +1,43 @@ +{ + "version": 1, + "created_by": "HVR 5.3.0/9 (linux_glibc2.5-x64-64bit)", + "metadata_length": 119, + "num_rows": 24, + "row_groups": [ + { + "columns": [ + { + "file_offset": 4, + "meta_data": { + "codec": "UNCOMPRESSED", + "data_page_offset": 4, + "encodings": [], + "num_values": 24, + "path_in_schema": [ "value" ], + "total_compressed_size": 168, + "total_uncompressed_size": 168, + "type": 6 + } + } + ], + "num_rows": 24, + "total_byte_size": 168 + } + ], + "schema": [ + { + "name": "schema", + "num_children": 1, + "repetition_type": "REQUIRED" + }, + { + "converted_type": "DECIMAL", + "field_id": 6, + "name": "value", + "precision": 4, + "repetition_type": "OPTIONAL", + "scale": 2, + "type": 6 + } + ] +} diff --git a/test/files/byte_array_decimal.parquet b/test/files/byte_array_decimal.parquet new file mode 100644 index 0000000000000000000000000000000000000000..798cb2aad4b8ed79528516c7190c84930060c3cb GIT binary patch literal 324 zcmWG=3^EjD5M9LtL^`4dKqi|g8v_Fq0|SErBO{PZ0Wld)fJjCiFgXECegTn85nyr; zh-6j(lPzHK1&Czv0h1d*B&!IRtN@dDKqQ+3m|OxP**U;u4w$?EA~{UJRXy z4pAPUdPx~c88r?WoW?MK3^ZWSQ1b{2QZUss)-%wzRM5!D%qy*kPtVCrN;c9n)vYiy T(KRzk$}G`jC { const result = parquetMetadata(arrayBuffer) const base = file.replace('.parquet', '') const expected = fileToJson(`test/files/${base}.metadata.json`) - expect(toJson(result)).containSubset(expected) + expect(toJson(result)).toEqual(expected) } }) @@ -54,7 +54,7 @@ describe('parquetMetadataAsync', () => { const result = await parquetMetadataAsync(asyncBuffer) const base = file.replace('.parquet', '') const expected = fileToJson(`test/files/${base}.metadata.json`) - expect(toJson(result)).containSubset(expected) + expect(toJson(result)).toEqual(expected) } })