diff --git a/README.md b/README.md index 9366d74..15c6907 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,8 @@ Contributions are welcome! ## References - https://github.com/apache/parquet-format - - https://github.com/dask/fastparquet + - https://github.com/apache/parquet-testing - https://github.com/apache/thrift + - https://github.com/dask/fastparquet - https://github.com/google/snappy - https://github.com/zhipeng-jia/snappyjs diff --git a/src/toJson.js b/src/toJson.js index bf81685..11c1295 100644 --- a/src/toJson.js +++ b/src/toJson.js @@ -14,6 +14,7 @@ export function toJson(obj) { /** @type {Record} */ const newObj = {} for (const key of Object.keys(obj)) { + if (obj[key] === undefined) continue newObj[key] = toJson(obj[key]) } return newObj diff --git a/test/files/README.md b/test/files/README.md new file mode 100644 index 0000000..beb0bcb --- /dev/null +++ b/test/files/README.md @@ -0,0 +1,7 @@ +# Test Files License + +This directory contains binary test files from [apache/parquet-testing](https://github.com/apache/parquet-testing), under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0). + +Copyright 2004 The Apache Software Foundation (http://www.apache.org/). + + diff --git a/test/files/byte_array_decimal.json b/test/files/byte_array_decimal.json new file mode 100644 index 0000000..7ce1ae6 --- /dev/null +++ b/test/files/byte_array_decimal.json @@ -0,0 +1,26 @@ +[ + [ 10000 ], + [ 20000 ], + [ 30000 ], + [ 40000 ], + [ 50000 ], + [ 60000 ], + [ 70000 ], + [ 80000 ], + [ 90000 ], + [ 100000 ], + [ 110000 ], + [ 120000 ], + [ 130000 ], + [ 140000 ], + [ 150000 ], + [ 160000 ], + [ 170000 ], + [ 180000 ], + [ 190000 ], + [ 200000 ], + [ 210000 ], + [ 220000 ], + [ 230000 ], + [ 240000 ] +] diff --git a/test/files/byte_array_decimal.metadata.json b/test/files/byte_array_decimal.metadata.json new file mode 100644 index 0000000..4abe4d0 --- /dev/null +++ b/test/files/byte_array_decimal.metadata.json @@ -0,0 +1,43 @@ +{ + "version": 1, + "created_by": "HVR 5.3.0/9 (linux_glibc2.5-x64-64bit)", + "metadata_length": 119, + "num_rows": 24, + "row_groups": [ + { + "columns": [ + { + "file_offset": 4, + "meta_data": { + "codec": "UNCOMPRESSED", + "data_page_offset": 4, + "encodings": [], + "num_values": 24, + "path_in_schema": [ "value" ], + "total_compressed_size": 168, + "total_uncompressed_size": 168, + "type": 6 + } + } + ], + "num_rows": 24, + "total_byte_size": 168 + } + ], + "schema": [ + { + "name": "schema", + "num_children": 1, + "repetition_type": "REQUIRED" + }, + { + "converted_type": "DECIMAL", + "field_id": 6, + "name": "value", + "precision": 4, + "repetition_type": "OPTIONAL", + "scale": 2, + "type": 6 + } + ] +} diff --git a/test/files/byte_array_decimal.parquet b/test/files/byte_array_decimal.parquet new file mode 100644 index 0000000..798cb2a Binary files /dev/null and b/test/files/byte_array_decimal.parquet differ diff --git a/test/files/rowgroups.metadata.json b/test/files/rowgroups.metadata.json index 7588211..1560d55 100644 --- a/test/files/rowgroups.metadata.json +++ b/test/files/rowgroups.metadata.json @@ -77,10 +77,12 @@ ], "key_value_metadata": [ { - "key": "pandas" + "key": "pandas", + "value": "{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"stop\": 15, \"step\": 1}], \"column_indexes\": [{\"name\": null, \"field_name\": null, \"pandas_type\": \"unicode\", \"numpy_type\": \"object\", \"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\": [{\"name\": \"numbers\", \"field_name\": \"numbers\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}], \"creator\": {\"library\": \"pyarrow\", \"version\": \"14.0.2\"}, \"pandas_version\": \"2.1.4\"}" }, { - "key": "ARROW:schema" + "key": "ARROW:schema", + "value": "/////2gCAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABBAAQAAAAAAAKAAwAAAAEAAgACgAAAOgBAAAEAAAAAQAAAAwAAAAIAAwABAAIAAgAAAAIAAAAEAAAAAYAAABwYW5kYXMAALMBAAB7ImluZGV4X2NvbHVtbnMiOiBbeyJraW5kIjogInJhbmdlIiwgIm5hbWUiOiBudWxsLCAic3RhcnQiOiAwLCAic3RvcCI6IDE1LCAic3RlcCI6IDF9XSwgImNvbHVtbl9pbmRleGVzIjogW3sibmFtZSI6IG51bGwsICJmaWVsZF9uYW1lIjogbnVsbCwgInBhbmRhc190eXBlIjogInVuaWNvZGUiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiB7ImVuY29kaW5nIjogIlVURi04In19XSwgImNvbHVtbnMiOiBbeyJuYW1lIjogIm51bWJlcnMiLCAiZmllbGRfbmFtZSI6ICJudW1iZXJzIiwgInBhbmRhc190eXBlIjogImludDY0IiwgIm51bXB5X3R5cGUiOiAiaW50NjQiLCAibWV0YWRhdGEiOiBudWxsfV0sICJjcmVhdG9yIjogeyJsaWJyYXJ5IjogInB5YXJyb3ciLCAidmVyc2lvbiI6ICIxNC4wLjIifSwgInBhbmRhc192ZXJzaW9uIjogIjIuMS40In0AAQAAABQAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQIQAAAAIAAAAAQAAAAAAAAABwAAAG51bWJlcnMACAAMAAgABwAIAAAAAAAAAUAAAAAAAAAA" } ] } diff --git a/test/metadata.test.js b/test/metadata.test.js index 54c1e2a..6c0eb7b 100644 --- a/test/metadata.test.js +++ b/test/metadata.test.js @@ -13,7 +13,7 @@ describe('parquetMetadata', () => { const result = parquetMetadata(arrayBuffer) const base = file.replace('.parquet', '') const expected = fileToJson(`test/files/${base}.metadata.json`) - expect(toJson(result)).containSubset(expected) + expect(toJson(result)).toEqual(expected) } }) @@ -54,7 +54,7 @@ describe('parquetMetadataAsync', () => { const result = await parquetMetadataAsync(asyncBuffer) const base = file.replace('.parquet', '') const expected = fileToJson(`test/files/${base}.metadata.json`) - expect(toJson(result)).containSubset(expected) + expect(toJson(result)).toEqual(expected) } })