parquet-testing byte_array_decimal

This commit is contained in:
Kenny Daniel 2024-02-13 21:25:40 -08:00
parent 054431c98e
commit 5f4e2ffe59
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
8 changed files with 85 additions and 5 deletions

@@ -95,7 +95,8 @@ Contributions are welcome!
## References
- https://github.com/apache/parquet-format
- https://github.com/dask/fastparquet
- https://github.com/apache/parquet-testing
- https://github.com/apache/thrift
- https://github.com/google/snappy
- https://github.com/zhipeng-jia/snappyjs

@@ -14,6 +14,7 @@ export function toJson(obj) {
/** @type {Record<string, unknown>} */
const newObj = {}
for (const key of Object.keys(obj)) {
if (obj[key] === undefined) continue
newObj[key] = toJson(obj[key])
}
return newObj

7
test/files/README.md Normal file

@@ -0,0 +1,7 @@
# Test Files License
This directory contains binary test files from [apache/parquet-testing](https://github.com/apache/parquet-testing), under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0).
Copyright 2004 The Apache Software Foundation (http://www.apache.org/).

@@ -0,0 +1,26 @@
[
[ 10000 ],
[ 20000 ],
[ 30000 ],
[ 40000 ],
[ 50000 ],
[ 60000 ],
[ 70000 ],
[ 80000 ],
[ 90000 ],
[ 100000 ],
[ 110000 ],
[ 120000 ],
[ 130000 ],
[ 140000 ],
[ 150000 ],
[ 160000 ],
[ 170000 ],
[ 180000 ],
[ 190000 ],
[ 200000 ],
[ 210000 ],
[ 220000 ],
[ 230000 ],
[ 240000 ]
]

@@ -0,0 +1,43 @@
{
"version": 1,
"created_by": "HVR 5.3.0/9 (linux_glibc2.5-x64-64bit)",
"metadata_length": 119,
"num_rows": 24,
"row_groups": [
{
"columns": [
{
"file_offset": 4,
"meta_data": {
"codec": "UNCOMPRESSED",
"data_page_offset": 4,
"encodings": [],
"num_values": 24,
"path_in_schema": [ "value" ],
"total_compressed_size": 168,
"total_uncompressed_size": 168,
"type": 6
}
}
],
"num_rows": 24,
"total_byte_size": 168
}
],
"schema": [
{
"name": "schema",
"num_children": 1,
"repetition_type": "REQUIRED"
},
{
"converted_type": "DECIMAL",
"field_id": 6,
"name": "value",
"precision": 4,
"repetition_type": "OPTIONAL",
"scale": 2,
"type": 6
}
]
}

Binary file not shown.

@@ -77,10 +77,12 @@
],
"key_value_metadata": [
{
"key": "pandas"
"key": "pandas",
"value": "{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"stop\": 15, \"step\": 1}], \"column_indexes\": [{\"name\": null, \"field_name\": null, \"pandas_type\": \"unicode\", \"numpy_type\": \"object\", \"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\": [{\"name\": \"numbers\", \"field_name\": \"numbers\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}], \"creator\": {\"library\": \"pyarrow\", \"version\": \"14.0.2\"}, \"pandas_version\": \"2.1.4\"}"
},
{
"key": "ARROW:schema"
"key": "ARROW:schema",
"value": "/////2gCAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABBAAQAAAAAAAKAAwAAAAEAAgACgAAAOgBAAAEAAAAAQAAAAwAAAAIAAwABAAIAAgAAAAIAAAAEAAAAAYAAABwYW5kYXMAALMBAAB7ImluZGV4X2NvbHVtbnMiOiBbeyJraW5kIjogInJhbmdlIiwgIm5hbWUiOiBudWxsLCAic3RhcnQiOiAwLCAic3RvcCI6IDE1LCAic3RlcCI6IDF9XSwgImNvbHVtbl9pbmRleGVzIjogW3sibmFtZSI6IG51bGwsICJmaWVsZF9uYW1lIjogbnVsbCwgInBhbmRhc190eXBlIjogInVuaWNvZGUiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiB7ImVuY29kaW5nIjogIlVURi04In19XSwgImNvbHVtbnMiOiBbeyJuYW1lIjogIm51bWJlcnMiLCAiZmllbGRfbmFtZSI6ICJudW1iZXJzIiwgInBhbmRhc190eXBlIjogImludDY0IiwgIm51bXB5X3R5cGUiOiAiaW50NjQiLCAibWV0YWRhdGEiOiBudWxsfV0sICJjcmVhdG9yIjogeyJsaWJyYXJ5IjogInB5YXJyb3ciLCAidmVyc2lvbiI6ICIxNC4wLjIifSwgInBhbmRhc192ZXJzaW9uIjogIjIuMS40In0AAQAAABQAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQIQAAAAIAAAAAQAAAAAAAAABwAAAG51bWJlcnMACAAMAAgABwAIAAAAAAAAAUAAAAAAAAAA"
}
]
}

@@ -13,7 +13,7 @@ describe('parquetMetadata', () => {
const result = parquetMetadata(arrayBuffer)
const base = file.replace('.parquet', '')
const expected = fileToJson(`test/files/${base}.metadata.json`)
expect(toJson(result)).containSubset(expected)
expect(toJson(result)).toEqual(expected)
}
})
@@ -54,7 +54,7 @@ describe('parquetMetadataAsync', () => {
const result = await parquetMetadataAsync(asyncBuffer)
const base = file.replace('.parquet', '')
const expected = fileToJson(`test/files/${base}.metadata.json`)
expect(toJson(result)).containSubset(expected)
expect(toJson(result)).toEqual(expected)
}
})