From 36d8ea2e1dc2a673b0b646f9ea5e8e8fad21ad41 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Fri, 7 Feb 2025 18:52:48 -0800 Subject: [PATCH] Fix handling of signed decimals (#60) --- eslint.config.js | 1 + src/convert.js | 16 ++- src/metadata.js | 2 +- test/convert.test.js | 38 ++++++- test/files/decimal-column.json | 22 ++++ test/files/decimal-column.metadata.json | 131 ++++++++++++++++++++++++ test/files/decimal-column.parquet | Bin 0 -> 2177 bytes 7 files changed, 202 insertions(+), 8 deletions(-) create mode 100644 test/files/decimal-column.json create mode 100644 test/files/decimal-column.metadata.json create mode 100644 test/files/decimal-column.parquet diff --git a/eslint.config.js b/eslint.config.js index 4f4a3b0..8987d34 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -58,6 +58,7 @@ export default [ object: true, array: false, }], + 'prefer-exponentiation-operator': 'error', 'prefer-promise-reject-errors': 'error', quotes: ['error', 'single'], 'require-await': 'warn', diff --git a/src/convert.js b/src/convert.js index 423a8be..3db468e 100644 --- a/src/convert.js +++ b/src/convert.js @@ -41,7 +41,7 @@ export function convert(data, schemaElement, utf8 = true) { const ctype = schemaElement.converted_type if (ctype === 'DECIMAL') { const scale = schemaElement.scale || 0 - const factor = Math.pow(10, -scale) + const factor = 10 ** -scale const arr = new Array(data.length) for (let i = 0; i < arr.length; i++) { if (data[0] instanceof Uint8Array) { @@ -123,11 +123,17 @@ export function convert(data, schemaElement, utf8 = true) { * @returns {number} */ export function parseDecimal(bytes) { - // TODO: handle signed let value = 0 for (const byte of bytes) { - value = value << 8 | byte + value = value * 256 + byte } + + // handle signed + const bits = bytes.length * 8 + if (value >= 2 ** (bits - 1)) { + value -= 2 ** bits + } + return value } @@ -152,7 +158,7 @@ export function parseFloat16(bytes) { const sign = int16 >> 15 ? -1 : 1 const exp = int16 >> 10 & 0x1f const frac = int16 & 0x3ff - if (exp === 0) return sign * Math.pow(2, -14) * (frac / 1024) // subnormals + if (exp === 0) return sign * 2 ** -14 * (frac / 1024) // subnormals if (exp === 0x1f) return frac ? NaN : sign * Infinity - return sign * Math.pow(2, exp - 15) * (1 + frac / 1024) + return sign * 2 ** (exp - 15) * (1 + frac / 1024) } diff --git a/src/metadata.js b/src/metadata.js index 5c59ca5..48d1f37 100644 --- a/src/metadata.js +++ b/src/metadata.js @@ -284,7 +284,7 @@ export function convertMetadata(value, schema) { if (type === 'INT64' && logical_type?.type === 'TIMESTAMP') return new Date(Number(view.getBigInt64(0, true))) if (type === 'INT32' && view.byteLength === 4) return view.getInt32(0, true) if (type === 'INT64' && view.byteLength === 8) return view.getBigInt64(0, true) - if (converted_type === 'DECIMAL') return parseDecimal(value) * Math.pow(10, -(schema.scale || 0)) + if (converted_type === 'DECIMAL') return parseDecimal(value) * 10 ** -(schema.scale || 0) if (logical_type?.type === 'FLOAT16') return parseFloat16(value) if (type === 'FIXED_LEN_BYTE_ARRAY') return value // assert(false) diff --git a/test/convert.test.js b/test/convert.test.js index 5491aba..d5ced98 100644 --- a/test/convert.test.js +++ b/test/convert.test.js @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest' -import { convert, parseFloat16 } from '../src/convert.js' +import { convert, parseDecimal, parseFloat16 } from '../src/convert.js' /** * @import {SchemaElement} from '../src/types.js' @@ -71,6 +71,13 @@ describe('convert function', () => { expect(convert(data, schemaElement)).toEqual([100, 200]) }) + it('converts byte array from issue #59 to DECIMAL', () => { + const data = [new Uint8Array([18, 83, 137, 151, 156, 0])] + /** @type {SchemaElement} */ + const schemaElement = { name, converted_type: 'DECIMAL', scale: 10, precision: 14 } + expect(convert(data, schemaElement)).toEqual([2015]) + }) + it('converts epoch time to DATE', () => { const data = [1, 2] // days since epoch /** @type {SchemaElement} */ @@ -180,6 +187,33 @@ describe('parseFloat16', () => { it('convert float16 subnormal number', () => { expect(parseFloat16(new Uint8Array([0xff, 0x03]))) - .toBeCloseTo(Math.pow(2, -14) * (1023 / 1024), 5) + .toBeCloseTo(2 ** -14 * (1023 / 1024), 5) + }) +}) + +describe('parseDecimal', () => { + it('should return 0 for an empty Uint8Array', () => { + const result = parseDecimal(new Uint8Array()) + expect(result).toBe(0) + }) + + it('should parse a single byte', () => { + const result = parseDecimal(new Uint8Array([42])) + expect(result).toBe(42) + }) + + it('should parse two bytes in big-endian order', () => { + const result = parseDecimal(new Uint8Array([1, 0])) + expect(result).toBe(256) + }) + + it('should parse three bytes', () => { + const result = parseDecimal(new Uint8Array([1, 2, 3])) + expect(result).toBe(66051) + }) + + it('should parse -1 as a 32-bit number', () => { + const result = parseDecimal(new Uint8Array([255, 255, 255, 255])) + expect(result).toBe(-1) }) }) diff --git a/test/files/decimal-column.json b/test/files/decimal-column.json new file mode 100644 index 0000000..1e0ce52 --- /dev/null +++ b/test/files/decimal-column.json @@ -0,0 +1,22 @@ +[ + [ + 40, + 2015 + ], + [ + 74, + 2015 + ], + [ + 140, + 2015 + ], + [ + 152, + 2015 + ], + [ + 190, + 2015 + ] +] diff --git a/test/files/decimal-column.metadata.json b/test/files/decimal-column.metadata.json new file mode 100644 index 0000000..a5d3eaf --- /dev/null +++ b/test/files/decimal-column.metadata.json @@ -0,0 +1,131 @@ +{ + "version": 2, + "schema": [ + { + "repetition_type": "REQUIRED", + "name": "schema", + "num_children": 2 + }, + { + "type": "INT64", + "repetition_type": "OPTIONAL", + "name": "mid" + }, + { + "type": "FIXED_LEN_BYTE_ARRAY", + "type_length": 6, + "repetition_type": "OPTIONAL", + "name": "value", + "converted_type": "DECIMAL", + "scale": 10, + "precision": 14, + "logical_type": { + "type": "DECIMAL", + "scale": 10, + "precision": 14 + } + } + ], + "num_rows": 5, + "row_groups": [ + { + "columns": [ + { + "file_offset": 0, + "meta_data": { + "type": "INT64", + "encodings": [ + "PLAIN", + "RLE", + "RLE_DICTIONARY" + ], + "path_in_schema": [ + "mid" + ], + "codec": "SNAPPY", + "num_values": 5, + "total_uncompressed_size": 126, + "total_compressed_size": 120, + "data_page_offset": 50, + "dictionary_page_offset": 4, + "statistics": { + "max": 190, + "min": 40, + "null_count": 0, + "max_value": 190, + "min_value": 40 + }, + "encoding_stats": [ + { + "page_type": "DICTIONARY_PAGE", + "encoding": "PLAIN", + "count": 1 + }, + { + "page_type": "DATA_PAGE", + "encoding": "RLE_DICTIONARY", + "count": 1 + } + ] + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "FIXED_LEN_BYTE_ARRAY", + "encodings": [ + "PLAIN", + "RLE", + "RLE_DICTIONARY" + ], + "path_in_schema": [ + "value" + ], + "codec": "SNAPPY", + "num_values": 5, + "total_uncompressed_size": 82, + "total_compressed_size": 86, + "data_page_offset": 146, + "dictionary_page_offset": 124, + "statistics": { + "max": 2015, + "min": 2015, + "null_count": 0, + "max_value": 2015, + "min_value": 2015 + }, + "encoding_stats": [ + { + "page_type": "DICTIONARY_PAGE", + "encoding": "PLAIN", + "count": 1 + }, + { + "page_type": "DATA_PAGE", + "encoding": "RLE_DICTIONARY", + "count": 1 + } + ] + } + } + ], + "total_byte_size": 208, + "num_rows": 5, + "file_offset": 4, + "total_compressed_size": 206, + "ordinal": 0 + } + ], + "key_value_metadata": [ + { + "key": "pandas", + "value": "{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"stop\": 5, \"step\": 1}], \"column_indexes\": [{\"name\": null, \"field_name\": null, \"pandas_type\": \"unicode\", \"numpy_type\": \"object\", \"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\": [{\"name\": \"mid\", \"field_name\": \"mid\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}, {\"name\": \"value\", \"field_name\": \"value\", \"pandas_type\": \"decimal\", \"numpy_type\": \"object\", \"metadata\": {\"precision\": 14, \"scale\": 10}}], \"creator\": {\"library\": \"pyarrow\", \"version\": \"19.0.0\"}, \"pandas_version\": \"2.2.3\"}" + }, + { + "key": "ARROW:schema", + "value": "/////xgDAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABBAAQAAAAAAAKAAwAAAAEAAgACgAAAGACAAAEAAAAAQAAAAQAAACA/f//QAIAAAQAAAAyAgAAeyJpbmRleF9jb2x1bW5zIjogW3sia2luZCI6ICJyYW5nZSIsICJuYW1lIjogbnVsbCwgInN0YXJ0IjogMCwgInN0b3AiOiA1LCAic3RlcCI6IDF9XSwgImNvbHVtbl9pbmRleGVzIjogW3sibmFtZSI6IG51bGwsICJmaWVsZF9uYW1lIjogbnVsbCwgInBhbmRhc190eXBlIjogInVuaWNvZGUiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiB7ImVuY29kaW5nIjogIlVURi04In19XSwgImNvbHVtbnMiOiBbeyJuYW1lIjogIm1pZCIsICJmaWVsZF9uYW1lIjogIm1pZCIsICJwYW5kYXNfdHlwZSI6ICJpbnQ2NCIsICJudW1weV90eXBlIjogImludDY0IiwgIm1ldGFkYXRhIjogbnVsbH0sIHsibmFtZSI6ICJ2YWx1ZSIsICJmaWVsZF9uYW1lIjogInZhbHVlIiwgInBhbmRhc190eXBlIjogImRlY2ltYWwiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiB7InByZWNpc2lvbiI6IDE0LCAic2NhbGUiOiAxMH19XSwgImNyZWF0b3IiOiB7ImxpYnJhcnkiOiAicHlhcnJvdyIsICJ2ZXJzaW9uIjogIjE5LjAuMCJ9LCAicGFuZGFzX3ZlcnNpb24iOiAiMi4yLjMifQAABgAAAHBhbmRhcwAAAgAAAFAAAAAEAAAAyP///wAAAQcQAAAAIAAAAAQAAAAAAAAABQAAAHZhbHVlAAAACAAMAAQACAAIAAAADgAAAAoAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQIQAAAAHAAAAAQAAAAAAAAAAwAAAG1pZAAIAAwACAAHAAgAAAAAAAABQAAAAAAAAAA=" + } + ], + "created_by": "parquet-cpp-arrow version 19.0.0", + "metadata_length": 1959 +} diff --git a/test/files/decimal-column.parquet b/test/files/decimal-column.parquet new file mode 100644 index 0000000000000000000000000000000000000000..aa514716ea67e1d75f4d94cff1ade174a11071e5 GIT binary patch literal 2177 zcmbVO&2QpH6dy>+61rVQQH-F<0a@B82a?!)q-C|0I*@l3pO8# za&M*H+ru7uSgDsi^$+OZ)MF1l^v&2|AxRJINO<17_vZJ0@69}$Gu+~tb>@uu@|1}& zXbYje^*yu^LbZ)&>*%LXqOX5J{`e!98N|eyPt)MN$wZjQP9l15NksPopE%lkLJP)^ z_rmauh1S-7C?kmboY_2O!ZdDVd+YqipWh92xh>3paH~h zKR=P{W8CIy61Y9<#4f-`uj-paY2= z$#zd?(<#=LEgclLXPU4YyYk3|B}e6<17eUwv4|o{1dz7PfSYfeHFOeW6x=RC>ATgF+q{o34>Ib1TT{SG(d>rgwQg%kL zVQdU-Qu!P?1ywdl@pvxCeWc6oa76248p=o>%}IHjxjY&TrzFlq9|Zs`UpU|nI2ICM z171tU%t7WL%if{HbKGjZ5nuV;lXxFJBxBY)#uzu@N5)qe=Wwxy%NX}?sYi9i;);Dr zlqU;&1(*CaJShr!lo2-w1nRxS(@8#uaZ)x6TG=9J==Y7@zFOex#+?P8mg#8zLcoB-eAXM)xCq)g#j zhI~mrg4w=k8C*`V`Bmf*&)M+epaumCnY503RP2{I~ zdnqT&^t5A%6AV&2-qb2(*x%|e&Y;STh3eQ?)UH&^NaBnS&g)VA1iRaZ`kNHbexinO zAEb=wO5*hY)zB`^yJEvpGv-7w$hn_z^d4jyeFe@1?!~NLUDOkN%bb!Gg8I)K$*%QP z`Ol>hokrr~2tzPG