Convert unsigned ints

This commit is contained in:
Kenny Daniel 2024-05-23 23:35:49 -07:00
parent 10b9b299d8
commit 2edc14b70e
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
6 changed files with 237 additions and 5 deletions

11
demo.js

@ -229,10 +229,13 @@ function renderTable(header, data) {
return table
}
/**
 * Render a cell value as display text.
 *
 * At the top level (depth 0) null and undefined render as an empty string;
 * when nested inside an array or object they render as the literal words
 * 'null' / 'undefined' so the position stays visible.
 *
 * Arrays render as `[a, b]` and plain objects as `{key: value}`, recursing
 * with depth + 1. JSON.stringify is deliberately not used because it throws
 * a TypeError on BigInt values.
 *
 * @param {any} value value to render
 * @param {number} [depth] nesting level; 0 for a top-level value
 * @returns {any} a string for null, undefined, bigint, string, array, Date
 *   and object inputs; any other value (number, boolean, ...) is returned
 *   unchanged
 */
function stringify(value, depth = 0) {
  if (value === null) return depth ? 'null' : ''
  if (value === undefined) return depth ? 'undefined' : ''
  if (typeof value === 'bigint') return value.toString()
  if (typeof value === 'string') return value
  // Arrays and Dates must be checked before the generic object branch,
  // since typeof reports 'object' for both.
  if (Array.isArray(value)) {
    return `[${value.map(v => stringify(v, depth + 1)).join(', ')}]`
  }
  if (value instanceof Date) {
    // toISOString throws a RangeError on an invalid Date; fall back to the
    // default string form ('Invalid Date') instead of failing the render.
    return Number.isNaN(value.getTime()) ? String(value) : value.toISOString()
  }
  if (typeof value === 'object') {
    const fields = Object.entries(value)
      .map(([k, v]) => `${k}: ${stringify(v, depth + 1)}`)
    return `{${fields.join(', ')}}`
  }
  return value
}

@ -66,7 +66,13 @@ export function convert(data, schemaElement, utf8 = true) {
}
return arr
}
// TODO: ctype UINT
if (ctype === 'UINT_64') {
const arr = new BigUint64Array(data.length)
for (let i = 0; i < arr.length; i++) {
arr[i] = BigInt(data[i])
}
return arr
}
const logicalType = schemaElement.logical_type?.type
if (logicalType === 'FLOAT16') {
return Array.from(data).map(parseFloat16)

1
src/types.d.ts vendored

@ -295,6 +295,7 @@ export type DecodedArray =
Uint8Array |
Int32Array |
BigInt64Array |
BigUint64Array |
Float32Array |
Float64Array |
any[]

4
test/files/unsigned.json Normal file

@ -0,0 +1,4 @@
[
[0,0,0,0],
[255,65535,4294967295,18446744073709552000]
]

@ -0,0 +1,218 @@
{
"version": 1,
"schema": [
{
"repetition_type": "REQUIRED",
"name": "schema",
"num_children": 4
},
{
"type": "INT32",
"repetition_type": "OPTIONAL",
"name": "utiny",
"converted_type": "UINT_8",
"logical_type": {
"type": "INTEGER",
"bitWidth": 8,
"isSigned": false
}
},
{
"type": "INT32",
"repetition_type": "OPTIONAL",
"name": "usmall",
"converted_type": "UINT_16",
"logical_type": {
"type": "INTEGER",
"bitWidth": 16,
"isSigned": false
}
},
{
"type": "INT64",
"repetition_type": "OPTIONAL",
"name": "uint"
},
{
"type": "INT64",
"repetition_type": "OPTIONAL",
"name": "ubig",
"converted_type": "UINT_64",
"logical_type": {
"type": "INTEGER",
"bitWidth": 64,
"isSigned": false
}
}
],
"num_rows": 2,
"row_groups": [
{
"columns": [
{
"file_offset": 72,
"meta_data": {
"type": "INT32",
"encodings": [
"PLAIN_DICTIONARY",
"PLAIN",
"RLE"
],
"path_in_schema": [
"utiny"
],
"codec": "SNAPPY",
"num_values": 2,
"total_uncompressed_size": 64,
"total_compressed_size": 68,
"data_page_offset": 28,
"dictionary_page_offset": 4,
"statistics": {
"null_count": 0,
"max_value": 255,
"min_value": 0
},
"encoding_stats": [
{
"page_type": "DICTIONARY_PAGE",
"encoding": "PLAIN_DICTIONARY",
"count": 1
},
{
"page_type": "DATA_PAGE",
"encoding": "PLAIN_DICTIONARY",
"count": 1
}
]
}
},
{
"file_offset": 207,
"meta_data": {
"type": "INT32",
"encodings": [
"PLAIN_DICTIONARY",
"PLAIN",
"RLE"
],
"path_in_schema": [
"usmall"
],
"codec": "SNAPPY",
"num_values": 2,
"total_uncompressed_size": 64,
"total_compressed_size": 68,
"data_page_offset": 163,
"dictionary_page_offset": 139,
"statistics": {
"null_count": 0,
"max_value": 65535,
"min_value": 0
},
"encoding_stats": [
{
"page_type": "DICTIONARY_PAGE",
"encoding": "PLAIN_DICTIONARY",
"count": 1
},
{
"page_type": "DATA_PAGE",
"encoding": "PLAIN_DICTIONARY",
"count": 1
}
]
}
},
{
"file_offset": 381,
"meta_data": {
"type": "INT64",
"encodings": [
"PLAIN_DICTIONARY",
"PLAIN",
"RLE"
],
"path_in_schema": [
"uint"
],
"codec": "SNAPPY",
"num_values": 2,
"total_uncompressed_size": 100,
"total_compressed_size": 104,
"data_page_offset": 309,
"dictionary_page_offset": 277,
"statistics": {
"max": 4294967295,
"min": 0,
"null_count": 0,
"max_value": 4294967295,
"min_value": 0
},
"encoding_stats": [
{
"page_type": "DICTIONARY_PAGE",
"encoding": "PLAIN_DICTIONARY",
"count": 1
},
{
"page_type": "DATA_PAGE",
"encoding": "PLAIN_DICTIONARY",
"count": 1
}
]
}
},
{
"file_offset": 561,
"meta_data": {
"type": "INT64",
"encodings": [
"PLAIN_DICTIONARY",
"PLAIN",
"RLE"
],
"path_in_schema": [
"ubig"
],
"codec": "SNAPPY",
"num_values": 2,
"total_uncompressed_size": 80,
"total_compressed_size": 84,
"data_page_offset": 509,
"dictionary_page_offset": 477,
"statistics": {
"null_count": 0,
"max_value": -1,
"min_value": 0
},
"encoding_stats": [
{
"page_type": "DICTIONARY_PAGE",
"encoding": "PLAIN_DICTIONARY",
"count": 1
},
{
"page_type": "DATA_PAGE",
"encoding": "PLAIN_DICTIONARY",
"count": 1
}
]
}
}
],
"total_byte_size": 308,
"num_rows": 2,
"file_offset": 4,
"total_compressed_size": 324,
"ordinal": 0
}
],
"key_value_metadata": [
{
"key": "ARROW:schema",
"value": "/////wgBAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAQAAACkAAAAZAAAADQAAAAEAAAAfP///wAAAQIQAAAAGAAAAAQAAAAAAAAABAAAAHViaWcAAAAAbv///0AAAACo////AAABAhAAAAAYAAAABAAAAAAAAAAEAAAAdWludAAAAACa////IAAAANT///8AAAECEAAAABgAAAAEAAAAAAAAAAYAAAB1c21hbGwAAMb///8QAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAECEAAAABwAAAAEAAAAAAAAAAUAAAB1dGlueQAGAAgABAAGAAAACAAAAAAAAAA="
}
],
"created_by": "parquet-cpp-arrow version 6.0.1",
"metadata_length": 851
}

BIN
test/files/unsigned.parquet Normal file

Binary file not shown.