hyparquet/src/convert.js

60 lines
1.7 KiB
JavaScript
Raw Normal View History

2024-05-02 06:23:50 +00:00
// Milliseconds in one day (24 * 60 * 60 * 1000); multiplies a day count into a JS Date time value.
const dayMillis = 86400000
2024-02-26 20:20:48 +00:00
/**
 * Convert known types from primitive to rich.
 *
 * Input that is not a plain Array, and columns whose converted_type is not
 * handled below, are returned unchanged. Null/undefined entries are passed
 * through as-is instead of being converted.
 *
 * @typedef {import('./types.js').DecodedArray} DecodedArray
 * @param {DecodedArray} data series of primitive types
 * @param {import('./types.js').SchemaElement} schemaElement schema element for the data
 * @returns {DecodedArray} series of rich types
 * @throws {Error} if converted_type is BSON or INTERVAL (not supported)
 */
export function convert(data, schemaElement) {
  if (!Array.isArray(data)) return data

  // Wrap a per-value converter so missing entries are left untouched
  const skipMissing = fn => v => v === undefined || v === null ? v : fn(v)

  const ctype = schemaElement.converted_type
  if (ctype === 'UTF8') {
    const decoder = new TextDecoder()
    return data.map(v => v && decoder.decode(v))
  }
  if (ctype === 'DECIMAL') {
    // Parquet stores the unscaled integer; the value is unscaled * 10^(-scale)
    const scale = schemaElement.scale || 0
    if (scale === 0 && (typeof data[0] === 'number' || typeof data[0] === 'bigint')) {
      return data // nothing to rescale
    }
    // Divide by 10^scale rather than multiply by an inexact 10^(-scale)
    const divisor = Math.pow(10, scale)
    return data.map(skipMissing(v => {
      if (v instanceof Uint8Array) return parseDecimal(v) / divisor
      // A scaled decimal is fractional, so bigint becomes number here
      // (precision is limited to float64 for very large unscaled values)
      return scale === 0 ? v : Number(v) / divisor
    }))
  }
  if (ctype === 'DATE') {
    // DATE is a day count; scale to a millisecond time value
    return data.map(skipMissing(v => new Date(v * dayMillis)))
  }
  if (ctype === 'TIME_MILLIS') {
    return data.map(skipMissing(v => new Date(v)))
  }
  if (ctype === 'JSON') {
    const decoder = new TextDecoder()
    // Raw byte arrays must be decoded to text before parsing; strings parse directly
    return data.map(skipMissing(v => JSON.parse(v instanceof Uint8Array ? decoder.decode(v) : v)))
  }
  if (ctype === 'BSON') {
    throw new Error('parquet bson not supported')
  }
  if (ctype === 'INTERVAL') {
    throw new Error('parquet interval not supported')
  }
  return data
}
/**
 * Parse decimal from byte array.
 *
 * Bytes are read as a big-endian two's complement integer. The result is a
 * float64, so unscaled values beyond Number.MAX_SAFE_INTEGER lose precision.
 *
 * @param {Uint8Array} bytes
 * @returns {number} signed unscaled value (0 for empty input)
 */
function parseDecimal(bytes) {
  let value = 0
  for (const byte of bytes) {
    // Multiply instead of `<< 8`: bitwise operators truncate to 32 bits
    value = value * 256 + byte
  }
  // High bit set means negative: subtract 2^bits to undo two's complement
  const bits = bytes.length * 8
  if (bits > 0 && value >= 2 ** (bits - 1)) {
    value -= 2 ** bits
  }
  return value
}