// hyparquet/src/thrift.js (253 lines, 5.8 KiB, JavaScript)
/**
 * TCompactProtocol type ids.
 * These are the values carried in the low 4 bits of field and collection
 * header bytes.
 */
const CompactType = {
  STOP: 0, // end-of-struct marker
  TRUE: 1,
  FALSE: 2,
  BYTE: 3,
  I16: 4,
  I32: 5,
  I64: 6,
  DOUBLE: 7,
  BINARY: 8,
  LIST: 9,
  SET: 10,
  MAP: 11,
  STRUCT: 12,
  UUID: 13,
}
/**
 * Parse TCompactProtocol
 *
 * Reads fields from the reader until a STOP field is encountered or the end
 * of the view is reached. Each field value is stored in the result under the
 * key `field_<id>`. The reader's offset is advanced past everything consumed.
 *
 * @import {DataReader} from '../src/types.d.ts'
 * @param {DataReader} reader
 * @returns {Record<string, any>}
 */
export function deserializeTCompactProtocol(reader) {
  let lastFid = 0
  /** @type {Record<string, any>} */
  const value = {}

  while (reader.offset < reader.view.byteLength) {
    // Parse each field based on its type and add to the result object
    const [type, fid, newLastFid] = readFieldBegin(reader, lastFid)
    lastFid = newLastFid

    if (type === CompactType.STOP) {
      break
    }

    // Handle the field based on its type
    value[`field_${fid}`] = readElement(reader, type)
  }

  return value
}
/**
 * Read a single element based on its type
 *
 * TRUE and FALSE consume no bytes: the type itself is the value. All other
 * handled types advance the reader's offset past the bytes they consume.
 * MAP and SET are not handled and fall through to the error case.
 *
 * @param {DataReader} reader
 * @param {number} type
 * @returns {any} value
 * @throws {Error} if the type is not handled
 */
function readElement(reader, type) {
  switch (type) {
    case CompactType.TRUE:
      return true
    case CompactType.FALSE:
      return false
    case CompactType.BYTE:
      // read byte directly
      return reader.view.getInt8(reader.offset++)
    case CompactType.I16:
    case CompactType.I32:
      return readZigZag(reader)
    case CompactType.I64:
      return readZigZagBigInt(reader)
    case CompactType.DOUBLE: {
      // little-endian 8-byte float
      const value = reader.view.getFloat64(reader.offset, true)
      reader.offset += 8
      return value
    }
    case CompactType.BINARY: {
      // varint length followed by that many bytes; returned as a view over
      // the underlying buffer (no copy)
      const stringLength = readVarInt(reader)
      const strBytes = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, stringLength)
      reader.offset += stringLength
      return strBytes
    }
    case CompactType.LIST: {
      const [elemType, listSize] = readCollectionBegin(reader)
      // boolean list elements are read as one byte each, true when equal to 1
      const boolType = elemType === CompactType.TRUE || elemType === CompactType.FALSE
      const values = new Array(listSize)
      for (let i = 0; i < listSize; i++) {
        values[i] = boolType ? readElement(reader, CompactType.BYTE) === 1 : readElement(reader, elemType)
      }
      return values
    }
    case CompactType.STRUCT: {
      /** @type {Record<string, any>} */
      const structValues = {}
      // field ids are delta-encoded relative to the previous field in this struct
      let structLastFid = 0
      while (true) {
        let structFieldType, structFid
        [structFieldType, structFid, structLastFid] = readFieldBegin(reader, structLastFid)
        if (structFieldType === CompactType.STOP) {
          break
        }
        structValues[`field_${structFid}`] = readElement(reader, structFieldType)
      }
      return structValues
    }
    // TODO: MAP and SET
    case CompactType.UUID: {
      // Read 16 bytes to uuid string (32 lowercase hex digits, no dashes)
      let uuid = ''
      for (let i = 0; i < 16; i++) {
        uuid += reader.view.getUint8(reader.offset++).toString(16).padStart(2, '0')
      }
      return uuid
    }
    default:
      throw new Error(`thrift unhandled type: ${type}`)
  }
}
/**
 * Var int, also known as Unsigned LEB128.
 * Var ints take 1 to 5 bytes (int32) or 1 to 10 bytes (int64).
 * Reads groups of 7 low bits until high bit is 0.
 *
 * The value is accumulated with JavaScript's 32-bit bitwise operators, so the
 * result is a signed 32-bit integer. Use readVarBigInt for wider values.
 * Advances the reader's offset past the bytes consumed.
 *
 * @param {DataReader} reader
 * @returns {number} value
 */
export function readVarInt(reader) {
  let result = 0
  let shift = 0
  while (true) {
    const byte = reader.view.getUint8(reader.offset++)
    // low 7 bits are payload, least-significant group first
    result |= (byte & 0x7f) << shift
    // a clear high bit marks the final byte
    if (!(byte & 0x80)) {
      return result
    }
    shift += 7
  }
}
/**
 * Read a varint as a bigint.
 *
 * Same 7-bits-per-byte, least-significant-group-first encoding as readVarInt,
 * but accumulated as a BigInt so the value is not limited to 32 bits.
 * Advances the reader's offset past the bytes consumed.
 *
 * @param {DataReader} reader
 * @returns {bigint} value
 */
function readVarBigInt(reader) {
  let result = 0n
  let shift = 0n
  while (true) {
    const byte = reader.view.getUint8(reader.offset++)
    result |= BigInt(byte & 0x7f) << shift
    // a clear high bit marks the final byte
    if (!(byte & 0x80)) {
      return result
    }
    shift += 7n
  }
}
/**
 * Values of type int32 and int64 are transformed to a zigzag int.
 * A zigzag int folds positive and negative numbers into the positive number space.
 *
 * Reads one varint and decodes it: even encodings map to non-negative values
 * (0 -> 0, 2 -> 1), odd encodings to negative values (1 -> -1, 3 -> -2).
 *
 * @param {DataReader} reader
 * @returns {number} value
 */
function readZigZag(reader) {
  const zigzag = readVarInt(reader)
  // convert zigzag to int: drop the sign bit, then flip all bits if it was set
  return (zigzag >>> 1) ^ -(zigzag & 1)
}
/**
 * A zigzag int folds positive and negative numbers into the positive number space.
 * This version returns a BigInt.
 *
 * Reads one varint as a bigint and decodes it: even encodings map to
 * non-negative values, odd encodings to negative values.
 *
 * @param {DataReader} reader
 * @returns {bigint} value
 */
export function readZigZagBigInt(reader) {
  const zigzag = readVarBigInt(reader)
  // convert zigzag to int: drop the sign bit, then flip all bits if it was set
  return (zigzag >> 1n) ^ -(zigzag & 1n)
}
/**
 * Get thrift type from half a byte
 *
 * Returns the low 4 bits of the given byte.
 *
 * @param {number} byte
 * @returns {number}
 */
function getCompactType(byte) {
  return byte & 0x0f
}
/**
 * Read field type and field id
 *
 * The header byte holds the type in its low 4 bits and a field-id delta in
 * its high 4 bits. A non-zero delta is added to the previous field id. A zero
 * delta means the field id follows as a zigzag varint. A STOP type returns
 * [0, 0, lastFid] without changing the last field id.
 *
 * @param {DataReader} reader
 * @param {number} lastFid
 * @returns {[number, number, number]} [type, fid, newLastFid]
 */
function readFieldBegin(reader, lastFid) {
  const header = reader.view.getUint8(reader.offset++)
  const type = getCompactType(header)
  if (type === CompactType.STOP) {
    // STOP also ends a struct
    return [0, 0, lastFid]
  }
  const delta = header >> 4
  // short form: add delta to last field id; long form: explicit field id follows
  const fid = delta ? lastFid + delta : readZigZag(reader)
  return [type, fid, fid]
}
/**
 * Read collection type and size
 *
 * The header byte holds the element type in its low 4 bits and the size in
 * its high 4 bits. A size nibble of 15 means the actual size follows as a
 * varint.
 *
 * @param {DataReader} reader
 * @returns {[number, number]} [type, size]
 */
function readCollectionBegin(reader) {
  const sizeType = reader.view.getUint8(reader.offset++)
  const size = sizeType >> 4
  const type = getCompactType(sizeType)
  if (size === 15) {
    // size did not fit in the nibble; read it as a varint
    const newSize = readVarInt(reader)
    return [type, newSize]
  }
  return [type, size]
}
/**
 * Convert int to varint. Outputs 1-5 bytes for int32.
 *
 * Emits 7 bits per byte, least-significant group first, with the high bit set
 * on every byte except the last. Negative inputs are treated as their
 * unsigned 32-bit pattern after the first unsigned shift.
 *
 * @param {number} n
 * @returns {number[]}
 */
export function toVarInt(n) {
  const varInt = []
  while (true) {
    if ((n & ~0x7f) === 0) {
      // remaining value fits in 7 bits: final byte, high bit clear
      varInt.push(n)
      break
    }
    // emit low 7 bits with the continuation bit set
    varInt.push(n & 0x7f | 0x80)
    n >>>= 7
  }
  return varInt
}