Return Decoded struct with bytes read

This commit is contained in:
Kenny Daniel 2024-01-02 17:16:33 -08:00
parent 15e4f5597f
commit a032785ea6
No known key found for this signature in database
GPG Key ID: 6A3C5E318BE71391
4 changed files with 37 additions and 19 deletions

@ -4,6 +4,7 @@
"eslint:recommended",
"plugin:@typescript-eslint/recommended"
],
"ignorePatterns": ["dist/"],
"plugins": ["import", "jsdoc"],
"rules": {
"@typescript-eslint/no-explicit-any": "warn",

@ -8,3 +8,11 @@
JavaScript parser for [Apache Parquet](https://parquet.apache.org) files.
Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval.
## References
- https://github.com/apache/parquet-format
- https://github.com/dask/fastparquet
- https://github.com/apache/thrift
- https://github.com/google/snappy
- https://github.com/zhipeng-jia/snappyjs

@ -1,3 +1,12 @@
/**
* Represents a decoded value, and includes the number of bytes read.
* This is used to read data from the file and advance a virtual file pointer.
*
* @typeParam T - type of the decoded value
*/
interface Decoded<T> {
// The decoded value itself
value: T
// Number of bytes read while decoding `value`
byteLength: number
}
// TCompactProtocol types
const CompactType = {
STOP: 0,
@ -19,16 +28,16 @@ const CompactType = {
/**
* Parse TCompactProtocol
*/
export function deserializeTCompactProtocol(buffer: ArrayBuffer): [number, Record<string, any>] {
export function deserializeTCompactProtocol(buffer: ArrayBuffer): Decoded<Record<string, any>> {
const view = new DataView(buffer)
let index = 0
let byteLength = 0
let lastFid = 0
const result: Record<string, any> = {}
const value: Record<string, any> = {}
while (index < buffer.byteLength) {
while (byteLength < buffer.byteLength) {
// Parse each field based on its type and add to the result object
const [type, fid, newIndex, newLastFid] = readFieldBegin(view, index, lastFid)
index = newIndex
const [type, fid, newIndex, newLastFid] = readFieldBegin(view, byteLength, lastFid)
byteLength = newIndex
lastFid = newLastFid
if (type === CompactType.STOP) {
@ -37,11 +46,11 @@ export function deserializeTCompactProtocol(buffer: ArrayBuffer): [number, Recor
// Handle the field based on its type
let fieldValue
[fieldValue, index] = readElement(view, type, index)
result[`field_${fid}`] = fieldValue
[fieldValue, byteLength] = readElement(view, type, byteLength)
value[`field_${fid}`] = fieldValue
}
return [ index, result ]
return { value, byteLength }
}
/**

@ -63,18 +63,18 @@ describe('deserializeTCompactProtocol function', () => {
// Mark the end of the structure
view.setUint8(index, 0x00) // STOP field
const [bufferLength, result] = deserializeTCompactProtocol(buffer)
expect(bufferLength).toBe(index + 1)
const { byteLength, value } = deserializeTCompactProtocol(buffer)
expect(byteLength).toBe(index + 1)
// Assertions for each basic type
expect(result.field_1).toBe(true) // TRUE
expect(result.field_2).toBe(false) // FALSE
expect(result.field_3).toBe(0x7f) // BYTE
expect(result.field_4).toBe(0x7fff) // I16
expect(result.field_5).toBe(0x7fffffff) // I32
expect(result.field_6).toBe(BigInt('0x7fffffffffffffff')) // I64
expect(result.field_7).toBeCloseTo(123.456) // DOUBLE
expect(result.field_8).toBe('Hello, Thrift!') // STRING
expect(value.field_1).toBe(true) // TRUE
expect(value.field_2).toBe(false) // FALSE
expect(value.field_3).toBe(0x7f) // BYTE
expect(value.field_4).toBe(0x7fff) // I16
expect(value.field_5).toBe(0x7fffffff) // I32
expect(value.field_6).toBe(BigInt('0x7fffffffffffffff')) // I64
expect(value.field_7).toBeCloseTo(123.456) // DOUBLE
expect(value.field_8).toBe('Hello, Thrift!') // STRING
})
})