mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Fix thrift encoding when delta > 15
This commit is contained in:
parent
e68f992101
commit
a56c78de39
@ -55,12 +55,12 @@
|
||||
"hyparquet": "1.20.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/eslint-parser": "7.28.4",
|
||||
"@babel/eslint-parser": "7.28.5",
|
||||
"@types/node": "24.9.1",
|
||||
"@vitest/coverage-v8": "3.2.4",
|
||||
"@vitest/coverage-v8": "4.0.2",
|
||||
"eslint": "9.38.0",
|
||||
"eslint-plugin-jsdoc": "61.1.5",
|
||||
"eslint-plugin-jsdoc": "61.1.7",
|
||||
"typescript": "5.9.3",
|
||||
"vitest": "3.2.4"
|
||||
"vitest": "4.0.2"
|
||||
}
|
||||
}
|
||||
|
||||
@ -28,8 +28,13 @@ export function serializeTCompactProtocol(writer, data) {
|
||||
if (delta <= 0) {
|
||||
throw new Error(`thrift non-monotonic field ID: fid=${fid}, lastFid=${lastFid}`)
|
||||
}
|
||||
// High nibble = delta, low nibble = type
|
||||
writer.appendUint8(delta << 4 | type)
|
||||
// short form (delta <= 15): high nibble = delta, low nibble = type; long form: type byte, then zigzag varint field id
|
||||
if (delta <= 15) {
|
||||
writer.appendUint8(delta << 4 | type)
|
||||
} else {
|
||||
writer.appendUint8(type)
|
||||
writer.appendVarInt(fid << 1 ^ fid >> 15) // zigzag
|
||||
}
|
||||
|
||||
// Write the field content itself
|
||||
writeElement(writer, type, value)
|
||||
@ -137,7 +142,12 @@ function writeElement(writer, type, value) {
|
||||
if (delta <= 0) {
|
||||
throw new Error(`Non-monotonic fid in struct: fid=${fid}, lastFid=${lastFid}`)
|
||||
}
|
||||
writer.appendUint8(delta << 4 | t & 0x0f)
|
||||
if (delta <= 15) {
|
||||
writer.appendUint8(delta << 4 | t)
|
||||
} else {
|
||||
writer.appendUint8(t)
|
||||
writer.appendVarInt(fid << 1 ^ fid >> 15)
|
||||
}
|
||||
writeElement(writer, t, v)
|
||||
lastFid = fid
|
||||
}
|
||||
|
||||
@ -2,6 +2,7 @@ import { deserializeTCompactProtocol } from 'hyparquet/src/thrift.js'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { serializeTCompactProtocol } from '../src/thrift.js'
|
||||
import { ByteWriter } from '../src/bytewriter.js'
|
||||
import { logicalType } from '../src/metadata.js'
|
||||
|
||||
/**
|
||||
* Utility to decode a Thrift-serialized buffer and return the parsed object.
|
||||
@ -30,8 +31,7 @@ describe('serializeTCompactProtocol', () => {
|
||||
|
||||
const writer = new ByteWriter()
|
||||
serializeTCompactProtocol(writer, data)
|
||||
const buf = writer.buffer.slice(0, writer.offset)
|
||||
const result = roundTripDeserialize(buf)
|
||||
const result = roundTripDeserialize(writer.getBuffer())
|
||||
|
||||
expect(result.field_1).toBe(true)
|
||||
expect(result.field_2).toBe(false)
|
||||
@ -61,8 +61,7 @@ describe('serializeTCompactProtocol', () => {
|
||||
|
||||
const writer = new ByteWriter()
|
||||
serializeTCompactProtocol(writer, data)
|
||||
const buf = writer.buffer.slice(0, writer.offset)
|
||||
const result = roundTripDeserialize(buf)
|
||||
const result = roundTripDeserialize(writer.getBuffer())
|
||||
|
||||
expect(result.field_1.field_1).toBe(42)
|
||||
expect(result.field_1.field_2.field_1).toBe(true)
|
||||
@ -74,13 +73,12 @@ describe('serializeTCompactProtocol', () => {
|
||||
const data = {}
|
||||
const writer = new ByteWriter()
|
||||
serializeTCompactProtocol(writer, data)
|
||||
const buf = writer.buffer.slice(0, writer.offset)
|
||||
const arr = new Uint8Array(buf)
|
||||
const arr = new Uint8Array(writer.getBuffer())
|
||||
// The entire buffer should just be [0x00] = STOP
|
||||
expect(arr).toEqual(new Uint8Array([0x00]))
|
||||
|
||||
// Round-trip: should deserialize to an empty object
|
||||
const result = roundTripDeserialize(buf)
|
||||
const result = roundTripDeserialize(writer.getBuffer())
|
||||
expect(result).toEqual({})
|
||||
})
|
||||
|
||||
@ -92,4 +90,21 @@ describe('serializeTCompactProtocol', () => {
|
||||
const writer = new ByteWriter()
|
||||
expect(() => serializeTCompactProtocol(writer, invalidData)).toThrow()
|
||||
})
|
||||
|
||||
it('serializes field IDs with gaps larger than 15', () => {
|
||||
const data = { field_1: 1, field_17: 17 }
|
||||
const writer = new ByteWriter()
|
||||
serializeTCompactProtocol(writer, data)
|
||||
const result = roundTripDeserialize(writer.getBuffer())
|
||||
expect(result.field_1).toBe(1)
|
||||
expect(result.field_17).toBe(17)
|
||||
})
|
||||
|
||||
it('serializes GEOMETRY logicalType struct with field_17', () => {
|
||||
const data = { field_1: logicalType({ type: 'GEOMETRY' }) }
|
||||
const writer = new ByteWriter()
|
||||
serializeTCompactProtocol(writer, data)
|
||||
const result = roundTripDeserialize(writer.getBuffer())
|
||||
expect(result.field_1.field_17).toEqual({})
|
||||
})
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user