diff --git a/package.json b/package.json index 28cdb72..6a01c03 100644 --- a/package.json +++ b/package.json @@ -55,12 +55,12 @@ "hyparquet": "1.20.0" }, "devDependencies": { - "@babel/eslint-parser": "7.28.4", + "@babel/eslint-parser": "7.28.5", "@types/node": "24.9.1", - "@vitest/coverage-v8": "3.2.4", + "@vitest/coverage-v8": "4.0.2", "eslint": "9.38.0", - "eslint-plugin-jsdoc": "61.1.5", + "eslint-plugin-jsdoc": "61.1.7", "typescript": "5.9.3", - "vitest": "3.2.4" + "vitest": "4.0.2" } } diff --git a/src/thrift.js b/src/thrift.js index 845e327..105353e 100644 --- a/src/thrift.js +++ b/src/thrift.js @@ -28,8 +28,13 @@ export function serializeTCompactProtocol(writer, data) { if (delta <= 0) { throw new Error(`thrift non-monotonic field ID: fid=${fid}, lastFid=${lastFid}`) } - // High nibble = delta, low nibble = type - writer.appendUint8(delta << 4 | type) + // high nibble = delta, low nibble = type < 15 or zigzag + if (delta <= 15) { + writer.appendUint8(delta << 4 | type) + } else { + writer.appendUint8(type) + writer.appendVarInt(fid << 1 ^ fid >> 15) // zigzag + } // Write the field content itself writeElement(writer, type, value) @@ -137,7 +142,12 @@ function writeElement(writer, type, value) { if (delta <= 0) { throw new Error(`Non-monotonic fid in struct: fid=${fid}, lastFid=${lastFid}`) } - writer.appendUint8(delta << 4 | t & 0x0f) + if (delta <= 15) { + writer.appendUint8(delta << 4 | t) + } else { + writer.appendUint8(t) + writer.appendVarInt(fid << 1 ^ fid >> 15) + } writeElement(writer, t, v) lastFid = fid } diff --git a/test/thrift.test.js b/test/thrift.test.js index d54114a..967f3db 100644 --- a/test/thrift.test.js +++ b/test/thrift.test.js @@ -2,6 +2,7 @@ import { deserializeTCompactProtocol } from 'hyparquet/src/thrift.js' import { describe, expect, it } from 'vitest' import { serializeTCompactProtocol } from '../src/thrift.js' import { ByteWriter } from '../src/bytewriter.js' +import { logicalType } from '../src/metadata.js' /** * Utility to decode a Thrift-serialized buffer and return the parsed object. @@ -30,8 +31,7 @@ describe('serializeTCompactProtocol', () => { const writer = new ByteWriter() serializeTCompactProtocol(writer, data) - const buf = writer.buffer.slice(0, writer.offset) - const result = roundTripDeserialize(buf) + const result = roundTripDeserialize(writer.getBuffer()) expect(result.field_1).toBe(true) expect(result.field_2).toBe(false) @@ -61,8 +61,7 @@ describe('serializeTCompactProtocol', () => { const writer = new ByteWriter() serializeTCompactProtocol(writer, data) - const buf = writer.buffer.slice(0, writer.offset) - const result = roundTripDeserialize(buf) + const result = roundTripDeserialize(writer.getBuffer()) expect(result.field_1.field_1).toBe(42) expect(result.field_1.field_2.field_1).toBe(true) @@ -74,13 +73,12 @@ describe('serializeTCompactProtocol', () => { const data = {} const writer = new ByteWriter() serializeTCompactProtocol(writer, data) - const buf = writer.buffer.slice(0, writer.offset) - const arr = new Uint8Array(buf) + const arr = new Uint8Array(writer.getBuffer()) // The entire buffer should just be [0x00] = STOP expect(arr).toEqual(new Uint8Array([0x00])) // Round-trip: should deserialize to an empty object - const result = roundTripDeserialize(buf) + const result = roundTripDeserialize(writer.getBuffer()) expect(result).toEqual({}) }) @@ -92,4 +90,21 @@ describe('serializeTCompactProtocol', () => { const writer = new ByteWriter() expect(() => serializeTCompactProtocol(writer, invalidData)).toThrow() }) + + it('serializes field IDs with gaps larger than 15', () => { + const data = { field_1: 1, field_17: 17 } + const writer = new ByteWriter() + serializeTCompactProtocol(writer, data) + const result = roundTripDeserialize(writer.getBuffer()) + expect(result.field_1).toBe(1) + expect(result.field_17).toBe(17) + }) + + it('serializes GEOMETRY logicalType struct with field_17', () => { + const data = { field_1: logicalType({ type: 'GEOMETRY' }) } + const writer = new ByteWriter() + serializeTCompactProtocol(writer, data) + const result = roundTripDeserialize(writer.getBuffer()) + expect(result.field_1.field_17).toEqual({}) + }) })