diff --git a/package.json b/package.json index d889ed8..21c77e8 100644 --- a/package.json +++ b/package.json @@ -45,7 +45,7 @@ "test": "vitest run" }, "dependencies": { - "hyparquet": "1.12.0" + "hyparquet": "1.12.1" }, "devDependencies": { "@babel/eslint-parser": "7.27.0", diff --git a/src/bytewriter.js b/src/bytewriter.js index 6850ce0..9124334 100644 --- a/src/bytewriter.js +++ b/src/bytewriter.js @@ -8,8 +8,9 @@ */ export function ByteWriter() { this.buffer = new ArrayBuffer(1024) - this.offset = 0 this.view = new DataView(this.buffer) + this.offset = 0 // bytes written + this.index = 0 // index in buffer return this } @@ -18,8 +19,8 @@ export function ByteWriter() { */ ByteWriter.prototype.ensure = function(size) { // auto-expanding buffer - if (this.offset + size > this.buffer.byteLength) { - const newSize = Math.max(this.buffer.byteLength * 2, this.offset + size) + if (this.index + size > this.buffer.byteLength) { + const newSize = Math.max(this.buffer.byteLength * 2, this.index + size) const newBuffer = new ArrayBuffer(newSize) // TODO: save buffers until later and merge once? new Uint8Array(newBuffer).set(new Uint8Array(this.buffer)) @@ -32,61 +33,67 @@ ByteWriter.prototype.finish = function() { } ByteWriter.prototype.getBuffer = function() { - return this.buffer.slice(0, this.offset) + return this.buffer.slice(0, this.index) } /** * @param {number} value */ ByteWriter.prototype.appendUint8 = function(value) { - this.ensure(this.offset + 1) - this.view.setUint8(this.offset, value) + this.ensure(this.index + 1) + this.view.setUint8(this.index, value) this.offset++ + this.index++ } /** * @param {number} value */ ByteWriter.prototype.appendUint32 = function(value) { - this.ensure(this.offset + 4) - this.view.setUint32(this.offset, value, true) + this.ensure(this.index + 4) + this.view.setUint32(this.index, value, true) this.offset += 4 + this.index += 4 } /** * @param {number} value */ ByteWriter.prototype.appendInt32 = function(value) { - this.ensure(this.offset + 4) - this.view.setInt32(this.offset, value, true) + this.ensure(this.index + 4) + this.view.setInt32(this.index, value, true) this.offset += 4 + this.index += 4 } /** * @param {bigint} value */ ByteWriter.prototype.appendInt64 = function(value) { - this.ensure(this.offset + 8) - this.view.setBigInt64(this.offset, BigInt(value), true) + this.ensure(this.index + 8) + this.view.setBigInt64(this.index, BigInt(value), true) this.offset += 8 + this.index += 8 } /** * @param {number} value */ ByteWriter.prototype.appendFloat32 = function(value) { - this.ensure(this.offset + 8) - this.view.setFloat32(this.offset, value, true) + this.ensure(this.index + 8) + this.view.setFloat32(this.index, value, true) this.offset += 4 + this.index += 4 } /** * @param {number} value */ ByteWriter.prototype.appendFloat64 = function(value) { - this.ensure(this.offset + 8) - this.view.setFloat64(this.offset, value, true) + this.ensure(this.index + 8) + this.view.setFloat64(this.index, value, true) this.offset += 8 + this.index += 8 } /** @@ -100,9 +107,10 @@ ByteWriter.prototype.appendBuffer = function(value) { * @param {Uint8Array} value */ ByteWriter.prototype.appendBytes = function(value) { - this.ensure(this.offset + value.length) - new Uint8Array(this.buffer, this.offset, value.length).set(value) + this.ensure(this.index + value.length) + new Uint8Array(this.buffer, this.index, value.length).set(value) this.offset += value.length + this.index += value.length } /** diff --git a/src/column.js b/src/column.js index 2388f26..7320c3e 100644 --- a/src/column.js +++ b/src/column.js @@ -64,7 +64,7 @@ export function writeColumn(writer, schemaPath, values, compressed, stats) { codec: compressed ? 'SNAPPY' : 'UNCOMPRESSED', num_values: BigInt(num_values), total_compressed_size: BigInt(writer.offset - offsetStart), - total_uncompressed_size: BigInt(writer.offset - offsetStart), + total_uncompressed_size: BigInt(writer.offset - offsetStart), // TODO data_page_offset, dictionary_page_offset, statistics, diff --git a/src/filewriter.js b/src/filewriter.js index 34a748f..ef01cb0 100644 --- a/src/filewriter.js +++ b/src/filewriter.js @@ -17,10 +17,10 @@ export function fileWriter(filename) { fs.writeFileSync(filename, '', { flag: 'w' }) function flush() { - const chunk = writer.buffer.slice(0, writer.offset) + const chunk = writer.buffer.slice(0, writer.index) // TODO: async fs.writeFileSync(filename, new Uint8Array(chunk), { flag: 'a' }) - writer.offset = 0 + writer.index = 0 } /** @@ -28,11 +28,11 @@ export function fileWriter(filename) { * @param {number} size */ writer.ensure = function(size) { - if (writer.offset > chunkSize) { + if (writer.index > chunkSize) { flush() } - if (writer.offset + size > writer.buffer.byteLength) { - const newSize = Math.max(writer.buffer.byteLength * 2, writer.offset + size) + if (writer.index + size > writer.buffer.byteLength) { + const newSize = Math.max(writer.buffer.byteLength * 2, writer.index + size) const newBuffer = new ArrayBuffer(newSize) new Uint8Array(newBuffer).set(new Uint8Array(writer.buffer)) writer.buffer = newBuffer diff --git a/src/metadata.js b/src/metadata.js index ba21a1c..390c0e4 100644 --- a/src/metadata.js +++ b/src/metadata.js @@ -83,8 +83,10 @@ export function writeMetadata(writer, metadata) { field_6: metadata.created_by, } + // write metadata as thrift const metadataStart = writer.offset serializeTCompactProtocol(writer, compact) + // write metadata length const metadataLength = writer.offset - metadataStart writer.appendUint32(metadataLength) } diff --git a/src/types.d.ts b/src/types.d.ts index 810432a..bfb8f33 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -25,8 +25,9 @@ export interface ColumnData { export interface Writer { buffer: ArrayBuffer - offset: number view: DataView + offset: number + ensure(size: number): void finish(): void getBuffer(): ArrayBuffer