Fix offset handling for filewriter

This commit is contained in:
Kenny Daniel 2025-04-15 22:21:52 -07:00
parent 903530f1ff
commit 18294de6ec
No known key found for this signature in database
GPG Key ID: FDF16101AF5AFD3A
6 changed files with 37 additions and 26 deletions

@ -45,7 +45,7 @@
"test": "vitest run"
},
"dependencies": {
"hyparquet": "1.12.0"
"hyparquet": "1.12.1"
},
"devDependencies": {
"@babel/eslint-parser": "7.27.0",

@ -8,8 +8,9 @@
*/
export function ByteWriter() {
this.buffer = new ArrayBuffer(1024)
this.offset = 0
this.view = new DataView(this.buffer)
this.offset = 0 // bytes written
this.index = 0 // index in buffer
return this
}
@ -18,8 +19,8 @@ export function ByteWriter() {
*/
ByteWriter.prototype.ensure = function(size) {
// auto-expanding buffer
if (this.offset + size > this.buffer.byteLength) {
const newSize = Math.max(this.buffer.byteLength * 2, this.offset + size)
if (this.index + size > this.buffer.byteLength) {
const newSize = Math.max(this.buffer.byteLength * 2, this.index + size)
const newBuffer = new ArrayBuffer(newSize)
// TODO: save buffers until later and merge once?
new Uint8Array(newBuffer).set(new Uint8Array(this.buffer))
@ -32,61 +33,67 @@ ByteWriter.prototype.finish = function() {
}
ByteWriter.prototype.getBuffer = function() {
return this.buffer.slice(0, this.offset)
return this.buffer.slice(0, this.index)
}
/**
* @param {number} value
*/
ByteWriter.prototype.appendUint8 = function(value) {
this.ensure(this.offset + 1)
this.view.setUint8(this.offset, value)
this.ensure(this.index + 1)
this.view.setUint8(this.index, value)
this.offset++
this.index++
}
/**
* @param {number} value
*/
ByteWriter.prototype.appendUint32 = function(value) {
this.ensure(this.offset + 4)
this.view.setUint32(this.offset, value, true)
this.ensure(this.index + 4)
this.view.setUint32(this.index, value, true)
this.offset += 4
this.index += 4
}
/**
* @param {number} value
*/
ByteWriter.prototype.appendInt32 = function(value) {
this.ensure(this.offset + 4)
this.view.setInt32(this.offset, value, true)
this.ensure(this.index + 4)
this.view.setInt32(this.index, value, true)
this.offset += 4
this.index += 4
}
/**
* @param {bigint} value
*/
ByteWriter.prototype.appendInt64 = function(value) {
this.ensure(this.offset + 8)
this.view.setBigInt64(this.offset, BigInt(value), true)
this.ensure(this.index + 8)
this.view.setBigInt64(this.index, BigInt(value), true)
this.offset += 8
this.index += 8
}
/**
* @param {number} value
*/
ByteWriter.prototype.appendFloat32 = function(value) {
this.ensure(this.offset + 8)
this.view.setFloat32(this.offset, value, true)
this.ensure(this.index + 8)
this.view.setFloat32(this.index, value, true)
this.offset += 4
this.index += 4
}
/**
* @param {number} value
*/
ByteWriter.prototype.appendFloat64 = function(value) {
this.ensure(this.offset + 8)
this.view.setFloat64(this.offset, value, true)
this.ensure(this.index + 8)
this.view.setFloat64(this.index, value, true)
this.offset += 8
this.index += 8
}
/**
@ -100,9 +107,10 @@ ByteWriter.prototype.appendBuffer = function(value) {
* @param {Uint8Array} value
*/
ByteWriter.prototype.appendBytes = function(value) {
this.ensure(this.offset + value.length)
new Uint8Array(this.buffer, this.offset, value.length).set(value)
this.ensure(this.index + value.length)
new Uint8Array(this.buffer, this.index, value.length).set(value)
this.offset += value.length
this.index += value.length
}
/**

@ -64,7 +64,7 @@ export function writeColumn(writer, schemaPath, values, compressed, stats) {
codec: compressed ? 'SNAPPY' : 'UNCOMPRESSED',
num_values: BigInt(num_values),
total_compressed_size: BigInt(writer.offset - offsetStart),
total_uncompressed_size: BigInt(writer.offset - offsetStart),
total_uncompressed_size: BigInt(writer.offset - offsetStart), // TODO
data_page_offset,
dictionary_page_offset,
statistics,

@ -17,10 +17,10 @@ export function fileWriter(filename) {
fs.writeFileSync(filename, '', { flag: 'w' })
function flush() {
  // Only the first `index` bytes of the buffer hold data not yet written to disk.
  const chunk = writer.buffer.slice(0, writer.index)
  // TODO: async
  fs.writeFileSync(filename, new Uint8Array(chunk), { flag: 'a' })
  // Rewind the buffer position only. `offset` is the running count of bytes
  // written and is read elsewhere to compute sizes, so it must keep counting.
  writer.index = 0
}
/**
@ -28,11 +28,11 @@ export function fileWriter(filename) {
* @param {number} size
*/
writer.ensure = function(size) {
if (writer.offset > chunkSize) {
if (writer.index > chunkSize) {
flush()
}
if (writer.offset + size > writer.buffer.byteLength) {
const newSize = Math.max(writer.buffer.byteLength * 2, writer.offset + size)
if (writer.index + size > writer.buffer.byteLength) {
const newSize = Math.max(writer.buffer.byteLength * 2, writer.index + size)
const newBuffer = new ArrayBuffer(newSize)
new Uint8Array(newBuffer).set(new Uint8Array(writer.buffer))
writer.buffer = newBuffer

@ -83,8 +83,10 @@ export function writeMetadata(writer, metadata) {
field_6: metadata.created_by,
}
// write metadata as thrift
const metadataStart = writer.offset
serializeTCompactProtocol(writer, compact)
// write metadata length
const metadataLength = writer.offset - metadataStart
writer.appendUint32(metadataLength)
}

3
src/types.d.ts vendored

@ -25,8 +25,9 @@ export interface ColumnData {
export interface Writer {
buffer: ArrayBuffer
offset: number
view: DataView
offset: number
ensure(size: number): void
finish(): void
getBuffer(): ArrayBuffer