mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Fix offset handling for filewriter
This commit is contained in:
parent
903530f1ff
commit
18294de6ec
@ -45,7 +45,7 @@
|
||||
"test": "vitest run"
|
||||
},
|
||||
"dependencies": {
|
||||
"hyparquet": "1.12.0"
|
||||
"hyparquet": "1.12.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/eslint-parser": "7.27.0",
|
||||
|
||||
@ -8,8 +8,9 @@
|
||||
*/
|
||||
export function ByteWriter() {
|
||||
this.buffer = new ArrayBuffer(1024)
|
||||
this.offset = 0
|
||||
this.view = new DataView(this.buffer)
|
||||
this.offset = 0 // bytes written
|
||||
this.index = 0 // index in buffer
|
||||
return this
|
||||
}
|
||||
|
||||
@ -18,8 +19,8 @@ export function ByteWriter() {
|
||||
*/
|
||||
ByteWriter.prototype.ensure = function(size) {
|
||||
// auto-expanding buffer
|
||||
if (this.offset + size > this.buffer.byteLength) {
|
||||
const newSize = Math.max(this.buffer.byteLength * 2, this.offset + size)
|
||||
if (this.index + size > this.buffer.byteLength) {
|
||||
const newSize = Math.max(this.buffer.byteLength * 2, this.index + size)
|
||||
const newBuffer = new ArrayBuffer(newSize)
|
||||
// TODO: save buffers until later and merge once?
|
||||
new Uint8Array(newBuffer).set(new Uint8Array(this.buffer))
|
||||
@ -32,61 +33,67 @@ ByteWriter.prototype.finish = function() {
|
||||
}
|
||||
|
||||
ByteWriter.prototype.getBuffer = function() {
|
||||
return this.buffer.slice(0, this.offset)
|
||||
return this.buffer.slice(0, this.index)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
ByteWriter.prototype.appendUint8 = function(value) {
|
||||
this.ensure(this.offset + 1)
|
||||
this.view.setUint8(this.offset, value)
|
||||
this.ensure(this.index + 1)
|
||||
this.view.setUint8(this.index, value)
|
||||
this.offset++
|
||||
this.index++
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
ByteWriter.prototype.appendUint32 = function(value) {
|
||||
this.ensure(this.offset + 4)
|
||||
this.view.setUint32(this.offset, value, true)
|
||||
this.ensure(this.index + 4)
|
||||
this.view.setUint32(this.index, value, true)
|
||||
this.offset += 4
|
||||
this.index += 4
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
ByteWriter.prototype.appendInt32 = function(value) {
|
||||
this.ensure(this.offset + 4)
|
||||
this.view.setInt32(this.offset, value, true)
|
||||
this.ensure(this.index + 4)
|
||||
this.view.setInt32(this.index, value, true)
|
||||
this.offset += 4
|
||||
this.index += 4
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {bigint} value
|
||||
*/
|
||||
ByteWriter.prototype.appendInt64 = function(value) {
|
||||
this.ensure(this.offset + 8)
|
||||
this.view.setBigInt64(this.offset, BigInt(value), true)
|
||||
this.ensure(this.index + 8)
|
||||
this.view.setBigInt64(this.index, BigInt(value), true)
|
||||
this.offset += 8
|
||||
this.index += 8
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
ByteWriter.prototype.appendFloat32 = function(value) {
|
||||
this.ensure(this.offset + 8)
|
||||
this.view.setFloat32(this.offset, value, true)
|
||||
this.ensure(this.index + 8)
|
||||
this.view.setFloat32(this.index, value, true)
|
||||
this.offset += 4
|
||||
this.index += 4
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
ByteWriter.prototype.appendFloat64 = function(value) {
|
||||
this.ensure(this.offset + 8)
|
||||
this.view.setFloat64(this.offset, value, true)
|
||||
this.ensure(this.index + 8)
|
||||
this.view.setFloat64(this.index, value, true)
|
||||
this.offset += 8
|
||||
this.index += 8
|
||||
}
|
||||
|
||||
/**
|
||||
@ -100,9 +107,10 @@ ByteWriter.prototype.appendBuffer = function(value) {
|
||||
* @param {Uint8Array} value
|
||||
*/
|
||||
ByteWriter.prototype.appendBytes = function(value) {
|
||||
this.ensure(this.offset + value.length)
|
||||
new Uint8Array(this.buffer, this.offset, value.length).set(value)
|
||||
this.ensure(this.index + value.length)
|
||||
new Uint8Array(this.buffer, this.index, value.length).set(value)
|
||||
this.offset += value.length
|
||||
this.index += value.length
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -64,7 +64,7 @@ export function writeColumn(writer, schemaPath, values, compressed, stats) {
|
||||
codec: compressed ? 'SNAPPY' : 'UNCOMPRESSED',
|
||||
num_values: BigInt(num_values),
|
||||
total_compressed_size: BigInt(writer.offset - offsetStart),
|
||||
total_uncompressed_size: BigInt(writer.offset - offsetStart),
|
||||
total_uncompressed_size: BigInt(writer.offset - offsetStart), // TODO
|
||||
data_page_offset,
|
||||
dictionary_page_offset,
|
||||
statistics,
|
||||
|
||||
@ -17,10 +17,10 @@ export function fileWriter(filename) {
|
||||
fs.writeFileSync(filename, '', { flag: 'w' })
|
||||
|
||||
function flush() {
|
||||
const chunk = writer.buffer.slice(0, writer.offset)
|
||||
const chunk = writer.buffer.slice(0, writer.index)
|
||||
// TODO: async
|
||||
fs.writeFileSync(filename, new Uint8Array(chunk), { flag: 'a' })
|
||||
writer.offset = 0
|
||||
writer.index = 0
|
||||
}
|
||||
|
||||
/**
|
||||
@ -28,11 +28,11 @@ export function fileWriter(filename) {
|
||||
* @param {number} size
|
||||
*/
|
||||
writer.ensure = function(size) {
|
||||
if (writer.offset > chunkSize) {
|
||||
if (writer.index > chunkSize) {
|
||||
flush()
|
||||
}
|
||||
if (writer.offset + size > writer.buffer.byteLength) {
|
||||
const newSize = Math.max(writer.buffer.byteLength * 2, writer.offset + size)
|
||||
if (writer.index + size > writer.buffer.byteLength) {
|
||||
const newSize = Math.max(writer.buffer.byteLength * 2, writer.index + size)
|
||||
const newBuffer = new ArrayBuffer(newSize)
|
||||
new Uint8Array(newBuffer).set(new Uint8Array(writer.buffer))
|
||||
writer.buffer = newBuffer
|
||||
|
||||
@ -83,8 +83,10 @@ export function writeMetadata(writer, metadata) {
|
||||
field_6: metadata.created_by,
|
||||
}
|
||||
|
||||
// write metadata as thrift
|
||||
const metadataStart = writer.offset
|
||||
serializeTCompactProtocol(writer, compact)
|
||||
// write metadata length
|
||||
const metadataLength = writer.offset - metadataStart
|
||||
writer.appendUint32(metadataLength)
|
||||
}
|
||||
|
||||
3
src/types.d.ts
vendored
3
src/types.d.ts
vendored
@ -25,8 +25,9 @@ export interface ColumnData {
|
||||
|
||||
export interface Writer {
|
||||
buffer: ArrayBuffer
|
||||
offset: number
|
||||
view: DataView
|
||||
offset: number
|
||||
|
||||
ensure(size: number): void
|
||||
finish(): void
|
||||
getBuffer(): ArrayBuffer
|
||||
|
||||
Loading…
Reference in New Issue
Block a user