// Patch scaffolding kept from the original diff:
//   From 060ef950b251bb618f38e08457a83c9c43a83aac Mon Sep 17 00:00:00 2001
//   From: Kenny Daniel
//   Date: Sun, 14 Jan 2024 13:38:05 -0800
//   Subject: [PATCH] offsetArrayBuffer to return an offset view of an array buffer
//   Files: src/asyncbuffer.js (new), src/column.js, src/header.js,
//          src/types.d.ts, test/asyncbuffer.test.js (new)
//
// File: src/asyncbuffer.js (new file mode 100644, index 0000000..a69dd66)

/**
 * Return an offset view into an existing array buffer.
 * If slice is called on data outside the original array buffer, an error is thrown.
 *
 * This is useful for pre-loading a section of a file into memory,
 * then reading slices from it, but indexed relative to the original file.
 *
 * The returned object reports `byteLength` as `offset + arrayBuffer.byteLength`,
 * and `slice(start, end)` takes both positions in original-file coordinates.
 * When `end` is omitted the slice runs to the end of the loaded buffer.
 *
 * @typedef {import('./types.js').ArrayBufferLike} ArrayBufferLike
 * @param {ArrayBuffer} arrayBuffer array buffer to place at offset
 * @param {number} offset offset in bytes
 * @returns {ArrayBufferLike} array buffer view with offset
 * @throws {Error} if offset is negative, or (from slice) if start or end
 *   falls outside offset..offset + arrayBuffer.byteLength
 */
export function offsetArrayBuffer(arrayBuffer, offset) {
  if (offset < 0) throw new Error(`offset must be positive ${offset}`)
  return {
    byteLength: offset + arrayBuffer.byteLength,
    slice(start, end) {
      const limit = offset + arrayBuffer.byteLength
      if (start < offset || start > limit) {
        throw new Error(`start out of bounds: ${start} not in ${offset}..${limit}`)
      }
      // Only a missing end means "to the end of the buffer". A truthiness
      // check would also let end = 0 skip validation.
      if (end === undefined) {
        return arrayBuffer.slice(start - offset)
      }
      if (end < offset || end > limit) {
        throw new Error(`end out of bounds: ${end} not in ${offset}..${limit}`)
      }
      // Translate from original-file coordinates to buffer-local coordinates.
      return arrayBuffer.slice(start - offset, end - offset)
    },
  }
}

// Next file header from the original diff (its hunk follows on the next line):
//   diff --git a/src/column.js b/src/column.js
//   index 37466f2..7b4daac 100644
//   --- a/src/column.js
//   +++ b/src/column.js
@@ -5,6 +5,7 @@ import { snappyUncompress } from './snappy.js' import { CompressionCodec, Encoding, PageType } from './types.js' /** + * @typedef {import('./types.js').ArrayBufferLike} ArrayBufferLike * @typedef {import('./types.js').SchemaElement} SchemaElement * @typedef {import('./types.js').ColumnMetaData} ColumnMetaData * @typedef {import('./types.js').RowGroup} RowGroup diff --git a/src/header.js b/src/header.js index d6c6e6c..61be0a6 100644 --- a/src/header.js +++ b/src/header.js @@ -11,16 +11,14 @@ import { deserializeTCompactProtocol } from './thrift.js' /** * Read parquet header from a buffer. * + * @typedef {import("./types.d.ts").ArrayBufferLike} ArrayBufferLike * @typedef {import("./types.d.ts").PageHeader} PageHeader - * @param {ArrayBuffer} arrayBuffer parquet file contents + * @param {ArrayBufferLike} arrayBuffer parquet file contents * @param {number} offset offset to start reading from * @returns {Decoded} metadata object and bytes read */ export function parquetHeader(arrayBuffer, offset) { - // DataView for easier manipulation of the buffer - const view = new DataView(arrayBuffer) - - const headerBuffer = view.buffer.slice(offset) + const headerBuffer = arrayBuffer.slice(offset) const { value: header, byteLength } = deserializeTCompactProtocol(headerBuffer) // Parse parquet header from thrift data diff --git a/src/types.d.ts b/src/types.d.ts index 6ca0b76..a47095d 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -1,3 +1,11 @@ +/** + * Structural stand-in for ArrayBuffer: any object exposing a numeric byteLength + * and a slice(start, end?) method that returns a real ArrayBuffer. + */ +export interface ArrayBufferLike { + byteLength: number + slice(start: number, end?: number): ArrayBuffer +} + /** * Represents a decoded value, and includes the number of bytes read. * This is used to read data from the file and advance a virtual file pointer. 
// Patch scaffolding kept from the original diff:
//   diff --git a/test/asyncbuffer.test.js b/test/asyncbuffer.test.js
//   new file mode 100644
//   index 0000000..b5dc2cb
//   --- /dev/null
//   +++ b/test/asyncbuffer.test.js
import { describe, expect, it } from 'vitest'
import { offsetArrayBuffer } from '../src/asyncbuffer.js'

/**
 * Build a 10-byte buffer holding [0, 1, ..., 9] so that slice contents
 * can be checked against known values.
 *
 * @returns {ArrayBuffer}
 */
function sequentialBuffer() {
  const buffer = new ArrayBuffer(10)
  const view = new Uint8Array(buffer)
  for (let i = 0; i < view.length; i++) {
    view[i] = i
  }
  return buffer
}

describe('offsetArrayBuffer', () => {
  it('creates a valid offset array buffer', () => {
    const buffer = new ArrayBuffer(10)
    const offsetBuffer = offsetArrayBuffer(buffer, 5)
    // byteLength is reported in original-file coordinates: offset + loaded bytes
    expect(offsetBuffer.byteLength).toBe(15)
  })

  it('correctly slices the array buffer with offset', () => {
    const offsetBuffer = offsetArrayBuffer(sequentialBuffer(), 5)

    // File positions 5..10 map to the first five bytes of the loaded buffer.
    const slicedView = new Uint8Array(offsetBuffer.slice(5, 10))

    // Compare the whole array so an empty or short slice fails the test
    // instead of passing through a zero-iteration loop.
    expect(Array.from(slicedView)).toEqual([0, 1, 2, 3, 4])
  })

  it('slices to the end of the buffer when end is omitted', () => {
    const offsetBuffer = offsetArrayBuffer(sequentialBuffer(), 5)

    // File position 10 is buffer index 5; with no end the rest is returned.
    const slicedView = new Uint8Array(offsetBuffer.slice(10))
    expect(Array.from(slicedView)).toEqual([5, 6, 7, 8, 9])
  })

  it('throws error for negative offset', () => {
    const buffer = new ArrayBuffer(10)
    expect(() => offsetArrayBuffer(buffer, -5)).toThrow('offset must be positive')
  })

  it('throws error for out of bounds slice', () => {
    const buffer = new ArrayBuffer(10)
    const offsetBuffer = offsetArrayBuffer(buffer, 5)
    expect(() => offsetBuffer.slice(3, 7)).toThrow('start out of bounds')
    expect(() => offsetBuffer.slice(5, 20)).toThrow('end out of bounds')
  })
})