offsetArrayBuffer to return an offset view of an array buffer

This commit is contained in:
Kenny Daniel 2024-01-14 13:38:05 -08:00
parent 4708ffca2f
commit 060ef950b2
No known key found for this signature in database
GPG Key ID: 6A3C5E318BE71391
5 changed files with 80 additions and 5 deletions

30
src/asyncbuffer.js Normal file

@ -0,0 +1,30 @@
/**
 * Return an offset view into an existing array buffer.
 * If slice is called on data outside the original array buffer, an error is thrown.
 *
 * This is useful for pre-loading a section of a file into memory,
 * then reading slices from it, but indexed relative to the original file.
 *
 * The returned object reports `byteLength` as `offset + arrayBuffer.byteLength`,
 * and `slice(start, end)` takes positions in that offset coordinate space:
 * valid positions are `offset..offset + arrayBuffer.byteLength` inclusive.
 *
 * @typedef {import('./types.js').ArrayBufferLike} ArrayBufferLike
 * @param {ArrayBuffer} arrayBuffer array buffer to place at offset
 * @param {number} offset offset in bytes
 * @returns {ArrayBufferLike} array buffer view with offset
 * @throws {Error} if offset is negative, or (from slice) if start or end
 *   falls outside the range covered by arrayBuffer
 */
export function offsetArrayBuffer(arrayBuffer, offset) {
  if (offset < 0) throw new Error(`offset must be positive ${offset}`)
  return {
    byteLength: offset + arrayBuffer.byteLength,
    slice(start, end) {
      const limit = offset + arrayBuffer.byteLength
      if (start < offset || start > limit) {
        throw new Error(`start out of bounds: ${start} not in ${offset}..${limit}`)
      }
      // Only an omitted end means "to the end of the buffer". An explicit 0 is
      // a real position and must be validated and translated like any other.
      if (end === undefined) {
        return arrayBuffer.slice(start - offset)
      }
      if (end < offset || end > limit) {
        throw new Error(`end out of bounds: ${end} not in ${offset}..${limit}`)
      }
      // Translate both positions back into the underlying buffer's coordinates.
      return arrayBuffer.slice(start - offset, end - offset)
    },
  }
}

@ -5,6 +5,7 @@ import { snappyUncompress } from './snappy.js'
import { CompressionCodec, Encoding, PageType } from './types.js'
/**
* @typedef {import('./types.js').ArrayBufferLike} ArrayBufferLike
* @typedef {import('./types.js').SchemaElement} SchemaElement
* @typedef {import('./types.js').ColumnMetaData} ColumnMetaData
* @typedef {import('./types.js').RowGroup} RowGroup

@ -11,16 +11,14 @@ import { deserializeTCompactProtocol } from './thrift.js'
/**
* Read parquet header from a buffer.
*
* @typedef {import("./types.d.ts").ArrayBufferLike} ArrayBufferLike
* @typedef {import("./types.d.ts").PageHeader} PageHeader
* @param {ArrayBuffer} arrayBuffer parquet file contents
* @param {ArrayBufferLike} arrayBuffer parquet file contents
* @param {number} offset offset to start reading from
* @returns {Decoded<PageHeader>} metadata object and bytes read
*/
export function parquetHeader(arrayBuffer, offset) {
// DataView for easier manipulation of the buffer
const view = new DataView(arrayBuffer)
const headerBuffer = view.buffer.slice(offset)
const headerBuffer = arrayBuffer.slice(offset)
const { value: header, byteLength } = deserializeTCompactProtocol(headerBuffer)
// Parse parquet header from thrift data

8
src/types.d.ts vendored

@ -1,3 +1,11 @@
/**
 * Just like an ArrayBuffer, but an interface.
 *
 * Describes the minimal surface required of a buffer-like object:
 * a byte length and a slice method that yields a real ArrayBuffer.
 * Any object with these two members satisfies it.
 */
export interface ArrayBufferLike {
// Length in bytes reported by the buffer-like object.
byteLength: number
// Return the bytes between start and end as an ArrayBuffer; end is optional.
slice(start: number, end?: number): ArrayBuffer
}
/**
* Represents a decoded value, and includes the number of bytes read.
* This is used to read data from the file and advance a virtual file pointer.

38
test/asyncbuffer.test.js Normal file

@ -0,0 +1,38 @@
import { describe, expect, it } from 'vitest'
import { offsetArrayBuffer } from '../src/asyncbuffer.js'
// Unit tests for offsetArrayBuffer: reported length, offset-relative slicing,
// and the errors raised for invalid offsets and out-of-range slices.
describe('offsetArrayBuffer', () => {
  it('creates a valid offset array buffer', () => {
    const buffer = new ArrayBuffer(10)
    const offsetBuffer = offsetArrayBuffer(buffer, 5)
    // Reported length covers the leading offset plus the underlying bytes.
    expect(offsetBuffer.byteLength).toBe(15)
  })

  it('correctly slices the array buffer with offset', () => {
    const buffer = new ArrayBuffer(10)
    const offsetBuffer = offsetArrayBuffer(buffer, 5)
    const view = new Uint8Array(buffer)
    for (let i = 0; i < view.length; i++) {
      view[i] = i // Populate the buffer with data [0, 1, 2, ...]
    }

    // Offset positions 5..10 map to bytes 0..5 of the original buffer.
    const slicedBuffer = offsetBuffer.slice(5, 10)
    // Compare the whole contents so an empty or short slice cannot pass vacuously.
    expect(Array.from(new Uint8Array(slicedBuffer))).toEqual([0, 1, 2, 3, 4])
  })

  it('slices to the end of the buffer when end is omitted', () => {
    const buffer = new ArrayBuffer(10)
    const offsetBuffer = offsetArrayBuffer(buffer, 5)
    const view = new Uint8Array(buffer)
    for (let i = 0; i < view.length; i++) {
      view[i] = i
    }

    // Offset position 12 maps to byte 7 of the original buffer.
    const slicedBuffer = offsetBuffer.slice(12)
    expect(Array.from(new Uint8Array(slicedBuffer))).toEqual([7, 8, 9])
  })

  it('throws error for negative offset', () => {
    const buffer = new ArrayBuffer(10)
    expect(() => offsetArrayBuffer(buffer, -5)).toThrow('offset must be positive')
  })

  it('throws error for out of bounds slice', () => {
    const buffer = new ArrayBuffer(10)
    const offsetBuffer = offsetArrayBuffer(buffer, 5)
    expect(() => offsetBuffer.slice(3, 7)).toThrow('start out of bounds')
    expect(() => offsetBuffer.slice(5, 20)).toThrow('end out of bounds')
  })
})