Export cachedAsyncBuffer

This commit is contained in:
Kenny Daniel 2024-10-16 01:09:18 -07:00
parent da37b512d0
commit 5d21b09b7a
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
5 changed files with 62 additions and 51 deletions

@ -1,50 +0,0 @@
import type { AsyncBuffer, Awaitable } from "../src/types.js"
/**
 * Wraps an AsyncBuffer in a caching layer.
 * Each distinct byte range is read from the underlying file at most once;
 * later requests for the same range get the stored result back. Handy when
 * the same slices are read repeatedly, possibly over a network.
 *
 * TODO: require data to be loaded with preload(), reads outside of preload rejected.
 *
 * @param {AsyncBuffer} file file-like object to cache
 * @returns {AsyncBuffer} cached file-like object
 */
export function cachedAsyncBuffer(file: AsyncBuffer): AsyncBuffer {
  // stored results, keyed by the canonical 'start,end' string
  const slices = new Map<string, Awaitable<ArrayBuffer>>()

  const slice = (start: number, end?: number): Awaitable<ArrayBuffer> => {
    // canonical key so that "100-200" and "100-" land on the same entry
    const rangeKey = cacheKey(start, end, file.byteLength)
    let entry = slices.get(rangeKey)
    if (!entry) {
      // nothing stored yet: read from the file and remember the result
      entry = file.slice(start, end)
      slices.set(rangeKey, entry)
    }
    return entry
  }

  return { byteLength: file.byteLength, slice }
}
/**
 * Builds the canonical cache key, a string of the form 'start,end', for a byte range.
 * Explicit ranges, open-ended ranges and suffix ranges (negative start) are
 * mapped to the same key when the file size is known.
 *
 * Throws an Error when a suffix range also has an end, or when start > end.
 */
function cacheKey(start: number, end: number | undefined, fileSize: number | undefined): string {
  const isSuffix = start < 0

  // a suffix range may not carry an explicit end
  if (isSuffix && end !== undefined) {
    throw new Error(`invalid suffix range [${start}, ${end}]`)
  }

  // fully specified range: use the bounds as given
  if (end !== undefined) {
    if (start > end) throw new Error(`invalid empty range [${start}, ${end}]`)
    return `${start},${end}`
  }

  // open-ended range with unknown file size: cannot be resolved any further
  if (fileSize === undefined) return `${start},`

  // open-ended range with known file size: resolve to absolute offsets
  const from = isSuffix ? fileSize + start : start
  return `${from},${fileSize}`
}

@ -1,6 +1,6 @@
import { cachedAsyncBuffer } from '../../src/asyncBuffer.js'
import type { AsyncBuffer, FileMetaData } from '../../src/hyparquet.js'
import { asyncBufferFromUrl } from '../../src/utils.js'
import { cachedAsyncBuffer } from '../asyncBuffer.js'
// Serializable constructors for AsyncBuffers
interface AsyncBufferFromFile {

54
src/asyncBuffer.js Normal file

@ -0,0 +1,54 @@
/**
 * Returns a cached layer on top of an AsyncBuffer. For caching slices of a file
 * that are read multiple times, possibly over a network.
 *
 * Each distinct byte range is read from the underlying file at most once;
 * later requests for the same range return the stored result.
 *
 * @typedef {import('./types.js').AsyncBuffer} AsyncBuffer
 * @param {AsyncBuffer} file file-like object to cache
 * @returns {AsyncBuffer} cached file-like object
 */
export function cachedAsyncBuffer(file) {
  // Read the length once; it is exposed on the wrapper and used for cache keys.
  const { byteLength } = file
  // Stored slice results, keyed by the canonical 'start,end' string.
  const cache = new Map()
  return {
    byteLength,
    /**
     * @param {number} start
     * @param {number} [end]
     * @returns {import('./types.js').Awaitable<ArrayBuffer>}
     */
    slice(start, end) {
      const key = cacheKey(start, end, byteLength)
      const cached = cache.get(key)
      if (cached) return cached
      // cache miss, read from file
      // Call slice as a method of `file` so its `this` stays bound; a detached
      // slice breaks for ArrayBuffer and for class-based AsyncBuffers.
      const promise = file.slice(start, end)
      cache.set(key, promise)
      return promise
    },
  }
}
/**
 * Returns canonical cache key for a byte range 'start,end'.
 * Normalize int-range and suffix-range requests to the same key.
 *
 * @param {number} start start byte of range, negative for a suffix range
 * @param {number} [end] end byte of range, or undefined for an open-ended or suffix range
 * @param {number} [size] size of file, or undefined if the size is not known
 * @returns {string}
 * @throws {Error} if a suffix range also has an end, or if start > end
 */
function cacheKey(start, end, size) {
  if (start < 0) {
    if (end !== undefined) throw new Error(`invalid suffix range [${start}, ${end}]`)
    if (size === undefined) return `${start},`
    // A suffix longer than the file covers the whole file, so clamp the start
    // to 0 to share the key of the equivalent '0,size' request.
    return `${Math.max(0, size + start)},${size}`
  } else if (end !== undefined) {
    if (start > end) throw new Error(`invalid empty range [${start}, ${end}]`)
    return `${start},${end}`
  } else if (size === undefined) {
    return `${start},`
  } else {
    return `${start},${size}`
  }
}

5
src/hyparquet.d.ts vendored

@ -130,6 +130,11 @@ export function asyncBufferFromFile(filename: string): Promise<AsyncBuffer>
*/
export function byteLengthFromUrl(url: string): Promise<number>
/**
 * Returns a cached layer on top of an AsyncBuffer.
 * A slice that was already requested for a byte range is returned from the
 * cache instead of being read from the underlying buffer again.
 *
 * @param asyncBuffer file-like object to cache
 * @returns cached file-like object
 */
export function cachedAsyncBuffer(asyncBuffer: AsyncBuffer): AsyncBuffer
/**
* Parquet query options for reading data
*/

@ -9,6 +9,8 @@ export { snappyUncompress } from './snappy.js'
export { asyncBufferFromFile, asyncBufferFromUrl, byteLengthFromUrl, toJson } from './utils.js'
export { cachedAsyncBuffer } from './asyncBuffer.js'
/**
* @param {import('./hyparquet.js').ParquetReadOptions} options
* @returns {Promise<Array<Record<string, any>>>}