mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-01-03 18:26:38 +00:00
Export cachedAsyncBuffer
This commit is contained in:
parent
da37b512d0
commit
5d21b09b7a
@ -1,50 +0,0 @@
|
||||
import type { AsyncBuffer, Awaitable } from "../src/types.js"
|
||||
|
||||
/**
 * Returns a caching layer on top of an AsyncBuffer.
 * This is useful for caching slices of a file that are read multiple times,
 * possibly over a network.
 *
 * Results are stored per canonical byte range (see cacheKey), so repeated or
 * concurrent requests for the same range share one underlying read.
 * A read that rejects is evicted from the cache, so a later request for the
 * same range retries instead of replaying the stored failure.
 *
 * TODO: require data to be loaded with preload(), reads outside of preload rejected.
 *
 * @param file file-like object to cache
 * @returns cached file-like object exposing the same byteLength as `file`
 * @throws Error from cacheKey when slice() is called with an invalid range
 */
export function cachedAsyncBuffer(file: AsyncBuffer): AsyncBuffer {
  // indexed by 'start,end'
  const cache = new Map<string, Awaitable<ArrayBuffer>>()
  return {
    byteLength: file.byteLength,
    slice(start: number, end?: number): Awaitable<ArrayBuffer> {
      // ensure "100-200" and "100-" are cached under the same key
      const key = cacheKey(start, end, file.byteLength)
      const cached = cache.get(key)
      if (cached) return cached
      // cache miss, read from file
      const result = file.slice(start, end)
      cache.set(key, result)
      // Do not keep failed reads: without this a single transient error
      // would be returned for this range on every subsequent request.
      // The caller still receives the original (rejecting) result.
      Promise.resolve(result).catch(() => {
        // only evict our own entry, never a newer one stored under the key
        if (cache.get(key) === result) cache.delete(key)
      })
      return result
    },
  }
}
|
||||
|
||||
|
||||
/**
 * Builds the canonical cache key for a byte range.
 * The key is a string of the form 'start,end'; the end is left blank when it
 * cannot be determined (no explicit end and no known file size).
 * Suffix ranges (negative start) are resolved against the file size when it
 * is known, so they share a key with the equivalent absolute range.
 *
 * @param start first byte of the range, or a negative offset from the end of the file
 * @param end end of the range; must be undefined when start is negative
 * @param fileSize total size of the file, if known
 * @returns cache key for the range
 * @throws Error if a suffix range is given an end, or if start is greater than end
 */
function cacheKey(start: number, end: number | undefined, fileSize: number | undefined): string {
  // suffix range: counts back from the end of the file
  if (start < 0) {
    if (end !== undefined) throw new Error(`invalid suffix range [${start}, ${end}]`)
    return fileSize === undefined ? `${start},` : `${fileSize + start},${fileSize}`
  }

  // open-ended range: runs to the end of the file when the size is known
  if (end === undefined) {
    return fileSize === undefined ? `${start},` : `${start},${fileSize}`
  }

  // explicit range
  if (start > end) throw new Error(`invalid empty range [${start}, ${end}]`)
  return `${start},${end}`
}
|
||||
@ -1,6 +1,6 @@
|
||||
import { cachedAsyncBuffer } from '../../src/asyncBuffer.js'
|
||||
import type { AsyncBuffer, FileMetaData } from '../../src/hyparquet.js'
|
||||
import { asyncBufferFromUrl } from '../../src/utils.js'
|
||||
import { cachedAsyncBuffer } from '../asyncBuffer.js'
|
||||
|
||||
// Serializable constructors for AsyncBuffers
|
||||
interface AsyncBufferFromFile {
|
||||
|
||||
54
src/asyncBuffer.js
Normal file
54
src/asyncBuffer.js
Normal file
@ -0,0 +1,54 @@
|
||||
|
||||
/**
 * Returns a cached layer on top of an AsyncBuffer. For caching slices of a file
 * that are read multiple times, possibly over a network.
 *
 * Results are stored per canonical byte range (see cacheKey), so repeated or
 * concurrent requests for the same range share one underlying read.
 * A read that rejects is evicted from the cache, so a later request for the
 * same range retries instead of replaying the stored failure.
 *
 * @typedef {import('./types.js').AsyncBuffer} AsyncBuffer
 * @param {AsyncBuffer} file file-like object to cache
 * @returns {AsyncBuffer} cached file-like object
 */
export function cachedAsyncBuffer(file) {
  // byteLength is captured once, when the cached wrapper is created
  const { byteLength } = file
  const cache = new Map()
  return {
    byteLength,
    /**
     * @param {number} start
     * @param {number} [end]
     * @returns {import('./types.js').Awaitable<ArrayBuffer>}
     */
    slice(start, end) {
      const key = cacheKey(start, end, byteLength)
      const cached = cache.get(key)
      if (cached) return cached
      // cache miss, read from file
      // Call slice as a method of `file`: a detached `slice` loses its `this`
      // and throws for sources such as a plain ArrayBuffer or a class instance.
      const result = file.slice(start, end)
      cache.set(key, result)
      // Do not keep failed reads: without this a single transient error
      // would be returned for this range on every subsequent request.
      // The caller still receives the original (rejecting) result.
      Promise.resolve(result).catch(() => {
        // only evict our own entry, never a newer one stored under the key
        if (cache.get(key) === result) cache.delete(key)
      })
      return result
    },
  }
}
|
||||
|
||||
|
||||
/**
 * Builds the canonical cache key 'start,end' for a byte range.
 * Suffix ranges (negative start) are resolved against the file size when it
 * is known, so they share a key with the equivalent absolute range. The end
 * is left blank when there is no explicit end and no known size.
 *
 * @param {number} start first byte of the range, or a negative offset from the end of the file
 * @param {number} [end] end of the range; must be undefined when start is negative
 * @param {number} [size] total size of the file, if known
 * @returns {string} cache key for the range
 */
function cacheKey(start, end, size) {
  // suffix range: counts back from the end of the file
  if (start < 0) {
    if (end !== undefined) throw new Error(`invalid suffix range [${start}, ${end}]`)
    return size === undefined ? `${start},` : `${size + start},${size}`
  }

  // open-ended range: runs to the end of the file when the size is known
  if (end === undefined) {
    return size === undefined ? `${start},` : `${start},${size}`
  }

  // explicit range
  if (start > end) throw new Error(`invalid empty range [${start}, ${end}]`)
  return `${start},${end}`
}
|
||||
5
src/hyparquet.d.ts
vendored
5
src/hyparquet.d.ts
vendored
@ -130,6 +130,11 @@ export function asyncBufferFromFile(filename: string): Promise<AsyncBuffer>
|
||||
*/
|
||||
export function byteLengthFromUrl(url: string): Promise<number>
|
||||
|
||||
/**
|
||||
* Returns a cached layer on top of an AsyncBuffer.
* Intended for caching slices of a file that are read multiple times,
* possibly over a network.
*
* @param asyncBuffer file-like object to cache
* @returns cached file-like object
|
||||
*/
|
||||
export function cachedAsyncBuffer(asyncBuffer: AsyncBuffer): AsyncBuffer
|
||||
|
||||
/**
|
||||
* Parquet query options for reading data
|
||||
*/
|
||||
|
||||
@ -9,6 +9,8 @@ export { snappyUncompress } from './snappy.js'
|
||||
|
||||
export { asyncBufferFromFile, asyncBufferFromUrl, byteLengthFromUrl, toJson } from './utils.js'
|
||||
|
||||
export { cachedAsyncBuffer } from './asyncBuffer.js'
|
||||
|
||||
/**
|
||||
* @param {import('./hyparquet.js').ParquetReadOptions} options
|
||||
* @returns {Promise<Array<Record<string, any>>>}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user