From 725545731dc84c4c6ef88e15987c05527da63246 Mon Sep 17 00:00:00 2001 From: Sean Lynch <42618346+swlynch99@users.noreply.github.com> Date: Thu, 16 Jan 2025 14:55:05 -0500 Subject: [PATCH] Support endpoints that don't support range requests in `asyncBufferFromUrl` (#57) * Support endpoints that don't support range requests in asyncBufferFromUrl Before this commit asyncBufferFromUrl assumes that the body of whatever successful response it gets is equivalent to the range it requested. If the origin server does not support HTTP range requests then this assumption is usually wrong and will lead to parsing failures. This commit changes asyncBufferFromUrl to change its behaviour slightly based on the status code in the response: - if 200 then we got the whole parquet file as the response. Save it and use the resulting ArrayBuffer to serve all future slice calls. - if 206 then we got a range response and we can just return that. I have also included some test cases to ensure that such responses are handled correctly and also tweaked other existing mocks to also include the relevant status code. * Fix all lint warnings * replace switch with if-else --- src/utils.js | 25 +++++++++++++++++++++++-- test/utils.test.js | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/utils.js b/src/utils.js index 1f9928e..f408421 100644 --- a/src/utils.js +++ b/src/utils.js @@ -86,17 +86,38 @@ export async function asyncBufferFromUrl({ url, byteLength, requestInit }) { if (!url) throw new Error('missing url') // byte length from HEAD request byteLength ||= await byteLengthFromUrl(url, requestInit) + + /** + * A promise for the whole buffer, if range requests are not supported. 
+ * @type {Promise<ArrayBuffer>|undefined} + */ + let buffer = undefined const init = requestInit || {} + return { byteLength, async slice(start, end) { - // fetch byte range from url + if (buffer) { + return buffer.then(buffer => buffer.slice(start, end)) + } + const headers = new Headers(init.headers) const endStr = end === undefined ? '' : end - 1 headers.set('Range', `bytes=${start}-${endStr}`) + const res = await fetch(url, { ...init, headers }) if (!res.ok || !res.body) throw new Error(`fetch failed ${res.status}`) - return res.arrayBuffer() + + if (res.status === 200) { + // Endpoint does not support range requests and returned the whole object + buffer = res.arrayBuffer() + return buffer.then(buffer => buffer.slice(start, end)) + } else if (res.status === 206) { + // The endpoint supports range requests and sent us the requested range + return res.arrayBuffer() + } else { + throw new Error(`fetch received unexpected status code ${res.status}`) + } }, } } diff --git a/test/utils.test.js b/test/utils.test.js index 6a40764..d962fa2 100644 --- a/test/utils.test.js +++ b/test/utils.test.js @@ -114,6 +114,7 @@ describe('asyncBufferFromUrl', () => { global.fetch = vi.fn().mockResolvedValue({ ok: true, body: {}, + status: 206, arrayBuffer: () => Promise.resolve(mockArrayBuffer), }) @@ -131,6 +132,7 @@ describe('asyncBufferFromUrl', () => { global.fetch = vi.fn().mockResolvedValue({ ok: true, body: {}, + status: 206, arrayBuffer: () => Promise.resolve(mockArrayBuffer), }) @@ -191,6 +193,7 @@ describe('asyncBufferFromUrl', () => { return Promise.resolve({ ok: true, body: {}, + status: 206, arrayBuffer: () => Promise.resolve(mockArrayBuffer), }) }) @@ -203,4 +206,42 @@ describe('asyncBufferFromUrl', () => { await expect(withHeaders.slice(0, 10)).rejects.toThrow('fetch failed 404') }) + + describe('when range requests are unsupported', () => { + it('creates an AsyncBuffer with the correct byte length', async () => { + const mockArrayBuffer = new ArrayBuffer(1024) + global.fetch = 
vi.fn().mockResolvedValue({ + ok: true, + status: 200, + body: {}, + arrayBuffer: () => Promise.resolve(mockArrayBuffer), + }) + + const buffer = await asyncBufferFromUrl({ url: 'https://example.com', byteLength: 1024 }) + const chunk = await buffer.slice(0, 100) + + expect(fetch).toHaveBeenCalledWith('https://example.com', { + headers: new Headers({ Range: 'bytes=0-99' }), + }) + + expect(chunk.byteLength).toBe(100) + }) + + it('does not make multiple requests for multiple slices', async () => { + const mockArrayBuffer = new ArrayBuffer(1024) + global.fetch = vi.fn().mockResolvedValue({ + ok: true, + status: 200, + body: {}, + arrayBuffer: () => Promise.resolve(mockArrayBuffer), + }) + + const buffer = await asyncBufferFromUrl({ url: 'https://example.com', byteLength: 1024 }) + + await buffer.slice(0, 100) + await buffer.slice(550, 600) + + expect(fetch).toBeCalledTimes(1) + }) + }) })