Support endpoints that don't support range requests in asyncBufferFromUrl (#57)

* Support endpoints that don't support range requests in asyncBufferFromUrl

Before this commit, asyncBufferFromUrl assumed that the body of any
successful response was equivalent to the range it requested. If the
origin server does not support HTTP range requests, this assumption is
usually wrong and leads to parsing failures.

This commit makes asyncBufferFromUrl adjust its behaviour based on the
status code in the response:
- if 200, we got the whole parquet file as the response. Save it and
  use the resulting ArrayBuffer to serve all future slice calls.
- if 206, we got a range response and can return it as-is.

I have also included some test cases to ensure that such responses are
handled correctly, and tweaked the existing mocks so that they include
the relevant status code.

* Fix all lint warnings

* Replace switch with if-else
This commit is contained in:
Sean Lynch 2025-01-16 14:55:05 -05:00 committed by GitHub
parent 248aeaa5e5
commit 725545731d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 64 additions and 2 deletions

@ -86,17 +86,38 @@ export async function asyncBufferFromUrl({ url, byteLength, requestInit }) {
// Builds an object exposing `byteLength` and an async `slice(start, end)` that
// reads bytes of `url` through fetch, sending an HTTP Range header per call.
// Throws 'missing url' when `url` is falsy.
if (!url) throw new Error('missing url')
// byte length from HEAD request
// (only looked up when the caller did not supply a truthy byteLength)
byteLength ||= await byteLengthFromUrl(url, requestInit)
/**
* A promise for the whole buffer, if range requests are not supported.
* Stays undefined until a slice call receives a 200 response; once set,
* later slice calls are served from it instead of fetching again.
* @type {Promise<ArrayBuffer>|undefined}
*/
let buffer = undefined
// Fall back to an empty init so `init.headers` and the spread below are safe.
const init = requestInit || {}
return {
byteLength,
async slice(start, end) {
// fetch byte range from url
// If the whole file was already downloaded, slice it locally.
// (The inner `buffer` parameter shadows the outer promise and is the
// resolved ArrayBuffer.)
if (buffer) {
return buffer.then(buffer => buffer.slice(start, end))
}
// Copy the caller's headers so setting Range does not mutate requestInit.
const headers = new Headers(init.headers)
// slice() uses an exclusive end, the Range header an inclusive one, hence
// `end - 1`; an undefined end yields an open-ended range ("bytes=start-").
const endStr = end === undefined ? '' : end - 1
headers.set('Range', `bytes=${start}-${endStr}`)
const res = await fetch(url, { ...init, headers })
if (!res.ok || !res.body) throw new Error(`fetch failed ${res.status}`)
// NOTE(review): this return is unconditional, so as written the status
// handling below can never run. It looks like the pre-change line kept by
// the diff rendering (the commit message describes replacing it with the
// 200/206 handling) - confirm against the committed file.
return res.arrayBuffer()
if (res.status === 200) {
// Endpoint does not support range requests and returned the whole object
// Cache the promise (not the awaited value) and slice the requested part.
buffer = res.arrayBuffer()
return buffer.then(buffer => buffer.slice(start, end))
} else if (res.status === 206) {
// The endpoint supports range requests and sent us the requested range
return res.arrayBuffer()
} else {
// Any other successful status is treated as an error.
throw new Error(`fetch received unexpected status code ${res.status}`)
}
},
}
}

@ -114,6 +114,7 @@ describe('asyncBufferFromUrl', () => {
global.fetch = vi.fn().mockResolvedValue({
ok: true,
body: {},
status: 206,
arrayBuffer: () => Promise.resolve(mockArrayBuffer),
})
@ -131,6 +132,7 @@ describe('asyncBufferFromUrl', () => {
global.fetch = vi.fn().mockResolvedValue({
ok: true,
body: {},
status: 206,
arrayBuffer: () => Promise.resolve(mockArrayBuffer),
})
@ -191,6 +193,7 @@ describe('asyncBufferFromUrl', () => {
return Promise.resolve({
ok: true,
body: {},
status: 206,
arrayBuffer: () => Promise.resolve(mockArrayBuffer),
})
})
@ -203,4 +206,42 @@ describe('asyncBufferFromUrl', () => {
await expect(withHeaders.slice(0, 10)).rejects.toThrow('fetch failed 404')
})
describe('when range requests are unsupported', () => {
  // Size shared by the mocked payload and the byteLength given to the buffer.
  const totalBytes = 1024
  const target = 'https://example.com'

  /**
   * Replace global fetch with a mock that always answers 200 and resolves
   * arrayBuffer() to a full `totalBytes`-sized payload, as an endpoint
   * without range support would.
   */
  function stubWholeFileFetch() {
    const wholeFile = new ArrayBuffer(totalBytes)
    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      status: 200,
      body: {},
      arrayBuffer: () => Promise.resolve(wholeFile),
    })
  }

  it('creates an AsyncBuffer with the correct byte length', async () => {
    stubWholeFileFetch()
    const asyncBuffer = await asyncBufferFromUrl({ url: target, byteLength: totalBytes })

    const firstHundred = await asyncBuffer.slice(0, 100)

    // The Range header is still sent; only the response handling differs.
    expect(fetch).toHaveBeenCalledWith(target, {
      headers: new Headers({ Range: 'bytes=0-99' }),
    })
    expect(firstHundred.byteLength).toBe(100)
  })

  it('does not make multiple requests for multiple slices', async () => {
    stubWholeFileFetch()
    const asyncBuffer = await asyncBufferFromUrl({ url: target, byteLength: totalBytes })

    // The second slice must be served from the payload cached by the first.
    await asyncBuffer.slice(0, 100)
    await asyncBuffer.slice(550, 600)

    expect(fetch).toBeCalledTimes(1)
  })
})
})