Support endpoints that don't support range requests in asyncBufferFromUrl (#57)

* Support endpoints that don't support range requests in asyncBufferFromUrl

  Before this commit, asyncBufferFromUrl assumed that the body of any successful response was exactly the byte range it had requested. If the origin server does not support HTTP range requests, that assumption is usually wrong and leads to parsing failures.

  This commit makes asyncBufferFromUrl adjust its behaviour based on the status code of the response:

  - If 200, we got the whole parquet file as the response. Save it and use the resulting ArrayBuffer to serve all future slice calls.
  - If 206, we got a range response and can simply return it.

  I have also included test cases to ensure that such responses are handled correctly, and tweaked the existing mocks so that they include the relevant status code.

* Fix all lint warnings

* Replace switch with if-else
2026-02-23 21:01:32 +00:00 · 2025-01-16 14:55:05 -05:00 · 2025-01-16 14:55:05 -05:00 · 725545731d
commit 725545731d
parent 248aeaa5e5
2 changed files with 64 additions and 2 deletions
--- a/src/utils.js
+++ b/src/utils.js
@ -86,17 +86,38 @@ export async function asyncBufferFromUrl({ url, byteLength, requestInit }) {
  if (!url) throw new Error('missing url')
  // byte length from HEAD request
  byteLength ||= await byteLengthFromUrl(url, requestInit)
+
+  /**
+   * A promise for the whole buffer, if range requests are not supported.
+   * @type {Promise<ArrayBuffer>|undefined}
+   */
+  let buffer = undefined
  const init = requestInit || {}
+
  return {
    byteLength,
    async slice(start, end) { // resolves to bytes [start, end) of the remote file; end is exclusive, undefined means "to the end"
-      // fetch byte range from url
+      if (buffer) { // set only after an earlier call received a 200 (whole-file) response
+        return buffer.then(buffer => buffer.slice(start, end)) // cut the range out of the cached download; no new request
+      }

      const headers = new Headers(init.headers)
      const endStr = end === undefined ? '' : end - 1 // HTTP Range end is inclusive, so subtract one; '' leaves the range open-ended
      headers.set('Range', `bytes=${start}-${endStr}`)
+
      const res = await fetch(url, { ...init, headers })
      if (!res.ok || !res.body) throw new Error(`fetch failed ${res.status}`) // non-2xx status or no body
-      return res.arrayBuffer()
+
+      if (res.status === 200) {
+        // Endpoint does not support range requests and returned the whole object
+        buffer = res.arrayBuffer() // caches the promise, not the bytes. NOTE(review): a rejection stays cached, so every later slice would fail too — confirm intended
+        return buffer.then(buffer => buffer.slice(start, end)) // NOTE(review): slice calls issued before this point each ran their own fetch — confirm acceptable
+      } else if (res.status === 206) {
+        // The endpoint supports range requests and sent us the requested range
+        return res.arrayBuffer()
+      } else { // res.ok was true, so this is some other 2xx status
+        throw new Error(`fetch received unexpected status code ${res.status}`)
+      }
    },
  }
 }
--- a/test/utils.test.js
+++ b/test/utils.test.js
@ -114,6 +114,7 @@ describe('asyncBufferFromUrl', () => {
    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      body: {},
+      status: 206,
      arrayBuffer: () => Promise.resolve(mockArrayBuffer),
    })

@ -131,6 +132,7 @@ describe('asyncBufferFromUrl', () => {
    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      body: {},
+      status: 206,
      arrayBuffer: () => Promise.resolve(mockArrayBuffer),
    })

@ -191,6 +193,7 @@ describe('asyncBufferFromUrl', () => {
      return Promise.resolve({
        ok: true,
        body: {},
+        status: 206,
        arrayBuffer: () => Promise.resolve(mockArrayBuffer),
      })
    })
@ -203,4 +206,42 @@ describe('asyncBufferFromUrl', () => {

    await expect(withHeaders.slice(0, 10)).rejects.toThrow('fetch failed 404')
  })
+
+  describe('when range requests are unsupported', () => { // mocked server answers every request with 200 and the full body
+    it('creates an AsyncBuffer with the correct byte length', async () => { // despite the title, this checks that slice() trims a whole-file response to the requested range
+      const mockArrayBuffer = new ArrayBuffer(1024)
+      global.fetch = vi.fn().mockResolvedValue({
+        ok: true,
+        status: 200, // whole file returned instead of 206 Partial Content
+        body: {},
+        arrayBuffer: () => Promise.resolve(mockArrayBuffer),
+      })

+      const buffer = await asyncBufferFromUrl({ url: 'https://example.com', byteLength: 1024 }) // byteLength is supplied, so the `||=` length lookup is skipped
+      const chunk = await buffer.slice(0, 100)

+      expect(fetch).toHaveBeenCalledWith('https://example.com', {
+        headers: new Headers({ Range: 'bytes=0-99' }), // the Range header is still sent; its end is inclusive
+      })

+      expect(chunk.byteLength).toBe(100) // 100 of the 1024 mocked bytes, not the whole body
+    })

+    it('does not make multiple requests for multiple slices', async () => {
+      const mockArrayBuffer = new ArrayBuffer(1024)
+      global.fetch = vi.fn().mockResolvedValue({
+        ok: true,
+        status: 200,
+        body: {},
+        arrayBuffer: () => Promise.resolve(mockArrayBuffer),
+      })

+      const buffer = await asyncBufferFromUrl({ url: 'https://example.com', byteLength: 1024 })

+      await buffer.slice(0, 100) // first call fetches and caches the whole file
+      await buffer.slice(550, 600) // sequential second call is served from the cached response

+      expect(fetch).toBeCalledTimes(1) // only the first slice reached fetch
+    })
+  })
 })