diff --git a/package.json b/package.json index f4d4945..88c15bb 100644 --- a/package.json +++ b/package.json @@ -27,15 +27,15 @@ "typecheck": "tsc" }, "devDependencies": { - "@types/node": "20.12.3", + "@types/node": "20.12.4", "@typescript-eslint/eslint-plugin": "7.5.0", "@vitest/coverage-v8": "1.4.0", "eslint": "8.57.0", "eslint-plugin-import": "2.29.1", - "eslint-plugin-jsdoc": "48.2.2", + "eslint-plugin-jsdoc": "48.2.3", "http-server": "14.1.1", "hysnappy": "0.3.0", - "typescript": "5.4.3", + "typescript": "5.4.4", "vitest": "1.4.0" } } diff --git a/src/assemble.js b/src/assemble.js index 373fda0..be619be 100644 --- a/src/assemble.js +++ b/src/assemble.js @@ -88,3 +88,5 @@ export function assembleObjects( return output } + +// TODO: depends on prior def level diff --git a/src/metadata.js b/src/metadata.js index 22ed256..d53c4a0 100644 --- a/src/metadata.js +++ b/src/metadata.js @@ -29,6 +29,8 @@ import { deserializeTCompactProtocol } from './thrift.js' * @returns {Promise} parquet metadata object */ export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = 1 << 19 /* 512kb */) { + if (!asyncBuffer) throw new Error('parquet asyncBuffer is required') + // fetch last bytes (footer) of the file const footerOffset = Math.max(0, asyncBuffer.byteLength - initialFetchSize) const footerBuffer = await asyncBuffer.slice(footerOffset) @@ -64,12 +66,14 @@ export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = 1 << } /** - * Read parquet metadata from a buffer + * Read parquet metadata from a buffer synchronously. * * @param {ArrayBuffer} arrayBuffer parquet file contents * @returns {FileMetaData} parquet metadata object */ export function parquetMetadata(arrayBuffer) { + if (!arrayBuffer) throw new Error('parquet arrayBuffer is required') + // DataView for easier manipulation of the buffer const view = new DataView(arrayBuffer) diff --git a/src/read.js b/src/read.js index 7082d0a..3d516fc 100644 --- a/src/read.js +++ b/src/read.js @@ -30,6 +30,8 @@ import { getColumnName, isMapLike } from './schema.js' * @returns {Promise} resolves when all requested rows and columns are parsed */ export async function parquetRead(options) { + if (!options.file) throw new Error('parquet file is required') + // load metadata if not provided options.metadata ||= await parquetMetadataAsync(options.file) if (!options.metadata) throw new Error('parquet metadata not found') diff --git a/test/metadata.test.js b/test/metadata.test.js index db2732f..484096d 100644 --- a/test/metadata.test.js +++ b/test/metadata.test.js @@ -17,12 +17,17 @@ describe('parquetMetadata', () => { }) }) - it('should throw an error for a too short file', () => { + it('throws for arrayBuffer undefined', () => { + // @ts-expect-error testing invalid input + expect(() => parquetMetadata(undefined)).toThrow('parquet arrayBuffer is required') + }) + + it('throws for a too short file', () => { const arrayBuffer = new ArrayBuffer(0) expect(() => parquetMetadata(arrayBuffer)).toThrow('parquet file is too short') }) - it('should throw an error for invalid metadata length', () => { + it('throws for invalid metadata length', () => { const arrayBuffer = new ArrayBuffer(12) const view = new DataView(arrayBuffer) view.setUint32(0, 0x31524150, true) // magic number PAR1 @@ -32,13 +37,13 @@ describe('parquetMetadata', () => { .toThrow('parquet metadata length 1000 exceeds available buffer 4') }) - it('should throw an error for invalid magic number', () => { + it('throws for invalid magic number', () => { const arrayBuffer = new ArrayBuffer(8) expect(() => parquetMetadata(arrayBuffer)) .toThrow('parquet file invalid (footer != PAR1)') }) - it('should throw an error for invalid metadata length', () => { + it('throws for invalid metadata length', () => { const { buffer } = new Uint8Array([255, 255, 255, 255, 80, 65, 82, 49]) expect(() => parquetMetadata(buffer)) .toThrow('parquet metadata length 4294967295 exceeds available buffer 0') @@ -56,15 +61,21 @@ describe('parquetMetadataAsync', () => { }) }) - it('should throw an error for invalid magic number', () => { + it('throws for asyncBuffer undefined', async () => { + const arrayBuffer = undefined + await expect(parquetMetadataAsync(arrayBuffer)).rejects + .toThrow('parquet asyncBuffer is required') + }) + + it('throws for invalid magic number', async () => { const { buffer } = new Uint8Array([255, 255, 255, 255, 255, 255, 255, 255]) - expect(parquetMetadataAsync(buffer)).rejects + await expect(parquetMetadataAsync(buffer)).rejects .toThrow('parquet file invalid (footer != PAR1)') }) - it('should throw an error for invalid metadata length', () => { + it('throws for invalid metadata length', async () => { const { buffer } = new Uint8Array([255, 255, 255, 255, 80, 65, 82, 49]) - expect(parquetMetadataAsync(buffer)).rejects + await expect(parquetMetadataAsync(buffer)).rejects .toThrow('parquet metadata length 4294967295 exceeds available buffer 0') }) }) diff --git a/test/read.test.js b/test/read.test.js index 7f3a651..f868e41 100644 --- a/test/read.test.js +++ b/test/read.test.js @@ -34,6 +34,12 @@ describe('parquetRead', () => { }) }) + it('throws reasonable error messages', async () => { + const file = undefined + await expect(parquetRead({ file })) + .rejects.toThrow('parquet file is required') + }) + it('should read a single column from a file', async () => { const asyncBuffer = fileToAsyncBuffer('test/files/datapage_v2.snappy.parquet') await parquetRead({