mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-31 01:16:37 +00:00
Error handling for undefined parquet file
This commit is contained in:
parent
e3b3ddafa7
commit
48dc10fd18
@@ -27,15 +27,15 @@
|
||||
"typecheck": "tsc"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.12.3",
|
||||
"@types/node": "20.12.4",
|
||||
"@typescript-eslint/eslint-plugin": "7.5.0",
|
||||
"@vitest/coverage-v8": "1.4.0",
|
||||
"eslint": "8.57.0",
|
||||
"eslint-plugin-import": "2.29.1",
|
||||
"eslint-plugin-jsdoc": "48.2.2",
|
||||
"eslint-plugin-jsdoc": "48.2.3",
|
||||
"http-server": "14.1.1",
|
||||
"hysnappy": "0.3.0",
|
||||
"typescript": "5.4.3",
|
||||
"typescript": "5.4.4",
|
||||
"vitest": "1.4.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,3 +88,5 @@ export function assembleObjects(
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
// TODO: depends on prior def level
|
||||
|
||||
@@ -29,6 +29,8 @@ import { deserializeTCompactProtocol } from './thrift.js'
|
||||
* @returns {Promise<FileMetaData>} parquet metadata object
|
||||
*/
|
||||
export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = 1 << 19 /* 512kb */) {
|
||||
if (!asyncBuffer) throw new Error('parquet asyncBuffer is required')
|
||||
|
||||
// fetch last bytes (footer) of the file
|
||||
const footerOffset = Math.max(0, asyncBuffer.byteLength - initialFetchSize)
|
||||
const footerBuffer = await asyncBuffer.slice(footerOffset)
|
||||
@@ -64,12 +66,14 @@ export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = 1 <<
|
||||
}
|
||||
|
||||
/**
|
||||
* Read parquet metadata from a buffer
|
||||
* Read parquet metadata from a buffer synchronously.
|
||||
*
|
||||
* @param {ArrayBuffer} arrayBuffer parquet file contents
|
||||
* @returns {FileMetaData} parquet metadata object
|
||||
*/
|
||||
export function parquetMetadata(arrayBuffer) {
|
||||
if (!arrayBuffer) throw new Error('parquet arrayBuffer is required')
|
||||
|
||||
// DataView for easier manipulation of the buffer
|
||||
const view = new DataView(arrayBuffer)
|
||||
|
||||
|
||||
@@ -30,6 +30,8 @@ import { getColumnName, isMapLike } from './schema.js'
|
||||
* @returns {Promise<void>} resolves when all requested rows and columns are parsed
|
||||
*/
|
||||
export async function parquetRead(options) {
|
||||
if (!options.file) throw new Error('parquet file is required')
|
||||
|
||||
// load metadata if not provided
|
||||
options.metadata ||= await parquetMetadataAsync(options.file)
|
||||
if (!options.metadata) throw new Error('parquet metadata not found')
|
||||
|
||||
@@ -17,12 +17,17 @@ describe('parquetMetadata', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('should throw an error for a too short file', () => {
|
||||
it('throws for arrayBuffer undefined', () => {
|
||||
// @ts-expect-error testing invalid input
|
||||
expect(() => parquetMetadata(undefined)).toThrow('parquet arrayBuffer is required')
|
||||
})
|
||||
|
||||
it('throws for a too short file', () => {
|
||||
const arrayBuffer = new ArrayBuffer(0)
|
||||
expect(() => parquetMetadata(arrayBuffer)).toThrow('parquet file is too short')
|
||||
})
|
||||
|
||||
it('should throw an error for invalid metadata length', () => {
|
||||
it('throws for invalid metadata length', () => {
|
||||
const arrayBuffer = new ArrayBuffer(12)
|
||||
const view = new DataView(arrayBuffer)
|
||||
view.setUint32(0, 0x31524150, true) // magic number PAR1
|
||||
@@ -32,13 +37,13 @@ describe('parquetMetadata', () => {
|
||||
.toThrow('parquet metadata length 1000 exceeds available buffer 4')
|
||||
})
|
||||
|
||||
it('should throw an error for invalid magic number', () => {
|
||||
it('throws for invalid magic number', () => {
|
||||
const arrayBuffer = new ArrayBuffer(8)
|
||||
expect(() => parquetMetadata(arrayBuffer))
|
||||
.toThrow('parquet file invalid (footer != PAR1)')
|
||||
})
|
||||
|
||||
it('should throw an error for invalid metadata length', () => {
|
||||
it('throws for invalid metadata length', () => {
|
||||
const { buffer } = new Uint8Array([255, 255, 255, 255, 80, 65, 82, 49])
|
||||
expect(() => parquetMetadata(buffer))
|
||||
.toThrow('parquet metadata length 4294967295 exceeds available buffer 0')
|
||||
@@ -56,15 +61,21 @@ describe('parquetMetadataAsync', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('should throw an error for invalid magic number', () => {
|
||||
it('throws for asyncBuffer undefined', async () => {
|
||||
const arrayBuffer = undefined
|
||||
await expect(parquetMetadataAsync(arrayBuffer)).rejects
|
||||
.toThrow('parquet asyncBuffer is required')
|
||||
})
|
||||
|
||||
it('throws for invalid magic number', async () => {
|
||||
const { buffer } = new Uint8Array([255, 255, 255, 255, 255, 255, 255, 255])
|
||||
expect(parquetMetadataAsync(buffer)).rejects
|
||||
await expect(parquetMetadataAsync(buffer)).rejects
|
||||
.toThrow('parquet file invalid (footer != PAR1)')
|
||||
})
|
||||
|
||||
it('should throw an error for invalid metadata length', () => {
|
||||
it('throws for invalid metadata length', async () => {
|
||||
const { buffer } = new Uint8Array([255, 255, 255, 255, 80, 65, 82, 49])
|
||||
expect(parquetMetadataAsync(buffer)).rejects
|
||||
await expect(parquetMetadataAsync(buffer)).rejects
|
||||
.toThrow('parquet metadata length 4294967295 exceeds available buffer 0')
|
||||
})
|
||||
})
|
||||
|
||||
@@ -34,6 +34,12 @@ describe('parquetRead', () => {
|
||||
})
|
||||
})
|
||||
|
||||
it('throws reasonable error messages', async () => {
|
||||
const file = undefined
|
||||
await expect(parquetRead({ file }))
|
||||
.rejects.toThrow('parquet file is required')
|
||||
})
|
||||
|
||||
it('should read a single column from a file', async () => {
|
||||
const asyncBuffer = fileToAsyncBuffer('test/files/datapage_v2.snappy.parquet')
|
||||
await parquetRead({
|
||||
|
||||
Loading…
Reference in New Issue
Block a user