mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Error handling for missing byteLength
This commit is contained in:
parent
d0213bf0f1
commit
4b7d864319
@ -3,21 +3,22 @@ import { snappyUncompressor } from 'hysnappy'
|
||||
import { parquetRead } from './src/hyparquet.js'
|
||||
|
||||
const url = 'https://huggingface.co/datasets/wikimedia/wikipedia/resolve/main/20231101.en/train-00000-of-00041.parquet'
|
||||
const filename = 'example.parquet'
|
||||
|
||||
// download test parquet file if needed
|
||||
let stat = await fs.stat('example.parquet').catch(() => undefined)
|
||||
let stat = await fs.stat(filename).catch(() => undefined)
|
||||
if (!stat) {
|
||||
console.log('downloading ' + url)
|
||||
const res = await fetch(url)
|
||||
if (!res.ok) throw new Error(res.statusText)
|
||||
// write to file async
|
||||
const writeStream = createWriteStream('example.parquet')
|
||||
const writeStream = createWriteStream(filename)
|
||||
for await (const chunk of res.body) {
|
||||
writeStream.write(chunk)
|
||||
}
|
||||
writeStream.end()
|
||||
console.log('downloaded example.parquet')
|
||||
stat = await fs.stat('example.parquet').catch(() => undefined)
|
||||
stat = await fs.stat(filename).catch(() => undefined)
|
||||
}
|
||||
|
||||
// asyncBuffer
|
||||
@ -25,7 +26,7 @@ const file = {
|
||||
byteLength: stat.size,
|
||||
async slice(start, end) {
|
||||
// read file slice
|
||||
const readStream = createReadStream('example.parquet', { start, end })
|
||||
const readStream = createReadStream(filename, { start, end })
|
||||
const buffer = await readStreamToArrayBuffer(readStream)
|
||||
return new Uint8Array(buffer).buffer
|
||||
},
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
"type": "module",
|
||||
"types": "src/hyparquet.d.ts",
|
||||
"scripts": {
|
||||
"coverage": "vitest run --coverage",
|
||||
"coverage": "vitest run --coverage --coverage.include=src",
|
||||
"demo": "http-server -o",
|
||||
"lint": "eslint .",
|
||||
"test": "vitest run",
|
||||
@ -28,14 +28,14 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.12.7",
|
||||
"@typescript-eslint/eslint-plugin": "7.6.0",
|
||||
"@vitest/coverage-v8": "1.4.0",
|
||||
"@typescript-eslint/eslint-plugin": "7.7.0",
|
||||
"@vitest/coverage-v8": "1.5.0",
|
||||
"eslint": "8.57.0",
|
||||
"eslint-plugin-import": "2.29.1",
|
||||
"eslint-plugin-jsdoc": "48.2.3",
|
||||
"http-server": "14.1.1",
|
||||
"hysnappy": "0.3.0",
|
||||
"typescript": "5.4.5",
|
||||
"vitest": "1.4.0"
|
||||
"vitest": "1.5.0"
|
||||
}
|
||||
}
|
||||
|
||||
@ -29,11 +29,12 @@ import { deserializeTCompactProtocol } from './thrift.js'
|
||||
* @returns {Promise<FileMetaData>} parquet metadata object
|
||||
*/
|
||||
export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = 1 << 19 /* 512kb */) {
|
||||
if (!asyncBuffer) throw new Error('parquet asyncBuffer is required')
|
||||
if (!asyncBuffer) throw new Error('parquet file is required')
|
||||
if (!(asyncBuffer.byteLength >= 0)) throw new Error('parquet file byteLength is required')
|
||||
|
||||
// fetch last bytes (footer) of the file
|
||||
const footerOffset = Math.max(0, asyncBuffer.byteLength - initialFetchSize)
|
||||
const footerBuffer = await asyncBuffer.slice(footerOffset)
|
||||
const footerBuffer = await asyncBuffer.slice(footerOffset, asyncBuffer.byteLength)
|
||||
|
||||
// Check for parquet magic number "PAR1"
|
||||
const footerView = new DataView(footerBuffer)
|
||||
@ -72,7 +73,7 @@ export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = 1 <<
|
||||
* @returns {FileMetaData} parquet metadata object
|
||||
*/
|
||||
export function parquetMetadata(arrayBuffer) {
|
||||
if (!arrayBuffer) throw new Error('parquet arrayBuffer is required')
|
||||
if (!arrayBuffer) throw new Error('parquet file is required')
|
||||
const view = new DataView(arrayBuffer)
|
||||
|
||||
// Validate footer magic number "PAR1"
|
||||
|
||||
@ -19,7 +19,7 @@ describe('parquetMetadata', () => {
|
||||
|
||||
it('throws for arrayBuffer undefined', () => {
|
||||
// @ts-expect-error testing invalid input
|
||||
expect(() => parquetMetadata(undefined)).toThrow('parquet arrayBuffer is required')
|
||||
expect(() => parquetMetadata(undefined)).toThrow('parquet file is required')
|
||||
})
|
||||
|
||||
it('throws for a too short file', () => {
|
||||
@ -64,7 +64,7 @@ describe('parquetMetadataAsync', () => {
|
||||
it('throws for asyncBuffer undefined', async () => {
|
||||
const arrayBuffer = undefined
|
||||
await expect(parquetMetadataAsync(arrayBuffer)).rejects
|
||||
.toThrow('parquet asyncBuffer is required')
|
||||
.toThrow('parquet file is required')
|
||||
})
|
||||
|
||||
it('throws for invalid magic number', async () => {
|
||||
|
||||
@ -10,6 +10,12 @@ describe('parquetRead', () => {
|
||||
.rejects.toThrow('parquet file is required')
|
||||
})
|
||||
|
||||
it('throws error for undefined byteLength', async () => {
|
||||
const file = { byteLength: undefined, slice: () => new ArrayBuffer(0) }
|
||||
await expect(parquetRead({ file }))
|
||||
.rejects.toThrow('parquet file byteLength is required')
|
||||
})
|
||||
|
||||
it('filter by row', async () => {
|
||||
const file = fileToAsyncBuffer('test/files/rowgroups.parquet')
|
||||
await parquetRead({
|
||||
|
||||
Loading…
Reference in New Issue
Block a user