Include metadata length in metadata

This commit is contained in:
Kenny Daniel 2024-01-12 14:35:20 -08:00
parent b01bfa8422
commit 03727d0156
No known key found for this signature in database
GPG Key ID: 6A3C5E318BE71391
4 changed files with 12 additions and 8 deletions

@ -27,14 +27,14 @@
"typecheck": "tsc"
},
"devDependencies": {
"@types/node": "20.10.8",
"@types/node": "20.11.0",
"@typescript-eslint/eslint-plugin": "6.18.1",
"@vitest/coverage-v8": "1.1.3",
"@vitest/coverage-v8": "1.2.0",
"eslint": "8.56.0",
"eslint-plugin-import": "2.29.1",
"eslint-plugin-jsdoc": "48.0.2",
"http-server": "14.1.1",
"typescript": "5.3.3",
"vitest": "1.1.3"
"vitest": "1.2.0"
}
}

@ -23,11 +23,11 @@ export function parquetMetadata(arrayBuffer) {
// Metadata length is stored in the 4 bytes immediately preceding the trailing PAR1 magic
const metadataLengthOffset = view.byteLength - 8
const metadataLength = view.getUint32(view.byteLength - 8, true)
if (metadataLength <= 0 || metadataLength > metadataLengthOffset) {
throw new Error('parquet file invalid metadata length')
if (metadataLength <= 0) {
throw new Error('parquet invalid metadata length')
}
if (metadataLength > view.byteLength - 8) {
throw new Error('parquet file metadata length exceeds file size')
throw new Error('parquet metadata length exceeds buffer size')
}
const metadataOffset = metadataLengthOffset - metadataLength
@ -98,5 +98,6 @@ export function parquetMetadata(arrayBuffer) {
row_groups,
key_value_metadata,
created_by,
metadata_length: metadataLength,
}
}

1
src/types.d.ts vendored

@ -15,6 +15,7 @@ export interface FileMetaData {
row_groups: RowGroup[]
key_value_metadata?: KeyValue[]
created_by?: string
metadata_length: number
}
export interface SchemaElement {

@ -22,6 +22,8 @@ describe('parquetMetadata', () => {
// Parquet v1 from DuckDB
const expectedMetadata = {
version: 1,
created_by: 'DuckDB',
metadata_length: 149,
schema: [
{ repetition_type: 0, name: 'duckdb_schema', num_children: 1 },
{ type: 6, repetition_type: 1, name: 'ADDRTYPE', converted_type: 0 },
@ -55,7 +57,6 @@ describe('parquetMetadata', () => {
num_rows: 10,
},
],
created_by: 'DuckDB',
}
const casted = toJson(result)
@ -69,6 +70,8 @@ describe('parquetMetadata', () => {
// Parquet v2 from pandas with 2 row groups
const expectedMetadata = {
version: 2,
created_by: 'parquet-cpp-arrow version 14.0.2',
metadata_length: 1602,
schema: [
{
repetition_type: 0,
@ -153,7 +156,6 @@ describe('parquetMetadata', () => {
// value: base64
},
],
created_by: 'parquet-cpp-arrow version 14.0.2',
}
const casted = toJson(result)