diff --git a/package.json b/package.json index 5a644c4..8bb7804 100644 --- a/package.json +++ b/package.json @@ -27,14 +27,14 @@ "typecheck": "tsc" }, "devDependencies": { - "@types/node": "20.10.8", + "@types/node": "20.11.0", "@typescript-eslint/eslint-plugin": "6.18.1", - "@vitest/coverage-v8": "1.1.3", + "@vitest/coverage-v8": "1.2.0", "eslint": "8.56.0", "eslint-plugin-import": "2.29.1", "eslint-plugin-jsdoc": "48.0.2", "http-server": "14.1.1", "typescript": "5.3.3", - "vitest": "1.1.3" + "vitest": "1.2.0" } } diff --git a/src/metadata.js b/src/metadata.js index a969805..9cf61d2 100644 --- a/src/metadata.js +++ b/src/metadata.js @@ -23,11 +23,11 @@ export function parquetMetadata(arrayBuffer) { // Metadata length is 4 bytes before the last PAR1 const metadataLengthOffset = view.byteLength - 8 const metadataLength = view.getUint32(view.byteLength - 8, true) - if (metadataLength <= 0 || metadataLength > metadataLengthOffset) { - throw new Error('parquet file invalid metadata length') + if (metadataLength <= 0) { + throw new Error('parquet invalid metadata length') } if (metadataLength > view.byteLength - 8) { - throw new Error('parquet file metadata length exceeds file size') + throw new Error('parquet metadata length exceeds buffer size') } const metadataOffset = metadataLengthOffset - metadataLength @@ -98,5 +98,6 @@ export function parquetMetadata(arrayBuffer) { row_groups, key_value_metadata, created_by, + metadata_length: metadataLength, } } diff --git a/src/types.d.ts b/src/types.d.ts index daa27b8..6ca0b76 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -15,6 +15,7 @@ export interface FileMetaData { row_groups: RowGroup[] key_value_metadata?: KeyValue[] created_by?: string + metadata_length: number } export interface SchemaElement { diff --git a/test/metadata.test.js b/test/metadata.test.js index 6406f44..1a8746b 100644 --- a/test/metadata.test.js +++ b/test/metadata.test.js @@ -22,6 +22,8 @@ describe('parquetMetadata', () => { // Parquet v1 from DuckDB const expectedMetadata = { version: 1, + created_by: 'DuckDB', + metadata_length: 149, schema: [ { repetition_type: 0, name: 'duckdb_schema', num_children: 1 }, { type: 6, repetition_type: 1, name: 'ADDRTYPE', converted_type: 0 }, @@ -55,7 +57,6 @@ describe('parquetMetadata', () => { num_rows: 10, }, ], - created_by: 'DuckDB', } const casted = toJson(result) @@ -69,6 +70,8 @@ describe('parquetMetadata', () => { // Parquet v2 from pandas with 2 row groups const expectedMetadata = { version: 2, + created_by: 'parquet-cpp-arrow version 14.0.2', + metadata_length: 1602, schema: [ { repetition_type: 0, @@ -153,7 +156,6 @@ describe('parquetMetadata', () => { // value: base64 }, ], - created_by: 'parquet-cpp-arrow version 14.0.2', } const casted = toJson(result)