mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Metadata tests
This commit is contained in:
parent
88ff71d924
commit
6616c2f7af
@ -10,7 +10,7 @@
|
||||
"@typescript-eslint/no-explicit-any": "warn",
|
||||
"@typescript-eslint/no-unused-vars": "warn",
|
||||
"arrow-spacing": "error",
|
||||
"camelcase": "error",
|
||||
"camelcase": "off",
|
||||
"comma-spacing": "error",
|
||||
"comma-dangle": ["error", "always-multiline"],
|
||||
"eol-last": "error",
|
||||
@ -32,7 +32,7 @@
|
||||
"jsdoc/check-tag-names": "error",
|
||||
"jsdoc/no-types": "error",
|
||||
"jsdoc/sort-tags": "error",
|
||||
"no-constant-condition": "warn",
|
||||
"no-constant-condition": "off",
|
||||
"no-multi-spaces": "error",
|
||||
"no-trailing-spaces": "error",
|
||||
"no-var": "error",
|
||||
|
||||
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
@ -4,7 +4,7 @@ on:
|
||||
push:
|
||||
|
||||
jobs:
|
||||
hyllama:
|
||||
hyparquet:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
11
package.json
11
package.json
@ -9,7 +9,7 @@
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/hyparam/hyllama"
|
||||
"url": "https://github.com/hyparam/hyparquet"
|
||||
},
|
||||
"main": "dist/hyparquet.js",
|
||||
"files": [
|
||||
@ -24,12 +24,13 @@
|
||||
"test": "vitest run"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@typescript-eslint/eslint-plugin": "6.16.0",
|
||||
"@vitest/coverage-v8": "1.1.0",
|
||||
"@types/node": "20.10.6",
|
||||
"@typescript-eslint/eslint-plugin": "6.17.0",
|
||||
"@vitest/coverage-v8": "1.1.1",
|
||||
"eslint": "8.56.0",
|
||||
"eslint-plugin-import": "2.29.1",
|
||||
"eslint-plugin-jsdoc": "46.9.1",
|
||||
"eslint-plugin-jsdoc": "48.0.2",
|
||||
"typescript": "5.3.3",
|
||||
"vitest": "1.1.0"
|
||||
"vitest": "1.1.1"
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { parquetMetadata } from './metadata'
|
||||
import { parquetMetadata } from './metadata.js'
|
||||
|
||||
/**
|
||||
* Read parquet data rows from a file
|
||||
|
||||
@ -1,12 +1,11 @@
|
||||
import { deserializeTCompactProtocol } from './thrift'
|
||||
import type { FileMetaData, SchemaElement } from './types'
|
||||
import { deserializeTCompactProtocol } from './thrift.js'
|
||||
import type { FileMetaData, SchemaElement } from './types.ts'
|
||||
|
||||
/**
|
||||
* Read parquet header, metadata, and schema information from a file
|
||||
* @param arrayBuffer parquet file contents
|
||||
* @returns metadata object
|
||||
*/
|
||||
/* eslint-disable camelcase */
|
||||
export function parquetMetadata(arrayBuffer: ArrayBuffer): FileMetaData {
|
||||
// DataView for easier manipulation of the buffer
|
||||
const view = new DataView(arrayBuffer)
|
||||
@ -110,3 +109,22 @@ export function schemaElement(schema: SchemaElement[], name: string[]): any {
|
||||
}
|
||||
return element
|
||||
}
|
||||
|
||||
/**
 * Replace bigints with numbers.
 *
 * Walks the value recursively and returns a converted copy; the input is not
 * mutated. Arrays are mapped element by element, and objects are rebuilt as
 * new plain objects from their own enumerable string keys. Every other value,
 * including null and undefined, is returned unchanged.
 *
 * Conversion uses Number(), so bigints beyond Number.MAX_SAFE_INTEGER lose
 * precision.
 *
 * @param obj value to convert
 * @returns copy of the value with every bigint replaced by a number
 */
export function castBigInts(obj: any): any {
  if (typeof obj === 'bigint') {
    return Number(obj)
  }
  if (Array.isArray(obj)) {
    // wrap in a lambda so map's index/array arguments are not forwarded
    return obj.map(item => castBigInts(item))
  }
  // typeof null is 'object'; without this guard Object.keys(null) would throw
  if (obj !== null && typeof obj === 'object') {
    const newObj: Record<string, unknown> = {}
    for (const key of Object.keys(obj)) {
      newObj[key] = castBigInts(obj[key])
    }
    return newObj
  }
  return obj
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { Decoded } from './types'
|
||||
import type { Decoded } from './types.ts'
|
||||
|
||||
// TCompactProtocol types
|
||||
const CompactType = {
|
||||
|
||||
BIN
test/files/addrtype-missing-value.parquet
Normal file
BIN
test/files/addrtype-missing-value.parquet
Normal file
Binary file not shown.
67
test/metadata.test.ts
Normal file
67
test/metadata.test.ts
Normal file
@ -0,0 +1,67 @@
|
||||
import { promises as fs } from 'fs'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { castBigInts, parquetMetadata } from '../src/metadata'
|
||||
|
||||
/**
 * Helper that loads a file (e.g. a .parquet fixture) into an ArrayBuffer.
 *
 * The Buffer returned by fs.readFile is a view with its own byteOffset and
 * byteLength, so only that window of the underlying buffer is copied out.
 *
 * @param filePath path of the file to read
 * @returns promise of an ArrayBuffer holding exactly the file's bytes
 */
async function readFileToArrayBuffer(filePath: string): Promise<ArrayBuffer> {
  const contents = await fs.readFile(filePath)
  const start = contents.byteOffset
  const end = start + contents.byteLength
  return contents.buffer.slice(start, end)
}
|
||||
|
||||
// Test suite for parquetMetadata: one fixture-based decode check plus the
// two error paths asserted below (too-short input, bad magic number).
describe('parquetMetadata', () => {
  it('should correctly decode metadata from addrtype-missing-value.parquet', async () => {
    const fileContents = await readFileToArrayBuffer('test/files/addrtype-missing-value.parquet')
    const decoded = parquetMetadata(fileContents)

    // Statistics of the single ADDRTYPE column chunk
    const statistics = {
      max: 'Intersection',
      min: 'Block',
      null_count: 1,
      distinct_count: 2,
    }

    // Metadata of the single ADDRTYPE column chunk
    const columnMetaData = {
      type: 6,
      encodings: [0, 8],
      path_in_schema: ['ADDRTYPE'],
      codec: 1,
      num_values: 10,
      total_uncompressed_size: 78,
      total_compressed_size: 82,
      data_page_offset: 31,
      dictionary_page_offset: 4,
      statistics,
    }

    // The file holds one row group with one column
    const rowGroup = {
      columns: [
        { file_offset: 0, meta_data: columnMetaData },
      ],
      total_byte_size: 33024,
      num_rows: 10,
    }

    // bigint fields are converted to numbers so they compare equal to the
    // plain number literals used in the expectation
    expect(castBigInts(decoded)).toEqual({
      version: 1,
      schema: [
        { repetition_type: 0, name: 'duckdb_schema', num_children: 1 },
        { type: 6, repetition_type: 1, name: 'ADDRTYPE', converted_type: 0 },
      ],
      num_rows: 10,
      row_groups: [rowGroup],
      created_by: 'DuckDB',
    })
  })

  it('should throw an error for a too short file', () => {
    const emptyFile = new ArrayBuffer(0)
    const decode = () => parquetMetadata(emptyFile)
    expect(decode).toThrow('parquet file is too short')
  })

  it('should throw an error for invalid magic number', () => {
    // eight zero bytes: passes the length check asserted above, but is not valid parquet
    const zeroedFile = new ArrayBuffer(8)
    const decode = () => parquetMetadata(zeroedFile)
    expect(decode).toThrow('parquet file invalid magic number')
  })
})
|
||||
@ -2,6 +2,7 @@
|
||||
"compilerOptions": {
|
||||
"declaration": true,
|
||||
"lib": ["esnext", "dom"],
|
||||
"module": "nodenext",
|
||||
"outDir": "dist",
|
||||
"sourceMap": true,
|
||||
"target": "esnext",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user