// hyparquet/test/metadata.test.js
import { promises as fs } from 'fs'
import { describe, expect, it } from 'vitest'
import { parquetMetadata } from '../src/metadata.js'
import { toJson } from '../src/toJson.js'
/**
 * Helper function to read .parquet file into ArrayBuffer
 *
 * @param {string} filePath path of the file to read
 * @returns {Promise<ArrayBuffer>} a standalone ArrayBuffer holding exactly the file's bytes
 */
async function readFileToArrayBuffer(filePath) {
  const buffer = await fs.readFile(filePath)
  // A Node Buffer is a view that may start at a non-zero offset inside a
  // larger backing ArrayBuffer, so copy out only the byte range it covers.
  return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
}
// Tests for parquetMetadata: one decode check against a fixture file and two
// error-path checks on buffers that are not valid parquet files.
describe('parquetMetadata', () => {
  it('should correctly decode metadata from addrtype-missing-value.parquet', async () => {
    const arrayBuffer = await readFileToArrayBuffer('test/files/addrtype-missing-value.parquet')
    const result = parquetMetadata(arrayBuffer)
    // Numeric fields such as type, repetition_type, codec and encodings are
    // compared as raw codes.
    // NOTE(review): presumably these are the Parquet Thrift enum values;
    // confirm against the format definition before changing them.
    const expectedMetadata = {
      version: 1,
      schema: [
        { repetition_type: 0, name: 'duckdb_schema', num_children: 1 },
        { type: 6, repetition_type: 1, name: 'ADDRTYPE', converted_type: 0 },
      ],
      num_rows: 10,
      row_groups: [
        {
          columns: [
            {
              file_offset: 0,
              meta_data: {
                type: 6,
                encodings: [0, 8],
                path_in_schema: ['ADDRTYPE'],
                codec: 1,
                num_values: 10,
                total_uncompressed_size: 78,
                total_compressed_size: 82,
                data_page_offset: 31,
                dictionary_page_offset: 4,
                statistics: {
                  max: 'Intersection',
                  min: 'Block',
                  null_count: 1,
                  distinct_count: 2,
                },
              },
            },
          ],
          total_byte_size: 33024,
          num_rows: 10,
        },
      ],
      created_by: 'DuckDB',
    }

    // The decoded result is passed through toJson before comparison.
    // NOTE(review): presumably this normalizes values (e.g. bigint) so they can
    // be compared with the plain literals above; confirm in src/toJson.js.
    const casted = toJson(result)
    expect(casted).toEqual(expectedMetadata)
  })

  it('should throw an error for a too short file', () => {
    // Zero-length input.
    const arrayBuffer = new ArrayBuffer(0)
    expect(() => parquetMetadata(arrayBuffer)).toThrow('parquet file is too short')
  })

  it('should throw an error for invalid magic number', () => {
    // Eight zero-filled bytes; the expected error is about the magic number
    // rather than the length.
    const arrayBuffer = new ArrayBuffer(8)
    expect(() => parquetMetadata(arrayBuffer)).toThrow('parquet file invalid magic number')
  })
})