2024-02-13 18:52:29 +00:00
|
|
|
import fs from 'fs'
|
2024-01-03 18:33:37 +00:00
|
|
|
import { describe, expect, it } from 'vitest'
|
2025-05-30 22:47:02 +00:00
|
|
|
import { parquetMetadata, parquetMetadataAsync, toJson } from '../src/index.js'
|
2025-05-30 20:01:20 +00:00
|
|
|
import { asyncBufferFromFile } from '../src/node.js'
|
2024-07-26 22:01:01 +00:00
|
|
|
import { fileToJson } from './helpers.js'
|
2024-01-15 21:40:12 +00:00
|
|
|
|
2024-02-24 19:01:08 +00:00
|
|
|
// Every `.parquet` fixture under test/files; each suite below registers one test per entry.
const files = fs.readdirSync('test/files').filter(name => name.endsWith('.parquet'))
|
|
|
|
|
|
2024-01-03 18:33:37 +00:00
|
|
|
/**
 * Tests for the synchronous parquetMetadata parser: each fixture file is
 * compared against its checked-in `<name>.metadata.json` snapshot, followed by
 * cases that feed malformed buffers and assert the exact error message.
 */
describe('parquetMetadata', () => {
  for (const file of files) {
    it(`parse metadata from ${file}`, async () => {
      // Read the fixture starting at offset 0 so it can be handed to the sync parser
      const asyncBuffer = await asyncBufferFromFile(`test/files/${file}`)
      const arrayBuffer = await asyncBuffer.slice(0)
      const result = toJson(parquetMetadata(arrayBuffer))
      // Strip only the trailing extension; a plain string replace would remove
      // the first ".parquet" occurrence anywhere in the name.
      const base = file.replace(/\.parquet$/, '')
      const expected = fileToJson(`test/files/${base}.metadata.json`)
      // The serialized result is passed as the failure message to ease debugging
      expect(result, JSON.stringify(result, null, 2)).toEqual(expected)
    })
  }

  it('throws for arrayBuffer undefined', () => {
    // @ts-expect-error testing invalid input
    expect(() => parquetMetadata(undefined)).toThrow('parquet expected ArrayBuffer')
  })

  it('throws for a too short file', () => {
    const arrayBuffer = new ArrayBuffer(0)
    expect(() => parquetMetadata(arrayBuffer)).toThrow('parquet file is too short')
  })

  // Renamed from 'throws for invalid metadata length', which duplicated the
  // title of the last test in this suite and made failure reports ambiguous.
  it('throws for metadata length exceeding buffer', () => {
    const arrayBuffer = new ArrayBuffer(12)
    const view = new DataView(arrayBuffer)
    view.setUint32(0, 0x31524150, true) // magic number PAR1
    view.setUint32(4, 1000, true) // 1000 bytes exceeds buffer
    view.setUint32(8, 0x31524150, true) // magic number PAR1
    expect(() => parquetMetadata(arrayBuffer))
      .toThrow('parquet metadata length 1000 exceeds available buffer 4')
  })

  it('throws for invalid magic number', () => {
    // Eight zero bytes: long enough to hold a footer, but the trailing magic is not PAR1
    const arrayBuffer = new ArrayBuffer(8)
    expect(() => parquetMetadata(arrayBuffer))
      .toThrow('parquet file invalid (footer != PAR1)')
  })

  it('throws for invalid metadata length', () => {
    // Four 0xFF length bytes (4294967295) followed by the ASCII bytes of "PAR1"
    const { buffer } = new Uint8Array([255, 255, 255, 255, 80, 65, 82, 49])
    expect(() => parquetMetadata(buffer))
      .toThrow('parquet metadata length 4294967295 exceeds available buffer 0')
  })
})
|
2024-01-15 21:40:12 +00:00
|
|
|
|
|
|
|
|
/**
 * Tests for the asynchronous parquetMetadataAsync parser: each fixture file is
 * compared against its checked-in `<name>.metadata.json` snapshot, followed by
 * cases that pass invalid input and assert the exact rejection message.
 */
describe('parquetMetadataAsync', () => {
  for (const file of files) {
    it(`parse metadata async from ${file}`, async () => {
      const asyncBuffer = await asyncBufferFromFile(`test/files/${file}`)
      const result = await parquetMetadataAsync(asyncBuffer)
      // Strip only the trailing extension; a plain string replace would remove
      // the first ".parquet" occurrence anywhere in the name.
      const base = file.replace(/\.parquet$/, '')
      const expected = fileToJson(`test/files/${base}.metadata.json`)
      expect(toJson(result)).toEqual(expected)
    })
  }

  it('throws for asyncBuffer undefined', async () => {
    // Named after the parameter it stands in for (was misleadingly `arrayBuffer`)
    const asyncBuffer = undefined
    // @ts-expect-error testing invalid input
    await expect(parquetMetadataAsync(asyncBuffer)).rejects
      .toThrow('parquet expected AsyncBuffer')
  })

  it('throws for invalid magic number', async () => {
    // Eight 0xFF bytes: the trailing four bytes are not the PAR1 magic
    const { buffer } = new Uint8Array([255, 255, 255, 255, 255, 255, 255, 255])
    await expect(parquetMetadataAsync(buffer)).rejects
      .toThrow('parquet file invalid (footer != PAR1)')
  })

  it('throws for invalid metadata length', async () => {
    // Four 0xFF length bytes (4294967295) followed by the ASCII bytes of "PAR1"
    const { buffer } = new Uint8Array([255, 255, 255, 255, 80, 65, 82, 49])
    await expect(parquetMetadataAsync(buffer)).rejects
      .toThrow('parquet metadata length 4294967295 exceeds available buffer 0')
  })
})
|