2025-03-26 00:49:59 +00:00
|
|
|
import { parquetMetadata } from 'hyparquet'
|
|
|
|
|
import { describe, expect, it } from 'vitest'
|
|
|
|
|
import { Writer } from '../src/writer.js'
|
|
|
|
|
import { writeMetadata } from '../src/metadata.js'
|
|
|
|
|
|
|
|
|
|
/**
 * Parquet file metadata fixture shared by the writer tests.
 *
 * Describes one row group of 4 rows with five REQUIRED columns
 * (bool, int, bigint, double, string), each PLAIN-encoded and UNCOMPRESSED.
 * Column offsets are contiguous: each column's file_offset equals the
 * previous column's offset plus its total_compressed_size, starting at 4,
 * and the column sizes add up to the row group's total_byte_size (211).
 *
 * @import {FileMetaData} from 'hyparquet'
 * @type {FileMetaData}
 */
export const exampleMetadata = {
  version: 2,
  created_by: 'hyparquet',
  schema: [
    { name: 'root', num_children: 5, repetition_type: 'REQUIRED' },
    { name: 'bool', type: 'BOOLEAN', repetition_type: 'REQUIRED' },
    { name: 'int', type: 'INT32', repetition_type: 'REQUIRED' },
    { name: 'bigint', type: 'INT64', repetition_type: 'REQUIRED' },
    { name: 'double', type: 'DOUBLE', repetition_type: 'REQUIRED' },
    { name: 'string', type: 'BYTE_ARRAY', repetition_type: 'REQUIRED' },
  ],
  num_rows: 4n,
  row_groups: [{
    columns: [
      {
        file_path: 'bool',
        file_offset: 4n,
        meta_data: {
          type: 'BOOLEAN',
          encodings: ['PLAIN'],
          path_in_schema: ['bool'],
          codec: 'UNCOMPRESSED',
          num_values: 4n,
          total_uncompressed_size: 23n,
          total_compressed_size: 23n,
          data_page_offset: 4n,
        },
      },
      {
        file_path: 'int',
        file_offset: 27n,
        meta_data: {
          type: 'INT32',
          encodings: ['PLAIN'],
          path_in_schema: ['int'],
          codec: 'UNCOMPRESSED',
          num_values: 4n,
          total_uncompressed_size: 38n,
          total_compressed_size: 38n,
          data_page_offset: 27n,
        },
      },
      {
        file_path: 'bigint',
        file_offset: 65n,
        meta_data: {
          type: 'INT64',
          encodings: ['PLAIN'],
          path_in_schema: ['bigint'],
          codec: 'UNCOMPRESSED',
          num_values: 4n,
          total_uncompressed_size: 54n,
          total_compressed_size: 54n,
          data_page_offset: 65n,
        },
      },
      {
        file_path: 'double',
        file_offset: 119n,
        meta_data: {
          type: 'DOUBLE',
          encodings: ['PLAIN'],
          path_in_schema: ['double'],
          codec: 'UNCOMPRESSED',
          num_values: 4n,
          total_uncompressed_size: 54n,
          total_compressed_size: 54n,
          data_page_offset: 119n,
        },
      },
      {
        file_path: 'string',
        file_offset: 173n,
        meta_data: {
          type: 'BYTE_ARRAY',
          encodings: ['PLAIN'],
          path_in_schema: ['string'],
          codec: 'UNCOMPRESSED',
          num_values: 4n,
          total_uncompressed_size: 42n,
          total_compressed_size: 42n,
          data_page_offset: 173n,
        },
      },
    ],
    total_byte_size: 211n,
    num_rows: 4n,
  }],
  metadata_length: 280,
}
|
|
|
|
|
|
|
|
|
|
describe('writeMetadata', () => {
  // Round-trip check: serialize the fixture with writeMetadata, wrap it in the
  // PAR1 header/footer markers, and confirm hyparquet's parquetMetadata parses
  // the buffer back into an object equal to the fixture.
  it('writes metadata and parses in hyparquet', () => {
    // The value passed to appendUint32 for both the header and the footer;
    // its bytes from least to most significant are 0x50 0x41 0x52 0x31 ('PAR1').
    const magicPar1 = 0x31524150
    const writer = new Writer()

    // Write header PAR1
    writer.appendUint32(magicPar1)

    // Write metadata
    writeMetadata(writer, exampleMetadata)

    // Write footer PAR1
    writer.appendUint32(magicPar1)

    const file = writer.getBuffer()
    const output = parquetMetadata(file)

    expect(output).toEqual(exampleMetadata)
  })
})
|