// hyparquet-writer/test/metadata.test.js
import { parquetMetadata } from 'hyparquet'
import { describe, expect, it } from 'vitest'
2025-04-08 06:14:48 +00:00
import { ByteWriter } from '../src/bytewriter.js'
2025-03-26 00:49:59 +00:00
import { writeMetadata } from '../src/metadata.js'
/** @import {FileMetaData} from 'hyparquet' */

/**
 * Shared parquet file metadata fixture: one row group of four rows holding
 * six leaf columns (bool, int, bigint, double, string, nullable).
 *
 * The values are internally consistent:
 * - the first column chunk starts at byte 4, and each following chunk's
 *   offset equals the previous offset plus its total_compressed_size
 * - file_offset equals data_page_offset for every column chunk
 * - total_byte_size (225) is the sum of the per-column compressed sizes
 * - compressed and uncompressed sizes are identical for every column
 *
 * NOTE(review): metadata_length is presumably the serialized footer size in
 * bytes for exactly this object; confirm against writeMetadata if the
 * fixture is edited.
 *
 * @type {FileMetaData}
 */
export const exampleMetadata = {
  version: 2,
  created_by: 'hyparquet',
  schema: [
    // The root element only counts its children; the leaf columns follow in order.
    { name: 'root', num_children: 6 },
    { name: 'bool', type: 'BOOLEAN', repetition_type: 'REQUIRED' },
    { name: 'int', type: 'INT32', repetition_type: 'REQUIRED' },
    { name: 'bigint', type: 'INT64', repetition_type: 'REQUIRED' },
    { name: 'double', type: 'DOUBLE', repetition_type: 'REQUIRED' },
    { name: 'string', type: 'BYTE_ARRAY', repetition_type: 'REQUIRED', converted_type: 'UTF8' },
    { name: 'nullable', type: 'BOOLEAN', repetition_type: 'OPTIONAL' },
  ],
  num_rows: 4n,
  row_groups: [{
    columns: [
      {
        file_path: 'bool',
        file_offset: 4n,
        meta_data: {
          type: 'BOOLEAN',
          encodings: ['PLAIN'],
          path_in_schema: ['bool'],
          codec: 'SNAPPY',
          num_values: 4n,
          total_uncompressed_size: 24n,
          total_compressed_size: 24n,
          data_page_offset: 4n,
          statistics: {
            null_count: 0n,
            min_value: false,
            max_value: true,
          },
        },
      },
      {
        file_path: 'int',
        file_offset: 28n,
        meta_data: {
          type: 'INT32',
          encodings: ['PLAIN'],
          path_in_schema: ['int'],
          codec: 'SNAPPY',
          num_values: 4n,
          total_uncompressed_size: 39n,
          total_compressed_size: 39n,
          data_page_offset: 28n,
          statistics: {
            null_count: 0n,
            min_value: 0,
            // largest signed 32-bit integer
            max_value: 0x7fffffff,
          },
        },
      },
      {
        file_path: 'bigint',
        file_offset: 67n,
        meta_data: {
          type: 'INT64',
          encodings: ['PLAIN'],
          path_in_schema: ['bigint'],
          codec: 'SNAPPY',
          num_values: 4n,
          total_uncompressed_size: 43n,
          total_compressed_size: 43n,
          data_page_offset: 67n,
          statistics: {
            null_count: 0n,
            min_value: 0n,
            // largest signed 64-bit integer
            max_value: 0x7fffffffffffffffn,
          },
        },
      },
      {
        file_path: 'double',
        file_offset: 110n,
        meta_data: {
          type: 'DOUBLE',
          encodings: ['PLAIN'],
          path_in_schema: ['double'],
          codec: 'SNAPPY',
          num_values: 4n,
          total_uncompressed_size: 51n,
          total_compressed_size: 51n,
          data_page_offset: 110n,
          statistics: {
            null_count: 0n,
            min_value: 0,
            max_value: 1e100,
          },
        },
      },
      {
        file_path: 'string',
        file_offset: 161n,
        meta_data: {
          type: 'BYTE_ARRAY',
          encodings: ['PLAIN'],
          path_in_schema: ['string'],
          codec: 'SNAPPY',
          num_values: 4n,
          total_uncompressed_size: 42n,
          total_compressed_size: 42n,
          data_page_offset: 161n,
          statistics: {
            null_count: 0n,
            min_value: 'a',
            max_value: 'd',
          },
        },
      },
      {
        file_path: 'nullable',
        file_offset: 203n,
        meta_data: {
          type: 'BOOLEAN',
          encodings: ['PLAIN'],
          path_in_schema: ['nullable'],
          codec: 'SNAPPY',
          num_values: 4n,
          total_uncompressed_size: 26n,
          total_compressed_size: 26n,
          data_page_offset: 203n,
          statistics: {
            // the only column whose fixture records nulls (it is OPTIONAL in the schema)
            null_count: 2n,
            min_value: false,
            max_value: true,
          },
        },
      },
    ],
    total_byte_size: 225n,
    num_rows: 4n,
  }],
  metadata_length: 432,
}
// Round-trip check: metadata serialized by writeMetadata must be parsed back
// by hyparquet's parquetMetadata into an equal object.
describe('writeMetadata', () => {
  it('writes metadata and parses in hyparquet', () => {
    // Value appended before and after the metadata as the PAR1 header/footer
    // marker. Its bytes in little-endian order are the ASCII characters
    // "PAR1" (assumes appendUint32 writes little-endian; not visible here).
    const PAR1_MAGIC = 0x31524150

    const writer = new ByteWriter()

    // Write header PAR1
    writer.appendUint32(PAR1_MAGIC)

    // Write metadata: the shared fixture plus key/value entries. The
    // metadata_length override (464 instead of the fixture's 432) is the
    // value the parsed output is expected to report for this larger footer.
    /** @type {FileMetaData} */
    const withKvMetadata = {
      ...exampleMetadata,
      key_value_metadata: [
        { key: 'key1', value: 'value1' },
        { key: 'key2', value: 'value2' },
      ],
      metadata_length: 464,
    }
    writeMetadata(writer, withKvMetadata)

    // Write footer PAR1
    writer.appendUint32(PAR1_MAGIC)

    // Parse the written bytes back and compare against what was written
    const file = writer.getBuffer()
    const output = parquetMetadata(file)
    expect(output).toEqual(withKvMetadata)
  })
})