diff --git a/test/example.js b/test/example.js new file mode 100644 index 0000000..4433488 --- /dev/null +++ b/test/example.js @@ -0,0 +1,171 @@ +/** @type {ColumnData[]} */ +export const exampleData = [ + { name: 'bool', data: [true, false, true, false] }, + { name: 'int', data: [0, 127, 0x7fff, 0x7fffffff] }, + { name: 'bigint', data: [0n, 127n, 0x7fffn, 0x7fffffffffffffffn] }, + { name: 'float', data: [0, 0.0001, 123.456, 1e100], type: 'FLOAT', repetition_type: 'REQUIRED' }, + { name: 'double', data: [0, 0.0001, 123.456, 1e100] }, + { name: 'string', data: ['a', 'b', 'c', 'd'] }, + { name: 'nullable', data: [true, false, null, null] }, +] + +/** + * @import {FileMetaData, LogicalType} from 'hyparquet' + * @import {ColumnData, ThriftObject} from '../src/types.js' + * @type {FileMetaData} + */ +export const exampleMetadata = { + version: 2, + created_by: 'hyparquet', + schema: [ + { name: 'root', num_children: 7 }, + { name: 'bool', type: 'BOOLEAN', repetition_type: 'REQUIRED' }, + { name: 'int', type: 'INT32', repetition_type: 'REQUIRED' }, + { name: 'bigint', type: 'INT64', repetition_type: 'REQUIRED' }, + { name: 'float', type: 'FLOAT', repetition_type: 'REQUIRED' }, + { name: 'double', type: 'DOUBLE', repetition_type: 'REQUIRED' }, + { name: 'string', type: 'BYTE_ARRAY', repetition_type: 'REQUIRED', converted_type: 'UTF8' }, + { name: 'nullable', type: 'BOOLEAN', repetition_type: 'OPTIONAL' }, + ], + num_rows: 4n, + row_groups: [{ + columns: [ + { + file_path: 'bool', + file_offset: 4n, + meta_data: { + type: 'BOOLEAN', + encodings: ['PLAIN'], + path_in_schema: ['bool'], + codec: 'SNAPPY', + num_values: 4n, + total_uncompressed_size: 24n, + total_compressed_size: 24n, + data_page_offset: 4n, + statistics: { + null_count: 0n, + min_value: false, + max_value: true, + }, + }, + }, + { + file_path: 'int', + file_offset: 28n, + meta_data: { + type: 'INT32', + encodings: ['PLAIN'], + path_in_schema: ['int'], + codec: 'SNAPPY', + num_values: 4n, + total_uncompressed_size: 39n, + total_compressed_size: 39n, + data_page_offset: 28n, + statistics: { + null_count: 0n, + min_value: 0, + max_value: 0x7fffffff, + }, + }, + }, + { + file_path: 'bigint', + file_offset: 67n, + meta_data: { + type: 'INT64', + encodings: ['PLAIN'], + path_in_schema: ['bigint'], + codec: 'SNAPPY', + num_values: 4n, + total_uncompressed_size: 43n, + total_compressed_size: 43n, + data_page_offset: 67n, + statistics: { + null_count: 0n, + min_value: 0n, + max_value: 0x7fffffffffffffffn, + }, + }, + }, + { + file_path: 'float', + file_offset: 110n, + meta_data: { + type: 'FLOAT', + encodings: ['PLAIN'], + path_in_schema: ['float'], + codec: 'SNAPPY', + num_values: 4n, + total_uncompressed_size: 39n, + total_compressed_size: 39n, + data_page_offset: 110n, + statistics: { + null_count: 0n, + min_value: 0, + max_value: Infinity, + }, + }, + }, + { + file_path: 'double', + file_offset: 149n, + meta_data: { + type: 'DOUBLE', + encodings: ['PLAIN'], + path_in_schema: ['double'], + codec: 'SNAPPY', + num_values: 4n, + total_uncompressed_size: 51n, + total_compressed_size: 51n, + data_page_offset: 149n, + statistics: { + null_count: 0n, + min_value: 0, + max_value: 1e100, + }, + }, + }, + { + file_path: 'string', + file_offset: 200n, + meta_data: { + type: 'BYTE_ARRAY', + encodings: ['PLAIN'], + path_in_schema: ['string'], + codec: 'SNAPPY', + num_values: 4n, + total_uncompressed_size: 42n, + total_compressed_size: 42n, + data_page_offset: 200n, + statistics: { + null_count: 0n, + min_value: 'a', + max_value: 'd', + }, + }, + }, + { + file_path: 'nullable', + file_offset: 242n, + meta_data: { + type: 'BOOLEAN', + encodings: ['PLAIN'], + path_in_schema: ['nullable'], + codec: 'SNAPPY', + num_values: 4n, + total_uncompressed_size: 26n, + total_compressed_size: 26n, + data_page_offset: 242n, + statistics: { + null_count: 2n, + min_value: false, + max_value: true, + }, + }, + }, + ], + total_byte_size: 264n, + num_rows: 4n, + }], + metadata_length: 497, +} diff --git a/test/metadata.test.js b/test/metadata.test.js index 71d0303..527b493 100644 --- a/test/metadata.test.js +++ b/test/metadata.test.js @@ -2,167 +2,12 @@ import { parquetMetadata } from 'hyparquet' import { describe, expect, it } from 'vitest' import { ByteWriter } from '../src/bytewriter.js' import { logicalType, writeMetadata } from '../src/metadata.js' +import { exampleMetadata } from './example.js' /** * @import {FileMetaData, LogicalType} from 'hyparquet' * @import {ThriftObject} from '../src/types.js' - * @type {FileMetaData} */ -export const exampleMetadata = { - version: 2, - created_by: 'hyparquet', - schema: [ - { name: 'root', num_children: 7 }, - { name: 'bool', type: 'BOOLEAN', repetition_type: 'REQUIRED' }, - { name: 'int', type: 'INT32', repetition_type: 'REQUIRED' }, - { name: 'bigint', type: 'INT64', repetition_type: 'REQUIRED' }, - { name: 'float', type: 'FLOAT', repetition_type: 'REQUIRED' }, - { name: 'double', type: 'DOUBLE', repetition_type: 'REQUIRED' }, - { name: 'string', type: 'BYTE_ARRAY', repetition_type: 'REQUIRED', converted_type: 'UTF8' }, - { name: 'nullable', type: 'BOOLEAN', repetition_type: 'OPTIONAL' }, - ], - num_rows: 4n, - row_groups: [{ - columns: [ - { - file_path: 'bool', - file_offset: 4n, - meta_data: { - type: 'BOOLEAN', - encodings: ['PLAIN'], - path_in_schema: ['bool'], - codec: 'SNAPPY', - num_values: 4n, - total_uncompressed_size: 24n, - total_compressed_size: 24n, - data_page_offset: 4n, - statistics: { - null_count: 0n, - min_value: false, - max_value: true, - }, - }, - }, - { - file_path: 'int', - file_offset: 28n, - meta_data: { - type: 'INT32', - encodings: ['PLAIN'], - path_in_schema: ['int'], - codec: 'SNAPPY', - num_values: 4n, - total_uncompressed_size: 39n, - total_compressed_size: 39n, - data_page_offset: 28n, - statistics: { - null_count: 0n, - min_value: 0, - max_value: 0x7fffffff, - }, - }, - }, - { - file_path: 'bigint', - file_offset: 67n, - meta_data: { - type: 'INT64', - encodings: ['PLAIN'], - path_in_schema: ['bigint'], - codec: 'SNAPPY', - num_values: 4n, - total_uncompressed_size: 43n, - total_compressed_size: 43n, - data_page_offset: 67n, - statistics: { - null_count: 0n, - min_value: 0n, - max_value: 0x7fffffffffffffffn, - }, - }, - }, - { - file_path: 'float', - file_offset: 110n, - meta_data: { - type: 'FLOAT', - encodings: ['PLAIN'], - path_in_schema: ['float'], - codec: 'SNAPPY', - num_values: 4n, - total_uncompressed_size: 39n, - total_compressed_size: 39n, - data_page_offset: 110n, - statistics: { - null_count: 0n, - min_value: 0, - max_value: Infinity, - }, - }, - }, - { - file_path: 'double', - file_offset: 149n, - meta_data: { - type: 'DOUBLE', - encodings: ['PLAIN'], - path_in_schema: ['double'], - codec: 'SNAPPY', - num_values: 4n, - total_uncompressed_size: 51n, - total_compressed_size: 51n, - data_page_offset: 149n, - statistics: { - null_count: 0n, - min_value: 0, - max_value: 1e100, - }, - }, - }, - { - file_path: 'string', - file_offset: 200n, - meta_data: { - type: 'BYTE_ARRAY', - encodings: ['PLAIN'], - path_in_schema: ['string'], - codec: 'SNAPPY', - num_values: 4n, - total_uncompressed_size: 42n, - total_compressed_size: 42n, - data_page_offset: 200n, - statistics: { - null_count: 0n, - min_value: 'a', - max_value: 'd', - }, - }, - }, - { - file_path: 'nullable', - file_offset: 242n, - meta_data: { - type: 'BOOLEAN', - encodings: ['PLAIN'], - path_in_schema: ['nullable'], - codec: 'SNAPPY', - num_values: 4n, - total_uncompressed_size: 26n, - total_compressed_size: 26n, - data_page_offset: 242n, - statistics: { - null_count: 2n, - min_value: false, - max_value: true, - }, - }, - }, - ], - total_byte_size: 264n, - num_rows: 4n, - }], - metadata_length: 497, -} describe('writeMetadata', () => { it('writes metadata and parses in hyparquet', () => { diff --git a/test/write.buffer.test.js b/test/write.buffer.test.js index 6d64c0d..c0e7748 100644 --- a/test/write.buffer.test.js +++ b/test/write.buffer.test.js @@ -1,7 +1,7 @@ import { parquetMetadata, parquetReadObjects } from 'hyparquet' import { describe, expect, it } from 'vitest' import { parquetWriteBuffer } from '../src/index.js' -import { exampleMetadata } from './metadata.test.js' +import { exampleData, exampleMetadata } from './example.js' /** * Utility to encode a parquet file and then read it back into a JS object. @@ -15,26 +15,15 @@ async function roundTripDeserialize(columnData) { return await parquetReadObjects({ file, utf8: false }) } -/** @type {ColumnData[]} */ -export const basicData = [ - { name: 'bool', data: [true, false, true, false] }, - { name: 'int', data: [0, 127, 0x7fff, 0x7fffffff] }, - { name: 'bigint', data: [0n, 127n, 0x7fffn, 0x7fffffffffffffffn] }, - { name: 'float', data: [0, 0.0001, 123.456, 1e100], type: 'FLOAT', repetition_type: 'REQUIRED' }, - { name: 'double', data: [0, 0.0001, 123.456, 1e100] }, - { name: 'string', data: ['a', 'b', 'c', 'd'] }, - { name: 'nullable', data: [true, false, null, null] }, -] - describe('parquetWriteBuffer', () => { it('writes expected metadata', () => { - const file = parquetWriteBuffer({ columnData: basicData }) + const file = parquetWriteBuffer({ columnData: exampleData }) const metadata = parquetMetadata(file) expect(metadata).toEqual(exampleMetadata) }) it('serializes basic types', async () => { - const result = await roundTripDeserialize(basicData) + const result = await roundTripDeserialize(exampleData) expect(result).toEqual([ { bool: true, int: 0, bigint: 0n, float: 0, double: 0, string: 'a', nullable: true }, { bool: false, int: 127, bigint: 127n, float: 0.00009999999747378752, double: 0.0001, string: 'b', nullable: false }, @@ -90,8 +79,8 @@ describe('parquetWriteBuffer', () => { }) it('writes statistics when enabled', () => { - const withStats = parquetWriteBuffer({ columnData: basicData, statistics: true }) - const noStats = parquetWriteBuffer({ columnData: basicData, statistics: false }) + const withStats = parquetWriteBuffer({ columnData: exampleData, statistics: true }) + const noStats = parquetWriteBuffer({ columnData: exampleData, statistics: false }) expect(withStats.byteLength).toBe(773) expect(noStats.byteLength).toBe(663) }) diff --git a/test/write.file.test.js b/test/write.file.test.js index edac247..fcc5966 100644 --- a/test/write.file.test.js +++ b/test/write.file.test.js @@ -1,9 +1,8 @@ import fs from 'fs' import { asyncBufferFromFile, parquetMetadataAsync, parquetReadObjects } from 'hyparquet' import { afterEach, beforeEach, describe, expect, it } from 'vitest' -import { exampleMetadata } from './metadata.test.js' import { parquetWriteFile } from '../src/index.js' -import { basicData } from './write.buffer.test.js' +import { exampleData, exampleMetadata } from './example.js' const filedir = 'data/' const filename = 'data/write.file.parquet' @@ -24,7 +23,7 @@ describe('parquetWriteFile', () => { }) it('writes parquet file', async () => { - parquetWriteFile({ filename, columnData: basicData }) + parquetWriteFile({ filename, columnData: exampleData }) // check parquet metadata const file = await asyncBufferFromFile(filename)