Move example data to test/example.js

Kenny Daniel 2025-04-13 21:15:29 -07:00
parent 6226d50734
commit f6740aba3f
4 changed files with 179 additions and 175 deletions

test/example.js (new file, 171 additions)

@@ -0,0 +1,171 @@
/** @type {ColumnData[]} */
export const exampleData = [
{ name: 'bool', data: [true, false, true, false] },
{ name: 'int', data: [0, 127, 0x7fff, 0x7fffffff] },
{ name: 'bigint', data: [0n, 127n, 0x7fffn, 0x7fffffffffffffffn] },
{ name: 'float', data: [0, 0.0001, 123.456, 1e100], type: 'FLOAT', repetition_type: 'REQUIRED' },
{ name: 'double', data: [0, 0.0001, 123.456, 1e100] },
{ name: 'string', data: ['a', 'b', 'c', 'd'] },
{ name: 'nullable', data: [true, false, null, null] },
]
/**
* @import {FileMetaData, LogicalType} from 'hyparquet'
* @import {ColumnData, ThriftObject} from '../src/types.js'
* @type {FileMetaData}
*/
export const exampleMetadata = {
version: 2,
created_by: 'hyparquet',
schema: [
{ name: 'root', num_children: 7 },
{ name: 'bool', type: 'BOOLEAN', repetition_type: 'REQUIRED' },
{ name: 'int', type: 'INT32', repetition_type: 'REQUIRED' },
{ name: 'bigint', type: 'INT64', repetition_type: 'REQUIRED' },
{ name: 'float', type: 'FLOAT', repetition_type: 'REQUIRED' },
{ name: 'double', type: 'DOUBLE', repetition_type: 'REQUIRED' },
{ name: 'string', type: 'BYTE_ARRAY', repetition_type: 'REQUIRED', converted_type: 'UTF8' },
{ name: 'nullable', type: 'BOOLEAN', repetition_type: 'OPTIONAL' },
],
num_rows: 4n,
row_groups: [{
columns: [
{
file_path: 'bool',
file_offset: 4n,
meta_data: {
type: 'BOOLEAN',
encodings: ['PLAIN'],
path_in_schema: ['bool'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 24n,
total_compressed_size: 24n,
data_page_offset: 4n,
statistics: {
null_count: 0n,
min_value: false,
max_value: true,
},
},
},
{
file_path: 'int',
file_offset: 28n,
meta_data: {
type: 'INT32',
encodings: ['PLAIN'],
path_in_schema: ['int'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 39n,
total_compressed_size: 39n,
data_page_offset: 28n,
statistics: {
null_count: 0n,
min_value: 0,
max_value: 0x7fffffff,
},
},
},
{
file_path: 'bigint',
file_offset: 67n,
meta_data: {
type: 'INT64',
encodings: ['PLAIN'],
path_in_schema: ['bigint'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 43n,
total_compressed_size: 43n,
data_page_offset: 67n,
statistics: {
null_count: 0n,
min_value: 0n,
max_value: 0x7fffffffffffffffn,
},
},
},
{
file_path: 'float',
file_offset: 110n,
meta_data: {
type: 'FLOAT',
encodings: ['PLAIN'],
path_in_schema: ['float'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 39n,
total_compressed_size: 39n,
data_page_offset: 110n,
statistics: {
null_count: 0n,
min_value: 0,
max_value: Infinity,
},
},
},
{
file_path: 'double',
file_offset: 149n,
meta_data: {
type: 'DOUBLE',
encodings: ['PLAIN'],
path_in_schema: ['double'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 51n,
total_compressed_size: 51n,
data_page_offset: 149n,
statistics: {
null_count: 0n,
min_value: 0,
max_value: 1e100,
},
},
},
{
file_path: 'string',
file_offset: 200n,
meta_data: {
type: 'BYTE_ARRAY',
encodings: ['PLAIN'],
path_in_schema: ['string'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 42n,
total_compressed_size: 42n,
data_page_offset: 200n,
statistics: {
null_count: 0n,
min_value: 'a',
max_value: 'd',
},
},
},
{
file_path: 'nullable',
file_offset: 242n,
meta_data: {
type: 'BOOLEAN',
encodings: ['PLAIN'],
path_in_schema: ['nullable'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 26n,
total_compressed_size: 26n,
data_page_offset: 242n,
statistics: {
null_count: 2n,
min_value: false,
max_value: true,
},
},
},
],
total_byte_size: 264n,
num_rows: 4n,
}],
metadata_length: 497,
}
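
For orientation, a minimal usage sketch (not part of this commit) of how the shared fixtures above are consumed; it only uses names that appear in the updated tests below (parquetWriteBuffer, parquetMetadata, exampleData, exampleMetadata):

// Sketch: write the example columns to an in-memory parquet buffer,
// then parse the footer back and compare it to the expected metadata fixture.
import { parquetMetadata } from 'hyparquet'
import { parquetWriteBuffer } from '../src/index.js'
import { exampleData, exampleMetadata } from './example.js'

const file = parquetWriteBuffer({ columnData: exampleData }) // buffer holding the encoded parquet file
const metadata = parquetMetadata(file) // hyparquet parses the footer back
// metadata is expected to deep-equal exampleMetadata (see write.buffer.test.js below)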

test/metadata.test.js
@@ -2,167 +2,12 @@ import { parquetMetadata } from 'hyparquet'
import { describe, expect, it } from 'vitest'
import { ByteWriter } from '../src/bytewriter.js'
import { logicalType, writeMetadata } from '../src/metadata.js'
import { exampleMetadata } from './example.js'
/**
* @import {FileMetaData, LogicalType} from 'hyparquet'
* @import {ThriftObject} from '../src/types.js'
* @type {FileMetaData}
*/
export const exampleMetadata = {
version: 2,
created_by: 'hyparquet',
schema: [
{ name: 'root', num_children: 7 },
{ name: 'bool', type: 'BOOLEAN', repetition_type: 'REQUIRED' },
{ name: 'int', type: 'INT32', repetition_type: 'REQUIRED' },
{ name: 'bigint', type: 'INT64', repetition_type: 'REQUIRED' },
{ name: 'float', type: 'FLOAT', repetition_type: 'REQUIRED' },
{ name: 'double', type: 'DOUBLE', repetition_type: 'REQUIRED' },
{ name: 'string', type: 'BYTE_ARRAY', repetition_type: 'REQUIRED', converted_type: 'UTF8' },
{ name: 'nullable', type: 'BOOLEAN', repetition_type: 'OPTIONAL' },
],
num_rows: 4n,
row_groups: [{
columns: [
{
file_path: 'bool',
file_offset: 4n,
meta_data: {
type: 'BOOLEAN',
encodings: ['PLAIN'],
path_in_schema: ['bool'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 24n,
total_compressed_size: 24n,
data_page_offset: 4n,
statistics: {
null_count: 0n,
min_value: false,
max_value: true,
},
},
},
{
file_path: 'int',
file_offset: 28n,
meta_data: {
type: 'INT32',
encodings: ['PLAIN'],
path_in_schema: ['int'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 39n,
total_compressed_size: 39n,
data_page_offset: 28n,
statistics: {
null_count: 0n,
min_value: 0,
max_value: 0x7fffffff,
},
},
},
{
file_path: 'bigint',
file_offset: 67n,
meta_data: {
type: 'INT64',
encodings: ['PLAIN'],
path_in_schema: ['bigint'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 43n,
total_compressed_size: 43n,
data_page_offset: 67n,
statistics: {
null_count: 0n,
min_value: 0n,
max_value: 0x7fffffffffffffffn,
},
},
},
{
file_path: 'float',
file_offset: 110n,
meta_data: {
type: 'FLOAT',
encodings: ['PLAIN'],
path_in_schema: ['float'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 39n,
total_compressed_size: 39n,
data_page_offset: 110n,
statistics: {
null_count: 0n,
min_value: 0,
max_value: Infinity,
},
},
},
{
file_path: 'double',
file_offset: 149n,
meta_data: {
type: 'DOUBLE',
encodings: ['PLAIN'],
path_in_schema: ['double'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 51n,
total_compressed_size: 51n,
data_page_offset: 149n,
statistics: {
null_count: 0n,
min_value: 0,
max_value: 1e100,
},
},
},
{
file_path: 'string',
file_offset: 200n,
meta_data: {
type: 'BYTE_ARRAY',
encodings: ['PLAIN'],
path_in_schema: ['string'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 42n,
total_compressed_size: 42n,
data_page_offset: 200n,
statistics: {
null_count: 0n,
min_value: 'a',
max_value: 'd',
},
},
},
{
file_path: 'nullable',
file_offset: 242n,
meta_data: {
type: 'BOOLEAN',
encodings: ['PLAIN'],
path_in_schema: ['nullable'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 26n,
total_compressed_size: 26n,
data_page_offset: 242n,
statistics: {
null_count: 2n,
min_value: false,
max_value: true,
},
},
},
],
total_byte_size: 264n,
num_rows: 4n,
}],
metadata_length: 497,
}
describe('writeMetadata', () => {
it('writes metadata and parses in hyparquet', () => {

test/write.buffer.test.js
@@ -1,7 +1,7 @@
import { parquetMetadata, parquetReadObjects } from 'hyparquet'
import { describe, expect, it } from 'vitest'
import { parquetWriteBuffer } from '../src/index.js'
import { exampleMetadata } from './metadata.test.js'
import { exampleData, exampleMetadata } from './example.js'
/**
* Utility to encode a parquet file and then read it back into a JS object.
@@ -15,26 +15,15 @@ async function roundTripDeserialize(columnData) {
return await parquetReadObjects({ file, utf8: false })
}
/** @type {ColumnData[]} */
export const basicData = [
{ name: 'bool', data: [true, false, true, false] },
{ name: 'int', data: [0, 127, 0x7fff, 0x7fffffff] },
{ name: 'bigint', data: [0n, 127n, 0x7fffn, 0x7fffffffffffffffn] },
{ name: 'float', data: [0, 0.0001, 123.456, 1e100], type: 'FLOAT', repetition_type: 'REQUIRED' },
{ name: 'double', data: [0, 0.0001, 123.456, 1e100] },
{ name: 'string', data: ['a', 'b', 'c', 'd'] },
{ name: 'nullable', data: [true, false, null, null] },
]
describe('parquetWriteBuffer', () => {
it('writes expected metadata', () => {
const file = parquetWriteBuffer({ columnData: basicData })
const file = parquetWriteBuffer({ columnData: exampleData })
const metadata = parquetMetadata(file)
expect(metadata).toEqual(exampleMetadata)
})
it('serializes basic types', async () => {
const result = await roundTripDeserialize(basicData)
const result = await roundTripDeserialize(exampleData)
expect(result).toEqual([
{ bool: true, int: 0, bigint: 0n, float: 0, double: 0, string: 'a', nullable: true },
{ bool: false, int: 127, bigint: 127n, float: 0.00009999999747378752, double: 0.0001, string: 'b', nullable: false },
@@ -90,8 +79,8 @@ describe('parquetWriteBuffer', () => {
})
it('writes statistics when enabled', () => {
const withStats = parquetWriteBuffer({ columnData: basicData, statistics: true })
const noStats = parquetWriteBuffer({ columnData: basicData, statistics: false })
const withStats = parquetWriteBuffer({ columnData: exampleData, statistics: true })
const noStats = parquetWriteBuffer({ columnData: exampleData, statistics: false })
expect(withStats.byteLength).toBe(773)
expect(noStats.byteLength).toBe(663)
})

test/write.file.test.js
@@ -1,9 +1,8 @@
import fs from 'fs'
import { asyncBufferFromFile, parquetMetadataAsync, parquetReadObjects } from 'hyparquet'
import { afterEach, beforeEach, describe, expect, it } from 'vitest'
import { exampleMetadata } from './metadata.test.js'
import { parquetWriteFile } from '../src/index.js'
import { basicData } from './write.buffer.test.js'
import { exampleData, exampleMetadata } from './example.js'
const filedir = 'data/'
const filename = 'data/write.file.parquet'
@@ -24,7 +23,7 @@ describe('parquetWriteFile', () => {
})
it('writes parquet file', async () => {
parquetWriteFile({ filename, columnData: basicData })
parquetWriteFile({ filename, columnData: exampleData })
// check parquet metadata
const file = await asyncBufferFromFile(filename)