Float life

This commit is contained in:
Kenny Daniel 2025-04-08 04:20:32 -07:00
parent 5c686412c1
commit 6545196a1d
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
8 changed files with 88 additions and 23 deletions

@ -71,6 +71,15 @@ ByteWriter.prototype.appendInt64 = function(value) {
this.offset += 8
}
/**
 * Append a 32-bit float in little-endian byte order and advance the
 * write offset by 4 bytes.
 *
 * @param {number} value
 */
ByteWriter.prototype.appendFloat32 = function(value) {
  // A float32 occupies 4 bytes; reserving 8 could grow the buffer needlessly.
  this.ensure(this.offset + 4)
  this.view.setFloat32(this.offset, value, true)
  this.offset += 4
}
/**
* @param {number} value
*/

@ -13,6 +13,8 @@ export function writePlain(writer, values, type) {
writePlainInt32(writer, values)
} else if (type === 'INT64') {
writePlainInt64(writer, values)
} else if (type === 'FLOAT') {
writePlainFloat(writer, values)
} else if (type === 'DOUBLE') {
writePlainDouble(writer, values)
} else if (type === 'BYTE_ARRAY') {
@ -69,6 +71,16 @@ function writePlainInt64(writer, values) {
}
}
/**
* @param {Writer} writer
* @param {DecodedArray} values
*/
function writePlainFloat(writer, values) {
for (const value of values) {
writer.appendFloat32(value)
}
}
/**
* @param {Writer} writer
* @param {DecodedArray} values

@ -55,9 +55,14 @@ export function getSchemaElementForValues(name, values, type) {
type = valueType
} else if (type === 'INT32' && valueType === 'DOUBLE') {
type = 'DOUBLE'
} else if (type === 'FLOAT' && valueType === 'INT32') {
valueType = 'FLOAT'
} else if (type === 'FLOAT' && valueType === 'DOUBLE') {
valueType = 'FLOAT'
} else if (type === 'DOUBLE' && valueType === 'INT32') {
// keep
} else if (type !== valueType) {
valueType = 'DOUBLE'
}
if (type !== valueType) {
throw new Error(`parquet cannot write mixed types: ${type} and ${valueType}`)
}
}

1
src/types.d.ts vendored

@ -27,6 +27,7 @@ export interface Writer {
appendUint32(value: number): void
appendInt32(value: number): void
appendInt64(value: bigint): void
appendFloat32(value: number): void
appendFloat64(value: number): void
appendBuffer(buffer: ArrayBuffer): void
appendBytes(value: Uint8Array): void

@ -11,10 +11,11 @@ export const exampleMetadata = {
version: 2,
created_by: 'hyparquet',
schema: [
{ name: 'root', num_children: 6 },
{ name: 'root', num_children: 7 },
{ name: 'bool', type: 'BOOLEAN', repetition_type: 'REQUIRED' },
{ name: 'int', type: 'INT32', repetition_type: 'REQUIRED' },
{ name: 'bigint', type: 'INT64', repetition_type: 'REQUIRED' },
{ name: 'float', type: 'FLOAT', repetition_type: 'REQUIRED' },
{ name: 'double', type: 'DOUBLE', repetition_type: 'REQUIRED' },
{ name: 'string', type: 'BYTE_ARRAY', repetition_type: 'REQUIRED', converted_type: 'UTF8' },
{ name: 'nullable', type: 'BOOLEAN', repetition_type: 'OPTIONAL' },
@ -80,8 +81,27 @@ export const exampleMetadata = {
},
},
{
file_path: 'double',
file_path: 'float',
file_offset: 110n,
meta_data: {
type: 'FLOAT',
encodings: ['PLAIN'],
path_in_schema: ['float'],
codec: 'SNAPPY',
num_values: 4n,
total_uncompressed_size: 39n,
total_compressed_size: 39n,
data_page_offset: 110n,
statistics: {
null_count: 0n,
min_value: 0,
max_value: Infinity,
},
},
},
{
file_path: 'double',
file_offset: 149n,
meta_data: {
type: 'DOUBLE',
encodings: ['PLAIN'],
@ -90,7 +110,7 @@ export const exampleMetadata = {
num_values: 4n,
total_uncompressed_size: 51n,
total_compressed_size: 51n,
data_page_offset: 110n,
data_page_offset: 149n,
statistics: {
null_count: 0n,
min_value: 0,
@ -100,7 +120,7 @@ export const exampleMetadata = {
},
{
file_path: 'string',
file_offset: 161n,
file_offset: 200n,
meta_data: {
type: 'BYTE_ARRAY',
encodings: ['PLAIN'],
@ -109,7 +129,7 @@ export const exampleMetadata = {
num_values: 4n,
total_uncompressed_size: 42n,
total_compressed_size: 42n,
data_page_offset: 161n,
data_page_offset: 200n,
statistics: {
null_count: 0n,
min_value: 'a',
@ -119,7 +139,7 @@ export const exampleMetadata = {
},
{
file_path: 'nullable',
file_offset: 203n,
file_offset: 242n,
meta_data: {
type: 'BOOLEAN',
encodings: ['PLAIN'],
@ -128,7 +148,7 @@ export const exampleMetadata = {
num_values: 4n,
total_uncompressed_size: 26n,
total_compressed_size: 26n,
data_page_offset: 203n,
data_page_offset: 242n,
statistics: {
null_count: 2n,
min_value: false,
@ -137,10 +157,10 @@ export const exampleMetadata = {
},
},
],
total_byte_size: 225n,
total_byte_size: 264n,
num_rows: 4n,
}],
metadata_length: 432,
metadata_length: 497,
}
describe('writeMetadata', () => {
@ -158,7 +178,7 @@ describe('writeMetadata', () => {
{ key: 'key1', value: 'value1' },
{ key: 'key2', value: 'value2' },
],
metadata_length: 464,
metadata_length: 529,
}
writeMetadata(writer, withKvMetadata)

@ -41,6 +41,24 @@ describe('writePlain', () => {
}
})
it('writes FLOAT', () => {
  const writer = new ByteWriter()
  // Finite inputs must survive a float32 round trip unchanged for the
  // exact toBe comparison below to hold.
  const floats = [0, 300.5, -2.7100000381469727, Infinity, -Infinity, NaN]
  writePlain(writer, floats, 'FLOAT')

  // 4 bytes per float
  expect(writer.offset).toBe(4 * floats.length)

  floats.forEach((expected, index) => {
    const actual = writer.view.getFloat32(index * 4, true)
    if (Number.isNaN(expected)) {
      // NaN never equals itself, so check it by predicate instead
      expect(Number.isNaN(actual)).toBe(true)
    } else {
      expect(actual).toBe(expected)
    }
  })
})
it('writes DOUBLE', () => {
const writer = new ByteWriter()
const doubles = [0, 3.14, -2.71, Infinity, -Infinity, NaN]

@ -20,7 +20,7 @@ export const basicData = [
{ name: 'bool', data: [true, false, true, false] },
{ name: 'int', data: [0, 127, 0x7fff, 0x7fffffff] },
{ name: 'bigint', data: [0n, 127n, 0x7fffn, 0x7fffffffffffffffn] },
// { name: 'float', data: [0, 0.0001, 123.456, 1e100], type: 'FLOAT' }, // TODO
{ name: 'float', data: [0, 0.0001, 123.456, 1e100], type: 'FLOAT' },
{ name: 'double', data: [0, 0.0001, 123.456, 1e100] },
{ name: 'string', data: ['a', 'b', 'c', 'd'] },
{ name: 'nullable', data: [true, false, null, null] },
@ -36,10 +36,10 @@ describe('parquetWriteBuffer', () => {
it('serializes basic types', async () => {
const result = await roundTripDeserialize(basicData)
expect(result).toEqual([
{ bool: true, int: 0, bigint: 0n, double: 0, string: 'a', nullable: true },
{ bool: false, int: 127, bigint: 127n, double: 0.0001, string: 'b', nullable: false },
{ bool: true, int: 0x7fff, bigint: 0x7fffn, double: 123.456, string: 'c', nullable: null },
{ bool: false, int: 0x7fffffff, bigint: 0x7fffffffffffffffn, double: 1e100, string: 'd', nullable: null },
{ bool: true, int: 0, bigint: 0n, float: 0, double: 0, string: 'a', nullable: true },
{ bool: false, int: 127, bigint: 127n, float: 0.00009999999747378752, double: 0.0001, string: 'b', nullable: false },
{ bool: true, int: 0x7fff, bigint: 0x7fffn, float: 123.45600128173828, double: 123.456, string: 'c', nullable: null },
{ bool: false, int: 0x7fffffff, bigint: 0x7fffffffffffffffn, float: Infinity, double: 1e100, string: 'd', nullable: null },
])
})
@ -92,8 +92,8 @@ describe('parquetWriteBuffer', () => {
it('writes statistics when enabled', () => {
const withStats = parquetWriteBuffer({ columnData: basicData, statistics: true })
const noStats = parquetWriteBuffer({ columnData: basicData, statistics: false })
expect(withStats.byteLength).toBe(669)
expect(noStats.byteLength).toBe(575)
expect(withStats.byteLength).toBe(773)
expect(noStats.byteLength).toBe(663)
})
it('serializes list types', async () => {

@ -34,10 +34,10 @@ describe('parquetWrite with FileWriter', () => {
// check parquet data
const result = await parquetReadObjects({ file, metadata })
expect(result).toEqual([
{ bool: true, int: 0, bigint: 0n, double: 0, string: 'a', nullable: true },
{ bool: false, int: 127, bigint: 127n, double: 0.0001, string: 'b', nullable: false },
{ bool: true, int: 0x7fff, bigint: 0x7fffn, double: 123.456, string: 'c', nullable: null },
{ bool: false, int: 0x7fffffff, bigint: 0x7fffffffffffffffn, double: 1e100, string: 'd', nullable: null },
{ bool: true, int: 0, bigint: 0n, float: 0, double: 0, string: 'a', nullable: true },
{ bool: false, int: 127, bigint: 127n, float: 0.00009999999747378752, double: 0.0001, string: 'b', nullable: false },
{ bool: true, int: 0x7fff, bigint: 0x7fffn, float: 123.45600128173828, double: 123.456, string: 'c', nullable: null },
{ bool: false, int: 0x7fffffff, bigint: 0x7fffffffffffffffn, float: Infinity, double: 1e100, string: 'd', nullable: null },
])
})
})