mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2026-01-04 19:06:37 +00:00
Float life
This commit is contained in:
parent
5c686412c1
commit
6545196a1d
@ -71,6 +71,15 @@ ByteWriter.prototype.appendInt64 = function(value) {
|
||||
this.offset += 8
|
||||
}
|
||||
|
||||
/**
 * Append a 32-bit float at the current offset in little-endian byte order,
 * then advance the offset by the 4 bytes written.
 *
 * @param {number} value number to store; it is converted to float32 by DataView
 */
ByteWriter.prototype.appendFloat32 = function(value) {
  // A float32 occupies 4 bytes, so only request room for 4 (not 8 as for
  // the 64-bit appenders) to avoid reserving capacity that is never used.
  this.ensure(this.offset + 4)
  this.view.setFloat32(this.offset, value, true) // true = little-endian
  this.offset += 4
}
|
||||
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
|
||||
12
src/plain.js
12
src/plain.js
@ -13,6 +13,8 @@ export function writePlain(writer, values, type) {
|
||||
writePlainInt32(writer, values)
|
||||
} else if (type === 'INT64') {
|
||||
writePlainInt64(writer, values)
|
||||
} else if (type === 'FLOAT') {
|
||||
writePlainFloat(writer, values)
|
||||
} else if (type === 'DOUBLE') {
|
||||
writePlainDouble(writer, values)
|
||||
} else if (type === 'BYTE_ARRAY') {
|
||||
@ -69,6 +71,16 @@ function writePlainInt64(writer, values) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Emit every entry of `values`, in order, through `writer.appendFloat32`.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 */
function writePlainFloat(writer, values) {
  for (let index = 0; index < values.length; index++) {
    const entry = values[index]
    writer.appendFloat32(entry)
  }
}
|
||||
|
||||
/**
|
||||
* @param {Writer} writer
|
||||
* @param {DecodedArray} values
|
||||
|
||||
@ -55,9 +55,14 @@ export function getSchemaElementForValues(name, values, type) {
|
||||
type = valueType
|
||||
} else if (type === 'INT32' && valueType === 'DOUBLE') {
|
||||
type = 'DOUBLE'
|
||||
} else if (type === 'FLOAT' && valueType === 'INT32') {
|
||||
valueType = 'FLOAT'
|
||||
} else if (type === 'FLOAT' && valueType === 'DOUBLE') {
|
||||
valueType = 'FLOAT'
|
||||
} else if (type === 'DOUBLE' && valueType === 'INT32') {
|
||||
// keep
|
||||
} else if (type !== valueType) {
|
||||
valueType = 'DOUBLE'
|
||||
}
|
||||
if (type !== valueType) {
|
||||
throw new Error(`parquet cannot write mixed types: ${type} and ${valueType}`)
|
||||
}
|
||||
}
|
||||
|
||||
1
src/types.d.ts
vendored
1
src/types.d.ts
vendored
@ -27,6 +27,7 @@ export interface Writer {
|
||||
appendUint32(value: number): void
|
||||
appendInt32(value: number): void
|
||||
appendInt64(value: bigint): void
|
||||
appendFloat32(value: number): void
|
||||
appendFloat64(value: number): void
|
||||
appendBuffer(buffer: ArrayBuffer): void
|
||||
appendBytes(value: Uint8Array): void
|
||||
|
||||
@ -11,10 +11,11 @@ export const exampleMetadata = {
|
||||
version: 2,
|
||||
created_by: 'hyparquet',
|
||||
schema: [
|
||||
{ name: 'root', num_children: 6 },
|
||||
{ name: 'root', num_children: 7 },
|
||||
{ name: 'bool', type: 'BOOLEAN', repetition_type: 'REQUIRED' },
|
||||
{ name: 'int', type: 'INT32', repetition_type: 'REQUIRED' },
|
||||
{ name: 'bigint', type: 'INT64', repetition_type: 'REQUIRED' },
|
||||
{ name: 'float', type: 'FLOAT', repetition_type: 'REQUIRED' },
|
||||
{ name: 'double', type: 'DOUBLE', repetition_type: 'REQUIRED' },
|
||||
{ name: 'string', type: 'BYTE_ARRAY', repetition_type: 'REQUIRED', converted_type: 'UTF8' },
|
||||
{ name: 'nullable', type: 'BOOLEAN', repetition_type: 'OPTIONAL' },
|
||||
@ -80,8 +81,27 @@ export const exampleMetadata = {
|
||||
},
|
||||
},
|
||||
{
|
||||
file_path: 'double',
|
||||
file_path: 'float',
|
||||
file_offset: 110n,
|
||||
meta_data: {
|
||||
type: 'FLOAT',
|
||||
encodings: ['PLAIN'],
|
||||
path_in_schema: ['float'],
|
||||
codec: 'SNAPPY',
|
||||
num_values: 4n,
|
||||
total_uncompressed_size: 39n,
|
||||
total_compressed_size: 39n,
|
||||
data_page_offset: 110n,
|
||||
statistics: {
|
||||
null_count: 0n,
|
||||
min_value: 0,
|
||||
max_value: Infinity,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
file_path: 'double',
|
||||
file_offset: 149n,
|
||||
meta_data: {
|
||||
type: 'DOUBLE',
|
||||
encodings: ['PLAIN'],
|
||||
@ -90,7 +110,7 @@ export const exampleMetadata = {
|
||||
num_values: 4n,
|
||||
total_uncompressed_size: 51n,
|
||||
total_compressed_size: 51n,
|
||||
data_page_offset: 110n,
|
||||
data_page_offset: 149n,
|
||||
statistics: {
|
||||
null_count: 0n,
|
||||
min_value: 0,
|
||||
@ -100,7 +120,7 @@ export const exampleMetadata = {
|
||||
},
|
||||
{
|
||||
file_path: 'string',
|
||||
file_offset: 161n,
|
||||
file_offset: 200n,
|
||||
meta_data: {
|
||||
type: 'BYTE_ARRAY',
|
||||
encodings: ['PLAIN'],
|
||||
@ -109,7 +129,7 @@ export const exampleMetadata = {
|
||||
num_values: 4n,
|
||||
total_uncompressed_size: 42n,
|
||||
total_compressed_size: 42n,
|
||||
data_page_offset: 161n,
|
||||
data_page_offset: 200n,
|
||||
statistics: {
|
||||
null_count: 0n,
|
||||
min_value: 'a',
|
||||
@ -119,7 +139,7 @@ export const exampleMetadata = {
|
||||
},
|
||||
{
|
||||
file_path: 'nullable',
|
||||
file_offset: 203n,
|
||||
file_offset: 242n,
|
||||
meta_data: {
|
||||
type: 'BOOLEAN',
|
||||
encodings: ['PLAIN'],
|
||||
@ -128,7 +148,7 @@ export const exampleMetadata = {
|
||||
num_values: 4n,
|
||||
total_uncompressed_size: 26n,
|
||||
total_compressed_size: 26n,
|
||||
data_page_offset: 203n,
|
||||
data_page_offset: 242n,
|
||||
statistics: {
|
||||
null_count: 2n,
|
||||
min_value: false,
|
||||
@ -137,10 +157,10 @@ export const exampleMetadata = {
|
||||
},
|
||||
},
|
||||
],
|
||||
total_byte_size: 225n,
|
||||
total_byte_size: 264n,
|
||||
num_rows: 4n,
|
||||
}],
|
||||
metadata_length: 432,
|
||||
metadata_length: 497,
|
||||
}
|
||||
|
||||
describe('writeMetadata', () => {
|
||||
@ -158,7 +178,7 @@ describe('writeMetadata', () => {
|
||||
{ key: 'key1', value: 'value1' },
|
||||
{ key: 'key2', value: 'value2' },
|
||||
],
|
||||
metadata_length: 464,
|
||||
metadata_length: 529,
|
||||
}
|
||||
writeMetadata(writer, withKvMetadata)
|
||||
|
||||
|
||||
@ -41,6 +41,24 @@ describe('writePlain', () => {
|
||||
}
|
||||
})
|
||||
|
||||
it('writes FLOAT', () => {
  const writer = new ByteWriter()
  const floats = [0, 300.5, -2.7100000381469727, Infinity, -Infinity, NaN]
  writePlain(writer, floats, 'FLOAT')

  // every float takes 4 bytes in the output
  expect(writer.offset).toBe(4 * floats.length)

  // read each slot back as a little-endian float32 and compare to the input
  floats.forEach((expected, index) => {
    const actual = writer.view.getFloat32(index * 4, true)
    if (Number.isNaN(expected)) {
      // NaN is never equal to itself, so check it separately
      expect(Number.isNaN(actual)).toBe(true)
      return
    }
    expect(actual).toBe(expected)
  })
})
|
||||
|
||||
it('writes DOUBLE', () => {
|
||||
const writer = new ByteWriter()
|
||||
const doubles = [0, 3.14, -2.71, Infinity, -Infinity, NaN]
|
||||
|
||||
@ -20,7 +20,7 @@ export const basicData = [
|
||||
{ name: 'bool', data: [true, false, true, false] },
|
||||
{ name: 'int', data: [0, 127, 0x7fff, 0x7fffffff] },
|
||||
{ name: 'bigint', data: [0n, 127n, 0x7fffn, 0x7fffffffffffffffn] },
|
||||
// { name: 'float', data: [0, 0.0001, 123.456, 1e100], type: 'FLOAT' }, // TODO
|
||||
{ name: 'float', data: [0, 0.0001, 123.456, 1e100], type: 'FLOAT' },
|
||||
{ name: 'double', data: [0, 0.0001, 123.456, 1e100] },
|
||||
{ name: 'string', data: ['a', 'b', 'c', 'd'] },
|
||||
{ name: 'nullable', data: [true, false, null, null] },
|
||||
@ -36,10 +36,10 @@ describe('parquetWriteBuffer', () => {
|
||||
it('serializes basic types', async () => {
|
||||
const result = await roundTripDeserialize(basicData)
|
||||
expect(result).toEqual([
|
||||
{ bool: true, int: 0, bigint: 0n, double: 0, string: 'a', nullable: true },
|
||||
{ bool: false, int: 127, bigint: 127n, double: 0.0001, string: 'b', nullable: false },
|
||||
{ bool: true, int: 0x7fff, bigint: 0x7fffn, double: 123.456, string: 'c', nullable: null },
|
||||
{ bool: false, int: 0x7fffffff, bigint: 0x7fffffffffffffffn, double: 1e100, string: 'd', nullable: null },
|
||||
{ bool: true, int: 0, bigint: 0n, float: 0, double: 0, string: 'a', nullable: true },
|
||||
{ bool: false, int: 127, bigint: 127n, float: 0.00009999999747378752, double: 0.0001, string: 'b', nullable: false },
|
||||
{ bool: true, int: 0x7fff, bigint: 0x7fffn, float: 123.45600128173828, double: 123.456, string: 'c', nullable: null },
|
||||
{ bool: false, int: 0x7fffffff, bigint: 0x7fffffffffffffffn, float: Infinity, double: 1e100, string: 'd', nullable: null },
|
||||
])
|
||||
})
|
||||
|
||||
@ -92,8 +92,8 @@ describe('parquetWriteBuffer', () => {
|
||||
it('writes statistics when enabled', () => {
|
||||
const withStats = parquetWriteBuffer({ columnData: basicData, statistics: true })
|
||||
const noStats = parquetWriteBuffer({ columnData: basicData, statistics: false })
|
||||
expect(withStats.byteLength).toBe(669)
|
||||
expect(noStats.byteLength).toBe(575)
|
||||
expect(withStats.byteLength).toBe(773)
|
||||
expect(noStats.byteLength).toBe(663)
|
||||
})
|
||||
|
||||
it('serializes list types', async () => {
|
||||
|
||||
@ -34,10 +34,10 @@ describe('parquetWrite with FileWriter', () => {
|
||||
// check parquet data
|
||||
const result = await parquetReadObjects({ file, metadata })
|
||||
expect(result).toEqual([
|
||||
{ bool: true, int: 0, bigint: 0n, double: 0, string: 'a', nullable: true },
|
||||
{ bool: false, int: 127, bigint: 127n, double: 0.0001, string: 'b', nullable: false },
|
||||
{ bool: true, int: 0x7fff, bigint: 0x7fffn, double: 123.456, string: 'c', nullable: null },
|
||||
{ bool: false, int: 0x7fffffff, bigint: 0x7fffffffffffffffn, double: 1e100, string: 'd', nullable: null },
|
||||
{ bool: true, int: 0, bigint: 0n, float: 0, double: 0, string: 'a', nullable: true },
|
||||
{ bool: false, int: 127, bigint: 127n, float: 0.00009999999747378752, double: 0.0001, string: 'b', nullable: false },
|
||||
{ bool: true, int: 0x7fff, bigint: 0x7fffn, float: 123.45600128173828, double: 123.456, string: 'c', nullable: null },
|
||||
{ bool: false, int: 0x7fffffff, bigint: 0x7fffffffffffffffn, float: Infinity, double: 1e100, string: 'd', nullable: null },
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user