// hyparquet-writer/src/plain.js
/**
 * Write values to the writer in parquet PLAIN encoding, dispatching to the
 * type-specific writer for the given physical type.
 *
 * @import {DecodedArray, ParquetType} from 'hyparquet/src/types.js'
 * @import {Writer} from '../src/types.js'
 * @param {Writer} writer destination the encoded values are appended to
 * @param {DecodedArray} values values to encode
 * @param {ParquetType} type parquet physical type that selects the encoder
 * @param {number | undefined} fixedLength byte length of each value, required for FIXED_LEN_BYTE_ARRAY
 * @throws {Error} if the type is unsupported, or if FIXED_LEN_BYTE_ARRAY is requested without a fixedLength
 */
export function writePlain(writer, values, type, fixedLength) {
  if (type === 'BOOLEAN') {
    writePlainBoolean(writer, values)
  } else if (type === 'INT32') {
    writePlainInt32(writer, values)
  } else if (type === 'INT64') {
    writePlainInt64(writer, values)
  } else if (type === 'FLOAT') {
    writePlainFloat(writer, values)
  } else if (type === 'DOUBLE') {
    writePlainDouble(writer, values)
  } else if (type === 'BYTE_ARRAY') {
    writePlainByteArray(writer, values)
  } else if (type === 'FIXED_LEN_BYTE_ARRAY') {
    // a missing (or zero) length is rejected before any value is written
    if (!fixedLength) throw new Error('parquet FIXED_LEN_BYTE_ARRAY expected type_length')
    writePlainByteArrayFixed(writer, values, fixedLength)
  } else {
    throw new Error(`parquet unsupported type: ${type}`)
  }
}
/**
 * Write booleans bit-packed, eight values per byte. Value i sets bit (i % 8)
 * of its byte, so the first value of each group lands in the lowest bit.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not a boolean
 */
function writePlainBoolean(writer, values) {
  let currentByte = 0
  for (let i = 0; i < values.length; i++) {
    if (typeof values[i] !== 'boolean') throw new Error('parquet expected boolean value')
    const bitOffset = i % 8
    if (values[i]) {
      currentByte |= 1 << bitOffset
    }
    // flush as soon as the eighth bit of the current byte has been packed
    if (bitOffset === 7) {
      writer.appendUint8(currentByte)
      currentByte = 0
    }
  }
  // a trailing partial byte is written with its unused high bits left at zero
  if (values.length % 8 !== 0) {
    writer.appendUint8(currentByte)
  }
}
/**
 * Write each value with writer.appendInt32.
 *
 * Values are only checked with Number.isSafeInteger; how integers outside the
 * 32-bit range are handled is left to writer.appendInt32.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not a safe integer number
 */
function writePlainInt32(writer, values) {
  for (const value of values) {
    if (!Number.isSafeInteger(value)) throw new Error('parquet expected integer value')
    writer.appendInt32(value)
  }
}
/**
 * Write each value with writer.appendInt64. Every value must be a bigint.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not a bigint
 */
function writePlainInt64(writer, values) {
  for (const value of values) {
    if (typeof value !== 'bigint') throw new Error('parquet expected bigint value')
    writer.appendInt64(value)
  }
}
/**
 * Write each value with writer.appendFloat32. Every value must be a number.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not of type number
 */
function writePlainFloat(writer, values) {
  for (const value of values) {
    if (typeof value !== 'number') throw new Error('parquet expected number value')
    writer.appendFloat32(value)
  }
}
/**
 * Write each value with writer.appendFloat64. Every value must be a number.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values
 * @throws {Error} if any value is not of type number
 */
function writePlainDouble(writer, values) {
  for (const value of values) {
    if (typeof value !== 'number') throw new Error('parquet expected number value')
    writer.appendFloat64(value)
  }
}
/**
 * Write variable-length byte arrays: for each value, its byte length via
 * writer.appendUint32 followed by the bytes themselves. String values are
 * encoded to UTF-8 bytes first.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values strings or Uint8Arrays
 * @throws {Error} if any value is neither a string nor a Uint8Array
 */
function writePlainByteArray(writer, values) {
  // one encoder is shared by all string values instead of allocating per element
  const encoder = new TextEncoder()
  for (const value of values) {
    const bytes = typeof value === 'string' ? encoder.encode(value) : value
    if (!(bytes instanceof Uint8Array)) {
      throw new Error('parquet expected Uint8Array value')
    }
    writer.appendUint32(bytes.length)
    writer.appendBytes(bytes)
  }
}
/**
 * Write fixed-length byte arrays back to back with no length prefix.
 *
 * @param {Writer} writer
 * @param {DecodedArray} values Uint8Arrays, each exactly fixedLength bytes long
 * @param {number} fixedLength required byte length of every value
 * @throws {Error} if any value is not a Uint8Array or has a different length
 */
function writePlainByteArrayFixed(writer, values, fixedLength) {
  for (const value of values) {
    if (!(value instanceof Uint8Array)) throw new Error('parquet expected Uint8Array value')
    if (value.length !== fixedLength) throw new Error(`parquet expected Uint8Array of length ${fixedLength}`)
    writer.appendBytes(value)
  }
}