diff --git a/package.json b/package.json
index 1a706f8..06d63b8 100644
--- a/package.json
+++ b/package.json
@@ -26,12 +26,12 @@
     "test": "vitest run"
   },
   "devDependencies": {
-    "@babel/eslint-parser": "7.26.10",
-    "@types/node": "22.13.10",
+    "@babel/eslint-parser": "7.27.0",
+    "@types/node": "22.13.13",
     "@vitest/coverage-v8": "3.0.9",
-    "eslint": "9.22.0",
-    "eslint-plugin-jsdoc": "50.6.8",
-    "hyparquet": "1.10.0",
+    "eslint": "9.23.0",
+    "eslint-plugin-jsdoc": "50.6.9",
+    "hyparquet": "1.10.1",
     "typescript": "5.8.2",
     "vitest": "3.0.9"
   }
diff --git a/src/plain.js b/src/plain.js
new file mode 100644
index 0000000..9845adc
--- /dev/null
+++ b/src/plain.js
@@ -0,0 +1,89 @@
+
+/**
+ * Write values to the writer using parquet PLAIN encoding.
+ *
+ * @import {DecodedArray, ParquetType} from 'hyparquet/src/types.js'
+ * @import {Writer} from './writer.js'
+ * @param {Writer} writer destination that receives the encoded bytes
+ * @param {DecodedArray} values values to encode
+ * @param {ParquetType} type parquet physical type of the values
+ * @throws {Error} if the type is not BOOLEAN, INT32, INT64 or DOUBLE
+ */
+export function writePlain(writer, values, type) {
+  if (type === 'BOOLEAN') {
+    writePlainBoolean(writer, values)
+  } else if (type === 'INT32') {
+    writePlainInt32(writer, values)
+  } else if (type === 'INT64') {
+    writePlainInt64(writer, values)
+  } else if (type === 'DOUBLE') {
+    writePlainDouble(writer, values)
+  } else {
+    throw new Error(`parquet unsupported type: ${type}`)
+  }
+}
+
+/**
+ * Bit-pack truthy/falsy values into bytes, least significant bit first.
+ *
+ * @param {Writer} writer
+ * @param {DecodedArray} values
+ */
+function writePlainBoolean(writer, values) {
+  let currentByte = 0
+
+  for (let i = 0; i < values.length; i++) {
+    const bitOffset = i % 8
+
+    if (values[i]) {
+      currentByte |= 1 << bitOffset
+    }
+
+    // The eighth bit completes the byte: flush it and start a new one
+    if (bitOffset === 7) {
+      writer.appendUint8(currentByte)
+      currentByte = 0
+    }
+  }
+
+  // If the array length is not a multiple of 8, write the leftover bits
+  if (values.length % 8 !== 0) {
+    writer.appendUint8(currentByte)
+  }
+}
+
+/**
+ * Write each value with writer.appendInt32.
+ *
+ * @param {Writer} writer
+ * @param {DecodedArray} values
+ */
+function writePlainInt32(writer, values) {
+  for (const value of values) {
+    writer.appendInt32(value)
+  }
+}
+
+/**
+ * Write each value with writer.appendInt64.
+ *
+ * @param {Writer} writer
+ * @param {DecodedArray} values
+ */
+function writePlainInt64(writer, values) {
+  for (const value of values) {
+    writer.appendInt64(value)
+  }
+}
+
+/**
+ * Write each value with writer.appendFloat64.
+ *
+ * @param {Writer} writer
+ * @param {DecodedArray} values
+ */
+function writePlainDouble(writer, values) {
+  for (const value of values) {
+    writer.appendFloat64(value)
+  }
+}
diff --git a/src/types.d.ts b/src/types.d.ts
index 048cc76..fd39f58 100644
--- a/src/types.d.ts
+++ b/src/types.d.ts
@@ -5,6 +5,8 @@ export interface Writer {
   getBuffer(): ArrayBuffer
   appendUint8(value: number): void
   appendUint32(value: number): void
+  appendInt32(value: number): void
+  appendInt64(value: bigint): void
   appendFloat64(value: number): void
   appendBuffer(buffer: ArrayBuffer): void
   appendVarInt(value: number): void
diff --git a/src/writer.js b/src/writer.js
index 6532ade..78717f4 100644
--- a/src/writer.js
+++ b/src/writer.js
@@ -47,6 +47,28 @@ Writer.prototype.appendUint32 = function(value) {
   this.offset += 4
 }
 
+/**
+ * Append a signed 32-bit integer in little-endian byte order.
+ *
+ * @param {number} value
+ */
+Writer.prototype.appendInt32 = function(value) {
+  this.ensure(this.offset + 4)
+  this.view.setInt32(this.offset, value, true)
+  this.offset += 4
+}
+
+/**
+ * Append a signed 64-bit integer in little-endian byte order.
+ *
+ * @param {bigint} value passed through BigInt() before it is stored
+ */
+Writer.prototype.appendInt64 = function(value) {
+  this.ensure(this.offset + 8)
+  this.view.setBigInt64(this.offset, BigInt(value), true)
+  this.offset += 8
+}
+
 /**
  * @param {number} value
  */
diff --git a/test/plain.test.js b/test/plain.test.js
new file mode 100644
index 0000000..60fbe80
--- /dev/null
+++ b/test/plain.test.js
@@ -0,0 +1,67 @@
+import { describe, expect, it } from 'vitest'
+import { Writer } from '../src/writer.js'
+import { writePlain } from '../src/plain.js'
+
+describe('writePlain', () => {
+  it('writes BOOLEAN (multiple of 8 bits, plus leftover)', () => {
+    const writer = new Writer()
+    const booleans = [true, false, true, true, false, false, false, true, true]
+    writePlain(writer, booleans, 'BOOLEAN')
+
+    expect(writer.offset).toBe(2)
+    expect(writer.view.getUint8(0)).toBe(0b10001101)
+    expect(writer.view.getUint8(1)).toBe(0b00000001)
+  })
+
+  it('writes INT32', () => {
+    const writer = new Writer()
+    const ints = [0, 1, 255, 256, 65535, -1, -2147483648, 2147483647]
+    writePlain(writer, ints, 'INT32')
+
+    // 4 bytes per int
+    expect(writer.offset).toBe(4 * ints.length)
+
+    for (let i = 0; i < ints.length; i++) {
+      const value = writer.view.getInt32(i * 4, true)
+      expect(value).toBe(ints[i])
+    }
+  })
+
+  it('writes INT64', () => {
+    const writer = new Writer()
+    const bigints = [0n, 1n, 42n, -1n, BigInt(2 ** 53 - 1), -(2n ** 63n), 2n ** 63n - 1n]
+    writePlain(writer, bigints, 'INT64')
+
+    // 8 bytes per int64
+    expect(writer.offset).toBe(8 * bigints.length)
+
+    for (let i = 0; i < bigints.length; i++) {
+      const value = writer.view.getBigInt64(i * 8, true)
+      expect(value).toBe(bigints[i])
+    }
+  })
+
+  it('writes DOUBLE', () => {
+    const writer = new Writer()
+    const doubles = [0, 3.14, -2.71, Infinity, -Infinity, NaN]
+    writePlain(writer, doubles, 'DOUBLE')
+
+    // 8 bytes per double
+    expect(writer.offset).toBe(8 * doubles.length)
+
+    for (let i = 0; i < doubles.length; i++) {
+      const val = writer.view.getFloat64(i * 8, true)
+      if (Number.isNaN(doubles[i])) {
+        expect(Number.isNaN(val)).toBe(true)
+      } else {
+        expect(val).toBe(doubles[i])
+      }
+    }
+  })
+
+  it('throws error on unsupported type', () => {
+    const writer = new Writer()
+    expect(() => writePlain(writer, [1, 2, 3], 'BYTE_ARRAY'))
+      .toThrow(/parquet unsupported type/i)
+  })
+})