mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2026-01-05 03:16:36 +00:00
Write plain encoding
This commit is contained in:
parent
76889680b9
commit
a0560ee412
10
package.json
10
package.json
@ -26,12 +26,12 @@
|
||||
"test": "vitest run"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/eslint-parser": "7.26.10",
|
||||
"@types/node": "22.13.10",
|
||||
"@babel/eslint-parser": "7.27.0",
|
||||
"@types/node": "22.13.13",
|
||||
"@vitest/coverage-v8": "3.0.9",
|
||||
"eslint": "9.22.0",
|
||||
"eslint-plugin-jsdoc": "50.6.8",
|
||||
"hyparquet": "1.10.0",
|
||||
"eslint": "9.23.0",
|
||||
"eslint-plugin-jsdoc": "50.6.9",
|
||||
"hyparquet": "1.10.1",
|
||||
"typescript": "5.8.2",
|
||||
"vitest": "3.0.9"
|
||||
}
|
||||
|
||||
78
src/plain.js
Normal file
78
src/plain.js
Normal file
@ -0,0 +1,78 @@
|
||||
|
||||
/**
 * Append values to the writer using PLAIN encoding.
 *
 * Dispatches on the parquet type to a per-type encoder. Only BOOLEAN, INT32,
 * INT64 and DOUBLE are handled; any other type is rejected.
 *
 * @import {DecodedArray, ParquetType} from 'hyparquet/src/types.js'
 * @import {Writer} from './writer.js'
 * @param {Writer} writer destination the encoded bytes are appended to
 * @param {DecodedArray} values values to encode, in order
 * @param {ParquetType} type parquet type that selects the encoder
 * @throws {Error} if type is not one of the handled types
 */
export function writePlain(writer, values, type) {
  switch (type) {
    case 'BOOLEAN':
      writePlainBoolean(writer, values)
      break
    case 'INT32':
      writePlainInt32(writer, values)
      break
    case 'INT64':
      writePlainInt64(writer, values)
      break
    case 'DOUBLE':
      writePlainDouble(writer, values)
      break
    default:
      throw new Error(`parquet unsupported type: ${type}`)
  }
}
|
||||
|
||||
/**
|
||||
* @param {Writer} writer
|
||||
* @param {DecodedArray} values
|
||||
*/
|
||||
function writePlainBoolean(writer, values) {
|
||||
let currentByte = 0
|
||||
|
||||
for (let i = 0; i < values.length; i++) {
|
||||
const bitOffset = i % 8
|
||||
|
||||
if (values[i]) {
|
||||
currentByte |= 1 << bitOffset
|
||||
}
|
||||
|
||||
// Once we've packed 8 bits or are at a multiple of 8, we write out the byte
|
||||
if (bitOffset === 7) {
|
||||
writer.appendUint8(currentByte)
|
||||
currentByte = 0
|
||||
}
|
||||
}
|
||||
|
||||
// If the array length is not a multiple of 8, write the leftover bits
|
||||
if (values.length % 8 !== 0) {
|
||||
writer.appendUint8(currentByte)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Writer} writer
|
||||
* @param {DecodedArray} values
|
||||
*/
|
||||
function writePlainInt32(writer, values) {
|
||||
for (const value of values) {
|
||||
writer.appendInt32(value)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Writer} writer
|
||||
* @param {DecodedArray} values
|
||||
*/
|
||||
function writePlainInt64(writer, values) {
|
||||
for (const value of values) {
|
||||
writer.appendInt64(value)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Writer} writer
|
||||
* @param {DecodedArray} values
|
||||
*/
|
||||
function writePlainDouble(writer, values) {
|
||||
for (const value of values) {
|
||||
writer.appendFloat64(value)
|
||||
}
|
||||
}
|
||||
2
src/types.d.ts
vendored
2
src/types.d.ts
vendored
@ -5,6 +5,8 @@ export interface Writer {
|
||||
getBuffer(): ArrayBuffer
|
||||
appendUint8(value: number): void
|
||||
appendUint32(value: number): void
|
||||
appendInt32(value: number): void
|
||||
appendInt64(value: bigint): void
|
||||
appendFloat64(value: number): void
|
||||
appendBuffer(buffer: ArrayBuffer): void
|
||||
appendVarInt(value: number): void
|
||||
|
||||
@ -47,6 +47,24 @@ Writer.prototype.appendUint32 = function(value) {
|
||||
this.offset += 4
|
||||
}
|
||||
|
||||
/**
 * Append a signed 32-bit integer in little-endian byte order and advance the
 * write offset by 4 bytes.
 *
 * @param {number} value
 */
Writer.prototype.appendInt32 = function(value) {
  // ensure() is defined elsewhere; presumably it makes room for 4 more bytes
  this.ensure(this.offset + 4)
  this.view.setInt32(this.offset, value, true)
  this.offset += 4
}
|
||||
|
||||
/**
 * Append a signed 64-bit integer in little-endian byte order and advance the
 * write offset by 8 bytes.
 *
 * The value is passed through BigInt() before being stored, so a bigint is
 * written as-is; BigInt() throws for a number that is not an integer.
 *
 * @param {bigint} value
 */
Writer.prototype.appendInt64 = function(value) {
  // ensure() is defined elsewhere; presumably it makes room for 8 more bytes
  this.ensure(this.offset + 8)
  this.view.setBigInt64(this.offset, BigInt(value), true)
  this.offset += 8
}
|
||||
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
|
||||
67
test/plain.test.js
Normal file
67
test/plain.test.js
Normal file
@ -0,0 +1,67 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { Writer } from '../src/writer.js'
|
||||
import { writePlain } from '../src/plain.js'
|
||||
|
||||
// Exercises writePlain for every supported type by reading the bytes back
// from the writer's DataView, plus the rejection path for other types.
describe('writePlain', () => {
  it('writes BOOLEAN (multiple of 8 bits, plus leftover)', () => {
    const writer = new Writer()
    const booleans = [true, false, true, true, false, false, false, true, true]
    writePlain(writer, booleans, 'BOOLEAN')

    // 9 values: one full byte plus one byte holding the leftover bit
    expect(writer.offset).toBe(2)
    expect(writer.view.getUint8(0)).toBe(0b10001101)
    expect(writer.view.getUint8(1)).toBe(0b00000001)
  })

  it('writes BOOLEAN without a trailing byte when the count is a multiple of 8', () => {
    const writer = new Writer()
    const booleans = [false, true, false, false, false, false, false, true]
    writePlain(writer, booleans, 'BOOLEAN')

    expect(writer.offset).toBe(1)
    expect(writer.view.getUint8(0)).toBe(0b10000010)
  })

  it('writes nothing for an empty BOOLEAN array', () => {
    const writer = new Writer()
    writePlain(writer, [], 'BOOLEAN')

    expect(writer.offset).toBe(0)
  })

  it('writes INT32', () => {
    const writer = new Writer()
    const ints = [0, 1, 255, 256, 65535, -1, -2147483648, 2147483647]
    writePlain(writer, ints, 'INT32')

    // 4 bytes per int
    expect(writer.offset).toBe(4 * ints.length)

    for (let i = 0; i < ints.length; i++) {
      const value = writer.view.getInt32(i * 4, true)
      expect(value).toBe(ints[i])
    }
  })

  it('writes INT64', () => {
    const writer = new Writer()
    // includes negative values and both ends of the signed 64-bit range
    const bigints = [
      0n,
      1n,
      42n,
      -1n,
      BigInt(2 ** 53 - 1),
      9223372036854775807n,
      -9223372036854775808n,
    ]
    writePlain(writer, bigints, 'INT64')

    // 8 bytes per int64
    expect(writer.offset).toBe(8 * bigints.length)

    for (let i = 0; i < bigints.length; i++) {
      const value = writer.view.getBigInt64(i * 8, true)
      expect(value).toBe(bigints[i])
    }
  })

  it('writes DOUBLE', () => {
    const writer = new Writer()
    const doubles = [0, 3.14, -2.71, Infinity, -Infinity, NaN]
    writePlain(writer, doubles, 'DOUBLE')

    // 8 bytes per double
    expect(writer.offset).toBe(8 * doubles.length)

    for (let i = 0; i < doubles.length; i++) {
      const val = writer.view.getFloat64(i * 8, true)
      // NaN never compares equal to itself, so check it separately
      if (Number.isNaN(doubles[i])) {
        expect(Number.isNaN(val)).toBe(true)
      } else {
        expect(val).toBe(doubles[i])
      }
    }
  })

  it('throws error on unsupported type', () => {
    const writer = new Writer()
    expect(() => writePlain(writer, [1, 2, 3], 'BYTE_ARRAY'))
      .toThrow(/parquet unsupported type/i)
  })
})
|
||||
Loading…
Reference in New Issue
Block a user