mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
BYO writer
This commit is contained in:
parent
5e89e80cad
commit
41ddace347
30
README.md
30
README.md
@ -6,14 +6,20 @@
|
||||
[](https://www.npmjs.com/package/hyparquet-writer)
|
||||
[](https://github.com/hyparam/hyparquet-writer/actions)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||

|
||||

|
||||
[](https://www.npmjs.com/package/hyparquet-writer?activeTab=dependencies)
|
||||
|
||||
Hyparquet Writer is a JavaScript library for writing [Apache Parquet](https://parquet.apache.org) files. It is designed to be lightweight, fast and store data very efficiently. It is a companion to the [hyparquet](https://github.com/hyparam/hyparquet) library, which is a JavaScript library for reading parquet files.
|
||||
|
||||
## Usage
|
||||
|
||||
Call `parquetWrite` with a list of columns, each column is an object with a `name` and `data` field. The `data` field should be an array of same-type values.
|
||||
Call `parquetWrite` with argument `columnData`. Each column in `columnData` should contain:
|
||||
|
||||
- `name`: the column name
|
||||
- `data`: an array of same-type values
|
||||
- `type`: the parquet schema type (optional, type guessed from data if not provided)
|
||||
|
||||
Example:
|
||||
|
||||
```javascript
|
||||
import { parquetWrite } from 'hyparquet-writer'
|
||||
@ -28,11 +34,31 @@ const arrayBuffer = parquetWrite({
|
||||
|
||||
## Options
|
||||
|
||||
Options can be passed to `parquetWrite` to change parquet file properties:
|
||||
|
||||
- `compression`: use snappy compression (default true)
|
||||
- `statistics`: write column statistics (default true)
|
||||
- `rowGroupSize`: number of rows in each row group (default 100000)
|
||||
- `kvMetadata`: extra key-value metadata
|
||||
|
||||
```javascript
|
||||
import { parquetWrite } from 'hyparquet-writer'
|
||||
|
||||
const arrayBuffer = parquetWrite({
|
||||
columnData: [
|
||||
{ name: 'name', data: ['Alice', 'Bob', 'Charlie'], type: 'STRING' },
|
||||
{ name: 'age', data: [25, 30, 35], type: 'INT32' },
|
||||
],
|
||||
compression: false,
|
||||
statistics: false,
|
||||
rowGroupSize: 1000,
|
||||
kvMetadata: {
|
||||
'key1': 'value1',
|
||||
'key2': 'value2',
|
||||
},
|
||||
})
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- https://github.com/hyparam/hyparquet
|
||||
|
||||
@ -40,7 +40,7 @@
|
||||
"test": "vitest run"
|
||||
},
|
||||
"dependencies": {
|
||||
"hyparquet": "1.10.4"
|
||||
"hyparquet": "1.11.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/eslint-parser": "7.27.0",
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
|
||||
/**
|
||||
* Self-expanding buffer view
|
||||
* Generic buffered writer.
|
||||
*
|
||||
* @returns {import('../src/types.js').Writer}
|
||||
* @import {Writer} from '../src/types.js'
|
||||
* @returns {Writer}
|
||||
*/
|
||||
export function Writer() {
|
||||
export function ByteWriter() {
|
||||
this.buffer = new ArrayBuffer(1024)
|
||||
this.offset = 0
|
||||
this.view = new DataView(this.buffer)
|
||||
@ -14,24 +15,29 @@ export function Writer() {
|
||||
/**
|
||||
* @param {number} size
|
||||
*/
|
||||
Writer.prototype.ensure = function(size) {
|
||||
ByteWriter.prototype.ensure = function(size) {
|
||||
// auto-expanding buffer
|
||||
if (this.offset + size > this.buffer.byteLength) {
|
||||
const newSize = Math.max(this.buffer.byteLength * 2, this.offset + size)
|
||||
const newBuffer = new ArrayBuffer(newSize)
|
||||
// TODO: save buffers until later and merge once?
|
||||
new Uint8Array(newBuffer).set(new Uint8Array(this.buffer))
|
||||
this.buffer = newBuffer
|
||||
this.view = new DataView(this.buffer)
|
||||
}
|
||||
}
|
||||
|
||||
Writer.prototype.getBuffer = function() {
|
||||
ByteWriter.prototype.finish = function() {
|
||||
}
|
||||
|
||||
ByteWriter.prototype.getBuffer = function() {
|
||||
return this.buffer.slice(0, this.offset)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
Writer.prototype.appendUint8 = function(value) {
|
||||
ByteWriter.prototype.appendUint8 = function(value) {
|
||||
this.ensure(this.offset + 1)
|
||||
this.view.setUint8(this.offset, value)
|
||||
this.offset++
|
||||
@ -40,7 +46,7 @@ Writer.prototype.appendUint8 = function(value) {
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
Writer.prototype.appendUint32 = function(value) {
|
||||
ByteWriter.prototype.appendUint32 = function(value) {
|
||||
this.ensure(this.offset + 4)
|
||||
this.view.setUint32(this.offset, value, true)
|
||||
this.offset += 4
|
||||
@ -49,7 +55,7 @@ Writer.prototype.appendUint32 = function(value) {
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
Writer.prototype.appendInt32 = function(value) {
|
||||
ByteWriter.prototype.appendInt32 = function(value) {
|
||||
this.ensure(this.offset + 4)
|
||||
this.view.setInt32(this.offset, value, true)
|
||||
this.offset += 4
|
||||
@ -58,7 +64,7 @@ Writer.prototype.appendInt32 = function(value) {
|
||||
/**
|
||||
* @param {bigint} value
|
||||
*/
|
||||
Writer.prototype.appendInt64 = function(value) {
|
||||
ByteWriter.prototype.appendInt64 = function(value) {
|
||||
this.ensure(this.offset + 8)
|
||||
this.view.setBigInt64(this.offset, BigInt(value), true)
|
||||
this.offset += 8
|
||||
@ -67,7 +73,7 @@ Writer.prototype.appendInt64 = function(value) {
|
||||
/**
|
||||
* @param {number} value
|
||||
*/
|
||||
Writer.prototype.appendFloat64 = function(value) {
|
||||
ByteWriter.prototype.appendFloat64 = function(value) {
|
||||
this.ensure(this.offset + 8)
|
||||
this.view.setFloat64(this.offset, value, true)
|
||||
this.offset += 8
|
||||
@ -76,14 +82,14 @@ Writer.prototype.appendFloat64 = function(value) {
|
||||
/**
|
||||
* @param {ArrayBuffer} value
|
||||
*/
|
||||
Writer.prototype.appendBuffer = function(value) {
|
||||
ByteWriter.prototype.appendBuffer = function(value) {
|
||||
this.appendBytes(new Uint8Array(value))
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Uint8Array} value
|
||||
*/
|
||||
Writer.prototype.appendBytes = function(value) {
|
||||
ByteWriter.prototype.appendBytes = function(value) {
|
||||
this.ensure(this.offset + value.length)
|
||||
new Uint8Array(this.buffer, this.offset, value.length).set(value)
|
||||
this.offset += value.length
|
||||
@ -95,7 +101,7 @@ Writer.prototype.appendBytes = function(value) {
|
||||
*
|
||||
* @param {number} value
|
||||
*/
|
||||
Writer.prototype.appendVarInt = function(value) {
|
||||
ByteWriter.prototype.appendVarInt = function(value) {
|
||||
while (true) {
|
||||
if ((value & ~0x7f) === 0) {
|
||||
// fits in 7 bits
|
||||
@ -114,7 +120,7 @@ Writer.prototype.appendVarInt = function(value) {
|
||||
*
|
||||
* @param {bigint} value
|
||||
*/
|
||||
Writer.prototype.appendVarBigInt = function(value) {
|
||||
ByteWriter.prototype.appendVarBigInt = function(value) {
|
||||
while (true) {
|
||||
if ((value & ~0x7fn) === 0n) {
|
||||
// fits in 7 bits
|
||||
@ -5,10 +5,11 @@ import { writePlain } from './plain.js'
|
||||
import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
|
||||
import { snappyCompress } from './snappy.js'
|
||||
import { serializeTCompactProtocol } from './thrift.js'
|
||||
import { Writer } from './writer.js'
|
||||
import { ByteWriter } from './bytewriter.js'
|
||||
|
||||
/**
|
||||
* @import {ColumnMetaData, DecodedArray, PageHeader, ParquetType, SchemaElement, Statistics} from 'hyparquet'
|
||||
* @import {Writer} from '../src/types.js'
|
||||
* @param {Writer} writer
|
||||
* @param {SchemaElement[]} schemaPath
|
||||
* @param {DecodedArray} values
|
||||
@ -50,7 +51,7 @@ export function writeColumn(writer, schemaPath, values, compressed, stats) {
|
||||
}
|
||||
|
||||
// Write levels to temp buffer
|
||||
const levels = new Writer()
|
||||
const levels = new ByteWriter()
|
||||
const { definition_levels_byte_length, repetition_levels_byte_length, num_nulls } = writeLevels(levels, schemaPath, values)
|
||||
|
||||
// dictionary encoding
|
||||
@ -81,7 +82,7 @@ export function writeColumn(writer, schemaPath, values, compressed, stats) {
|
||||
}
|
||||
|
||||
// write page data to temp buffer
|
||||
const page = new Writer()
|
||||
const page = new ByteWriter()
|
||||
/** @type {import('hyparquet').Encoding} */
|
||||
const encoding = dictionary ? 'RLE_DICTIONARY' : 'PLAIN'
|
||||
if (dictionary) {
|
||||
@ -95,7 +96,7 @@ export function writeColumn(writer, schemaPath, values, compressed, stats) {
|
||||
// compress page data
|
||||
let compressedPage = page
|
||||
if (compressed) {
|
||||
compressedPage = new Writer()
|
||||
compressedPage = new ByteWriter()
|
||||
snappyCompress(compressedPage, new Uint8Array(page.getBuffer()))
|
||||
}
|
||||
|
||||
@ -187,13 +188,13 @@ function useDictionary(values, type) {
|
||||
* @param {boolean} compressed
|
||||
*/
|
||||
function writeDictionaryPage(writer, dictionary, type, compressed) {
|
||||
const dictionaryPage = new Writer()
|
||||
const dictionaryPage = new ByteWriter()
|
||||
writePlain(dictionaryPage, dictionary, type)
|
||||
|
||||
// compress dictionary page data
|
||||
let compressedDictionaryPage = dictionaryPage
|
||||
if (compressed) {
|
||||
compressedDictionaryPage = new Writer()
|
||||
compressedDictionaryPage = new ByteWriter()
|
||||
snappyCompress(compressedDictionaryPage, new Uint8Array(dictionaryPage.getBuffer()))
|
||||
}
|
||||
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
import { Writer } from './writer.js'
|
||||
import { ByteWriter } from './bytewriter.js'
|
||||
|
||||
/**
|
||||
* @import {DecodedArray} from 'hyparquet'
|
||||
* @import {Writer} from '../src/types.js'
|
||||
* @param {Writer} writer
|
||||
* @param {DecodedArray} values
|
||||
* @returns {number} bytes written
|
||||
@ -12,9 +13,9 @@ export function writeRleBitPackedHybrid(writer, values) {
|
||||
const bitWidth = Math.ceil(Math.log2(Math.max(...values) + 1))
|
||||
|
||||
// try both RLE and bit-packed and choose the best
|
||||
const rle = new Writer()
|
||||
const rle = new ByteWriter()
|
||||
writeRle(rle, values, bitWidth)
|
||||
const bitPacked = new Writer()
|
||||
const bitPacked = new ByteWriter()
|
||||
writeBitPacked(bitPacked, values, bitWidth)
|
||||
|
||||
if (rle.offset < bitPacked.offset) {
|
||||
|
||||
@ -4,7 +4,7 @@ import { unconvertMetadata } from './unconvert.js'
|
||||
|
||||
/**
|
||||
* @import {FileMetaData} from 'hyparquet'
|
||||
* @import {Writer} from './writer.js'
|
||||
* @import {Writer} from '../src/types.js'
|
||||
* @param {Writer} writer
|
||||
* @param {FileMetaData} metadata
|
||||
*/
|
||||
|
||||
@ -1,28 +1,29 @@
|
||||
import { writeColumn } from './column.js'
|
||||
import { Writer } from './writer.js'
|
||||
import { writeMetadata } from './metadata.js'
|
||||
|
||||
/**
|
||||
* Create a new ParquetWriter.
|
||||
*
|
||||
* @import {ColumnChunk, FileMetaData, RowGroup, SchemaElement, SchemaTree} from 'hyparquet'
|
||||
* @import {ColumnChunk, FileMetaData, RowGroup, SchemaElement} from 'hyparquet'
|
||||
* @import {KeyValue} from 'hyparquet/src/types.js'
|
||||
* @import {ColumnData} from './types.js'
|
||||
* @import {ColumnData, Writer} from '../src/types.js'
|
||||
* @param {object} options
|
||||
* @param {Writer} options.writer
|
||||
* @param {SchemaElement[]} options.schema
|
||||
* @param {boolean} [options.compressed]
|
||||
* @param {boolean} [options.statistics]
|
||||
* @param {KeyValue[]} [options.kvMetadata]
|
||||
*/
|
||||
export function ParquetWriter({ schema, compressed = true, statistics = true, kvMetadata }) {
|
||||
/** @type {RowGroup[]} */
|
||||
this.row_groups = []
|
||||
export function ParquetWriter({ writer, schema, compressed = true, statistics = true, kvMetadata }) {
|
||||
this.writer = writer
|
||||
this.schema = schema
|
||||
this.compressed = compressed
|
||||
this.statistics = statistics
|
||||
this.kvMetadata = kvMetadata
|
||||
|
||||
/** @type {RowGroup[]} */
|
||||
this.row_groups = []
|
||||
this.num_rows = BigInt(0)
|
||||
this.writer = new Writer()
|
||||
|
||||
// write header PAR1
|
||||
this.writer.appendUint32(0x31524150)
|
||||
@ -72,9 +73,7 @@ ParquetWriter.prototype.write = function({ columnData, rowGroupSize = 100000 })
|
||||
}
|
||||
|
||||
/**
|
||||
* Finish writing the file and return the buffer.
|
||||
*
|
||||
* @returns {ArrayBuffer}
|
||||
* Finish writing the file.
|
||||
*/
|
||||
ParquetWriter.prototype.finish = function() {
|
||||
// write metadata
|
||||
@ -94,6 +93,5 @@ ParquetWriter.prototype.finish = function() {
|
||||
|
||||
// write footer PAR1
|
||||
this.writer.appendUint32(0x31524150)
|
||||
|
||||
return this.writer.getBuffer()
|
||||
this.writer.finish()
|
||||
}
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
|
||||
/**
|
||||
* @import {DecodedArray, ParquetType} from 'hyparquet/src/types.js'
|
||||
* @import {Writer} from './writer.js'
|
||||
* @import {Writer} from '../src/types.js'
|
||||
* @param {Writer} writer
|
||||
* @param {DecodedArray} values
|
||||
* @param {ParquetType} type
|
||||
|
||||
@ -95,3 +95,32 @@ export function getMaxDefinitionLevel(schemaPath) {
|
||||
}
|
||||
return maxLevel
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert column data to schema.
|
||||
*
|
||||
* @import {ColumnData} from '../src/types.js'
|
||||
* @param {ColumnData[]} columnData
|
||||
* @returns {SchemaElement[]}
|
||||
*/
|
||||
export function schemaFromColumnData(columnData) {
|
||||
/** @type {SchemaElement[]} */
|
||||
const schema = [{
|
||||
name: 'root',
|
||||
num_children: columnData.length,
|
||||
}]
|
||||
let num_rows = 0
|
||||
for (const { name, data, type } of columnData) {
|
||||
// check if all columns have the same length
|
||||
if (num_rows === 0) {
|
||||
num_rows = data.length
|
||||
} else if (num_rows !== data.length) {
|
||||
throw new Error('columns must have the same length')
|
||||
}
|
||||
// auto-detect type
|
||||
const schemaElement = getSchemaElementForValues(name, data, type)
|
||||
if (!schemaElement.type) throw new Error(`column ${name} cannot determine type`)
|
||||
schema.push(schemaElement)
|
||||
}
|
||||
return schema
|
||||
}
|
||||
|
||||
@ -14,7 +14,7 @@ const globalHashTables = new Array(MAX_HASH_TABLE_BITS + 1)
|
||||
* Compress snappy data.
|
||||
* Writes Snappy-compressed bytes into a writer.
|
||||
*
|
||||
* @import {Writer} from '../src/writer.js'
|
||||
* @import {Writer} from '../src/types.js'
|
||||
* @param {Writer} writer
|
||||
* @param {Uint8Array} input - uncompressed data
|
||||
*/
|
||||
|
||||
3
src/types.d.ts
vendored
3
src/types.d.ts
vendored
@ -9,6 +9,9 @@ export interface ColumnData {
|
||||
export interface Writer {
|
||||
buffer: ArrayBuffer
|
||||
offset: number
|
||||
view: DataView
|
||||
ensure(size: number): void
|
||||
finish(): void
|
||||
getBuffer(): ArrayBuffer
|
||||
appendUint8(value: number): void
|
||||
appendUint32(value: number): void
|
||||
|
||||
43
src/write.js
43
src/write.js
@ -1,10 +1,10 @@
|
||||
import { getSchemaElementForValues } from './schema.js'
|
||||
import { ParquetWriter } from './parquet-writer.js'
|
||||
import { schemaFromColumnData } from './schema.js'
|
||||
import { ByteWriter } from './bytewriter.js'
|
||||
|
||||
/**
|
||||
* Write data as parquet to an ArrayBuffer
|
||||
*
|
||||
* @import {ColumnChunk, DecodedArray, FileMetaData, RowGroup, SchemaElement, SchemaTree} from 'hyparquet'
|
||||
* @import {KeyValue} from 'hyparquet/src/types.js'
|
||||
* @import {ColumnData} from '../src/types.js'
|
||||
* @param {object} options
|
||||
@ -17,45 +17,18 @@ import { ParquetWriter } from './parquet-writer.js'
|
||||
*/
|
||||
export function parquetWrite({ columnData, compressed = true, statistics = true, rowGroupSize = 100000, kvMetadata }) {
|
||||
const schema = schemaFromColumnData(columnData)
|
||||
const writer = new ParquetWriter({
|
||||
const writer = new ByteWriter()
|
||||
const pq = new ParquetWriter({
|
||||
writer,
|
||||
schema,
|
||||
compressed,
|
||||
statistics,
|
||||
kvMetadata,
|
||||
})
|
||||
|
||||
writer.write({
|
||||
pq.write({
|
||||
columnData,
|
||||
rowGroupSize,
|
||||
})
|
||||
|
||||
return writer.finish()
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert column data to schema.
|
||||
*
|
||||
* @param {ColumnData[]} columnData
|
||||
* @returns {SchemaElement[]}
|
||||
*/
|
||||
function schemaFromColumnData(columnData) {
|
||||
/** @type {SchemaElement[]} */
|
||||
const schema = [{
|
||||
name: 'root',
|
||||
num_children: columnData.length,
|
||||
}]
|
||||
let num_rows = 0
|
||||
for (const { name, data, type } of columnData) {
|
||||
// check if all columns have the same length
|
||||
if (num_rows === 0) {
|
||||
num_rows = data.length
|
||||
} else if (num_rows !== data.length) {
|
||||
throw new Error('columns must have the same length')
|
||||
}
|
||||
// auto-detect type
|
||||
const schemaElement = getSchemaElementForValues(name, data, type)
|
||||
if (!schemaElement.type) throw new Error(`column ${name} cannot determine type`)
|
||||
schema.push(schemaElement)
|
||||
}
|
||||
return schema
|
||||
pq.finish()
|
||||
return writer.getBuffer()
|
||||
}
|
||||
|
||||
106
test/bytewriter.test.js
Normal file
106
test/bytewriter.test.js
Normal file
@ -0,0 +1,106 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { ByteWriter } from '../src/bytewriter.js'
|
||||
|
||||
describe('ByteWriter', () => {
|
||||
it('initializes with correct defaults', () => {
|
||||
const writer = new ByteWriter()
|
||||
expect(writer.offset).toBe(0)
|
||||
expect(writer.buffer.byteLength).toBe(1024)
|
||||
})
|
||||
|
||||
it('appendUint8 writes single byte', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.appendUint8(255)
|
||||
expect(new Uint8Array(writer.getBuffer())).toEqual(new Uint8Array([0xff]))
|
||||
})
|
||||
|
||||
it('appendUint32 writes a 32-bit integer in little-endian', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.appendUint32(0x12345678)
|
||||
expect(new Uint8Array(writer.getBuffer())).toEqual(
|
||||
new Uint8Array([0x78, 0x56, 0x34, 0x12])
|
||||
)
|
||||
})
|
||||
|
||||
it('appendInt32 writes signed 32-bit integer in little-endian', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.appendInt32(-1)
|
||||
expect(new Uint8Array(writer.getBuffer())).toEqual(
|
||||
new Uint8Array([0xff, 0xff, 0xff, 0xff])
|
||||
)
|
||||
})
|
||||
|
||||
it('appendInt64 writes a 64-bit bigint in little-endian', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.appendInt64(0x1122334455667788n)
|
||||
expect(new Uint8Array(writer.getBuffer())).toEqual(
|
||||
new Uint8Array([0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11])
|
||||
)
|
||||
})
|
||||
|
||||
it('appendFloat64 writes a 64-bit float in little-endian', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.appendFloat64(1.0)
|
||||
expect(new Uint8Array(writer.getBuffer())).toEqual(
|
||||
new Uint8Array([0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x3f])
|
||||
)
|
||||
})
|
||||
|
||||
it('appendBytes writes raw Uint8Array data', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.appendBytes(new Uint8Array([1, 2, 3, 4]))
|
||||
expect(new Uint8Array(writer.getBuffer())).toEqual(new Uint8Array([1, 2, 3, 4]))
|
||||
})
|
||||
|
||||
it('appendBuffer writes raw ArrayBuffer data', () => {
|
||||
const writer = new ByteWriter()
|
||||
const buf = new Uint8Array([10, 20, 30]).buffer
|
||||
writer.appendBuffer(buf)
|
||||
expect(new Uint8Array(writer.getBuffer())).toEqual(new Uint8Array([10, 20, 30]))
|
||||
})
|
||||
|
||||
it('appendVarInt encodes 32-bit varint', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.appendVarInt(127)
|
||||
writer.appendVarInt(128)
|
||||
writer.appendVarInt(300)
|
||||
|
||||
expect(new Uint8Array(writer.getBuffer())).toEqual(
|
||||
new Uint8Array([
|
||||
0x7f, // 127
|
||||
0x80, 0x01, // 128
|
||||
0xac, 0x02, // 300
|
||||
])
|
||||
)
|
||||
})
|
||||
|
||||
it('appendVarBigInt encodes bigint varint', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.appendVarBigInt(127n)
|
||||
writer.appendVarBigInt(128n)
|
||||
writer.appendVarBigInt(300n)
|
||||
|
||||
expect(new Uint8Array(writer.getBuffer())).toEqual(
|
||||
new Uint8Array([
|
||||
0x7f, // 127
|
||||
0x80, 0x01, // 128
|
||||
0xac, 0x02, // 300
|
||||
])
|
||||
)
|
||||
})
|
||||
|
||||
it('expands buffer automatically when needed', () => {
|
||||
const writer = new ByteWriter()
|
||||
// force expansion by writing more than initial 1024 bytes
|
||||
const largeArray = new Uint8Array(2000).fill(0xaa)
|
||||
writer.appendBytes(largeArray)
|
||||
expect(writer.buffer.byteLength).toBeGreaterThanOrEqual(2000)
|
||||
expect(new Uint8Array(writer.getBuffer()).length).toBe(2000)
|
||||
})
|
||||
|
||||
it('finish does nothing but is callable', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.finish()
|
||||
expect(writer.getBuffer().byteLength).toBe(0)
|
||||
})
|
||||
})
|
||||
@ -1,5 +1,5 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { Writer } from '../src/writer.js'
|
||||
import { ByteWriter } from '../src/bytewriter.js'
|
||||
import { writeRleBitPackedHybrid } from '../src/encoding.js'
|
||||
import { readRleBitPackedHybrid } from 'hyparquet/src/encoding.js'
|
||||
|
||||
@ -13,7 +13,7 @@ function roundTripDeserialize(values) {
|
||||
const bitWidth = Math.ceil(Math.log2(Math.max(...values) + 1))
|
||||
|
||||
// Serialize the values using writeRleBitPackedHybrid
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
writeRleBitPackedHybrid(writer, values)
|
||||
const buffer = writer.getBuffer()
|
||||
const reader = { view: new DataView(buffer), offset: 0 }
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { parquetMetadata } from 'hyparquet'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { Writer } from '../src/writer.js'
|
||||
import { ByteWriter } from '../src/bytewriter.js'
|
||||
import { writeMetadata } from '../src/metadata.js'
|
||||
|
||||
/**
|
||||
@ -145,7 +145,7 @@ export const exampleMetadata = {
|
||||
|
||||
describe('writeMetadata', () => {
|
||||
it('writes metadata and parses in hyparquet', () => {
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
|
||||
// Write header PAR1
|
||||
writer.appendUint32(0x31524150)
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { Writer } from '../src/writer.js'
|
||||
import { ByteWriter } from '../src/bytewriter.js'
|
||||
import { writePlain } from '../src/plain.js'
|
||||
|
||||
describe('writePlain', () => {
|
||||
it('writes BOOLEAN (multiple of 8 bits, plus leftover)', () => {
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
const booleans = [true, false, true, true, false, false, false, true, true]
|
||||
writePlain(writer, booleans, 'BOOLEAN')
|
||||
|
||||
@ -14,7 +14,7 @@ describe('writePlain', () => {
|
||||
})
|
||||
|
||||
it('writes INT32', () => {
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
const ints = [0, 1, 255, 256, 65535, -1, -2147483648, 2147483647]
|
||||
writePlain(writer, ints, 'INT32')
|
||||
|
||||
@ -28,7 +28,7 @@ describe('writePlain', () => {
|
||||
})
|
||||
|
||||
it('writes INT64', () => {
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
const bigints = [0n, 1n, 42n, BigInt(2 ** 53 - 1)]
|
||||
writePlain(writer, bigints, 'INT64')
|
||||
|
||||
@ -42,7 +42,7 @@ describe('writePlain', () => {
|
||||
})
|
||||
|
||||
it('writes DOUBLE', () => {
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
const doubles = [0, 3.14, -2.71, Infinity, -Infinity, NaN]
|
||||
writePlain(writer, doubles, 'DOUBLE')
|
||||
|
||||
@ -60,7 +60,7 @@ describe('writePlain', () => {
|
||||
})
|
||||
|
||||
it('writes BYTE_ARRAY', () => {
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
const strings = ['a', 'b', 'c', 'd']
|
||||
// strings must be pre-converted to Uint8Array
|
||||
const encoder = new TextEncoder()
|
||||
@ -81,7 +81,7 @@ describe('writePlain', () => {
|
||||
})
|
||||
|
||||
it('throws error on unsupported type', () => {
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
expect(() => writePlain(writer, [1, 2, 3], 'INT96'))
|
||||
.toThrow(/parquet unsupported type/i)
|
||||
})
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { snappyCompress } from '../src/snappy.js'
|
||||
import { Writer } from '../src/writer.js'
|
||||
import { ByteWriter } from '../src/bytewriter.js'
|
||||
import { snappyUncompress } from 'hyparquet'
|
||||
|
||||
describe('snappy compress', () => {
|
||||
@ -42,7 +42,7 @@ describe('snappy compress', () => {
|
||||
uncompressed: new Uint8Array([5]),
|
||||
},
|
||||
])('compresses valid input %p', ({ compressed, uncompressed }) => {
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
const encoder = new TextEncoder()
|
||||
const input = typeof uncompressed === 'string' ? encoder.encode(uncompressed) : new Uint8Array(uncompressed)
|
||||
snappyCompress(writer, input)
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import { deserializeTCompactProtocol } from 'hyparquet/src/thrift.js'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { serializeTCompactProtocol } from '../src/thrift.js'
|
||||
import { Writer } from '../src/writer.js'
|
||||
import { ByteWriter } from '../src/bytewriter.js'
|
||||
|
||||
/**
|
||||
* Utility to decode a Thrift-serialized buffer and return the parsed object.
|
||||
@ -28,7 +28,7 @@ describe('serializeTCompactProtocol', () => {
|
||||
field_9: new TextEncoder().encode('Hello, Thrift!'),
|
||||
}
|
||||
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
serializeTCompactProtocol(writer, data)
|
||||
const buf = writer.buffer.slice(0, writer.offset)
|
||||
const result = roundTripDeserialize(buf)
|
||||
@ -59,7 +59,7 @@ describe('serializeTCompactProtocol', () => {
|
||||
field_2: [true, false, true, false],
|
||||
}
|
||||
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
serializeTCompactProtocol(writer, data)
|
||||
const buf = writer.buffer.slice(0, writer.offset)
|
||||
const result = roundTripDeserialize(buf)
|
||||
@ -72,7 +72,7 @@ describe('serializeTCompactProtocol', () => {
|
||||
|
||||
it('handles empty object (only STOP)', () => {
|
||||
const data = {}
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
serializeTCompactProtocol(writer, data)
|
||||
const buf = writer.buffer.slice(0, writer.offset)
|
||||
const arr = new Uint8Array(buf)
|
||||
@ -89,7 +89,7 @@ describe('serializeTCompactProtocol', () => {
|
||||
field_2: 2,
|
||||
field_1: 1, // field_1 is out of order (less than field_2)
|
||||
}
|
||||
const writer = new Writer()
|
||||
const writer = new ByteWriter()
|
||||
expect(() => serializeTCompactProtocol(writer, invalidData)).toThrow()
|
||||
})
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user