2025-04-07 08:02:21 +00:00
|
|
|
import { ParquetWriter } from './parquet-writer.js'
|
2025-04-08 06:14:48 +00:00
|
|
|
import { schemaFromColumnData } from './schema.js'
|
|
|
|
|
import { ByteWriter } from './bytewriter.js'
|
2025-03-26 04:06:43 +00:00
|
|
|
|
|
|
|
|
/**
 * Write data as parquet to an ArrayBuffer
 *
 * @import {KeyValue} from 'hyparquet/src/types.js'
 * @import {ColumnData} from '../src/types.js'
 * @param {object} options
 * @param {ColumnData[]} options.columnData - columns to serialize
 * @param {boolean} [options.compressed] - compress pages (default true)
 * @param {boolean} [options.statistics] - include column statistics (default true)
 * @param {number} [options.rowGroupSize] - rows per row group (default 100000)
 * @param {KeyValue[]} [options.kvMetadata] - optional file-level key-value metadata
 * @returns {ArrayBuffer} the serialized parquet file
 */
export function parquetWrite({ columnData, compressed = true, statistics = true, rowGroupSize = 100000, kvMetadata }) {
  // Derive the parquet schema from the provided column data
  const schema = schemaFromColumnData(columnData)
  // In-memory byte sink that accumulates the parquet output
  const target = new ByteWriter()
  const pq = new ParquetWriter({ writer: target, schema, compressed, statistics, kvMetadata })
  pq.write({ columnData, rowGroupSize })
  // Finalize footer/metadata before reading the buffer out
  pq.finish()
  return target.getBuffer()
}
|