mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Option for key_value_metadata
This commit is contained in:
parent
07928e8eb7
commit
7be21caa9e
@ -4,7 +4,7 @@
|
||||
[](https://www.npmjs.com/package/hyparquet-writer)
|
||||
[](https://github.com/hyparam/hyparquet-writer/actions)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||

|
||||

|
||||
[](https://www.npmjs.com/package/hyparquet?activeTab=dependencies)
|
||||
|
||||
## Usage
|
||||
|
||||
@ -42,11 +42,11 @@
|
||||
"devDependencies": {
|
||||
"@babel/eslint-parser": "7.27.0",
|
||||
"@types/node": "22.13.14",
|
||||
"@vitest/coverage-v8": "3.0.9",
|
||||
"@vitest/coverage-v8": "3.1.1",
|
||||
"eslint": "9.23.0",
|
||||
"eslint-plugin-jsdoc": "50.6.9",
|
||||
"hyparquet": "1.10.1",
|
||||
"typescript": "5.8.2",
|
||||
"vitest": "3.0.9"
|
||||
"vitest": "3.1.1"
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { ConvertedType, Encoding, FieldRepetitionType, ParquetType } from 'hyparquet/src/constants.js'
|
||||
import { ConvertedType, Encoding, FieldRepetitionType, PageType, ParquetType } from 'hyparquet/src/constants.js'
|
||||
import { serializeTCompactProtocol } from './thrift.js'
|
||||
|
||||
const CompressionCodec = [
|
||||
@ -51,10 +51,18 @@ export function writeMetadata(writer, metadata) {
|
||||
field_10: c.meta_data.index_page_offset,
|
||||
field_11: c.meta_data.dictionary_page_offset,
|
||||
field_12: c.meta_data.statistics,
|
||||
field_13: c.meta_data.encoding_stats,
|
||||
field_13: c.meta_data.encoding_stats && c.meta_data.encoding_stats.map(es => ({
|
||||
field_1: PageType.indexOf(es.page_type),
|
||||
field_2: Encoding.indexOf(es.encoding),
|
||||
field_3: es.count,
|
||||
})),
|
||||
field_14: c.meta_data.bloom_filter_offset,
|
||||
field_15: c.meta_data.bloom_filter_length,
|
||||
field_16: c.meta_data.size_statistics,
|
||||
field_16: c.meta_data.size_statistics && {
|
||||
field_1: c.meta_data.size_statistics.unencoded_byte_array_data_bytes,
|
||||
field_2: c.meta_data.size_statistics.repetition_level_histogram,
|
||||
field_3: c.meta_data.size_statistics.definition_level_histogram,
|
||||
},
|
||||
},
|
||||
field_4: c.offset_index_offset,
|
||||
field_5: c.offset_index_length,
|
||||
@ -65,12 +73,19 @@ export function writeMetadata(writer, metadata) {
|
||||
})),
|
||||
field_2: rg.total_byte_size,
|
||||
field_3: rg.num_rows,
|
||||
field_4: rg.sorting_columns,
|
||||
field_4: rg.sorting_columns && rg.sorting_columns.map(sc => ({
|
||||
field_1: sc.column_idx,
|
||||
field_2: sc.descending,
|
||||
field_3: sc.nulls_first,
|
||||
})),
|
||||
field_5: rg.file_offset,
|
||||
field_6: rg.total_compressed_size,
|
||||
field_7: rg.ordinal,
|
||||
})),
|
||||
field_5: metadata.key_value_metadata,
|
||||
field_5: metadata.key_value_metadata && metadata.key_value_metadata.map(kv => ({
|
||||
field_1: kv.key,
|
||||
field_2: kv.value,
|
||||
})),
|
||||
field_6: metadata.created_by,
|
||||
}
|
||||
|
||||
|
||||
5
src/types.d.ts
vendored
5
src/types.d.ts
vendored
@ -6,6 +6,11 @@ export interface ColumnData {
|
||||
type?: ParquetType
|
||||
}
|
||||
|
||||
/**
 * A single key/value metadata entry.
 * `parquetWrite` accepts an array of these as `options.kvMetadata` and
 * places it on the file metadata as `key_value_metadata`.
 */
export interface KeyValue {
|
||||
// Metadata key (required).
key: string
|
||||
// Metadata value; optional, so an entry may carry a key only.
value?: string
|
||||
}
|
||||
|
||||
export interface Writer {
|
||||
buffer: ArrayBuffer
|
||||
offset: number
|
||||
|
||||
@ -7,13 +7,14 @@ import { getSchemaElementForValues } from './schema.js'
|
||||
* Write data as parquet to an ArrayBuffer
|
||||
*
|
||||
* @import {ColumnChunk, DecodedArray, FileMetaData, SchemaElement, SchemaTree} from 'hyparquet'
|
||||
* @import {ColumnData} from '../src/types.js'
|
||||
* @import {ColumnData, KeyValue} from '../src/types.js'
|
||||
* @param {object} options
|
||||
* @param {ColumnData[]} options.columnData
|
||||
* @param {boolean} [options.compressed]
|
||||
* @param {KeyValue[]} [options.kvMetadata]
|
||||
* @returns {ArrayBuffer}
|
||||
*/
|
||||
export function parquetWrite({ columnData, compressed = true }) {
|
||||
export function parquetWrite({ columnData, compressed = true, kvMetadata }) {
|
||||
const writer = new Writer()
|
||||
|
||||
// Check if all columns have the same length
|
||||
@ -73,6 +74,7 @@ export function parquetWrite({ columnData, compressed = true }) {
|
||||
num_rows,
|
||||
}],
|
||||
metadata_length: 0,
|
||||
key_value_metadata: kvMetadata,
|
||||
}
|
||||
// @ts-ignore don't want to actually serialize metadata_length
|
||||
delete metadata.metadata_length
|
||||
|
||||
@ -121,8 +121,15 @@ describe('writeMetadata', () => {
|
||||
writer.appendUint32(0x31524150)
|
||||
|
||||
// Write metadata
|
||||
/** @type {FileMetaData} */
|
||||
writeMetadata(writer, exampleMetadata)
|
||||
const withKvMetadata = {
|
||||
...exampleMetadata,
|
||||
key_value_metadata: [
|
||||
{ key: 'key1', value: 'value1' },
|
||||
{ key: 'key2', value: 'value2' },
|
||||
],
|
||||
metadata_length: 370,
|
||||
}
|
||||
writeMetadata(writer, withKvMetadata)
|
||||
|
||||
// Write footer PAR1
|
||||
writer.appendUint32(0x31524150)
|
||||
@ -131,7 +138,7 @@ describe('writeMetadata', () => {
|
||||
const output = parquetMetadata(file)
|
||||
|
||||
/** @type {FileMetaData} */
|
||||
expect(output).toEqual(exampleMetadata)
|
||||
expect(output).toEqual(withKvMetadata)
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user