mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-06 06:51:54 +00:00
Return Uint8Array from thrift
This commit is contained in:
parent
f86c8c6359
commit
eabf62f5a1
@ -96,6 +96,10 @@ export function parquetMetadata(arrayBuffer) {
|
||||
const metadataOffset = metadataLengthOffset - metadataLength
|
||||
const reader = { view, offset: metadataOffset }
|
||||
const metadata = deserializeTCompactProtocol(reader)
|
||||
const decoder = new TextDecoder()
|
||||
function decode(/** @type {Uint8Array} */ value) {
|
||||
return value && decoder.decode(value)
|
||||
}
|
||||
|
||||
// Parse metadata from thrift data
|
||||
const version = metadata.field_1
|
||||
@ -103,7 +107,7 @@ export function parquetMetadata(arrayBuffer) {
|
||||
type: ParquetType[field.field_1],
|
||||
type_length: field.field_2,
|
||||
repetition_type: FieldRepetitionType[field.field_3],
|
||||
name: field.field_4,
|
||||
name: decode(field.field_4),
|
||||
num_children: field.field_5,
|
||||
converted_type: ConvertedType[field.field_6],
|
||||
scale: field.field_7,
|
||||
@ -114,12 +118,12 @@ export function parquetMetadata(arrayBuffer) {
|
||||
const num_rows = metadata.field_3
|
||||
const row_groups = metadata.field_4.map((/** @type {any} */ rowGroup) => ({
|
||||
columns: rowGroup.field_1.map((/** @type {any} */ column) => ({
|
||||
file_path: column.field_1,
|
||||
file_path: decode(column.field_1),
|
||||
file_offset: column.field_2,
|
||||
meta_data: column.field_3 && {
|
||||
type: ParquetType[column.field_3.field_1],
|
||||
encodings: column.field_3.field_2?.map((/** @type {number} */ e) => Encoding[e]),
|
||||
path_in_schema: column.field_3.field_3,
|
||||
path_in_schema: column.field_3.field_3.map(decode),
|
||||
codec: CompressionCodec[column.field_3.field_4],
|
||||
num_values: column.field_3.field_5,
|
||||
total_uncompressed_size: column.field_3.field_6,
|
||||
@ -129,8 +133,8 @@ export function parquetMetadata(arrayBuffer) {
|
||||
index_page_offset: column.field_3.field_10,
|
||||
dictionary_page_offset: column.field_3.field_11,
|
||||
statistics: column.field_3.field_12 && {
|
||||
max: column.field_3.field_12.field_1,
|
||||
min: column.field_3.field_12.field_2,
|
||||
max: decode(column.field_3.field_12.field_1),
|
||||
min: decode(column.field_3.field_12.field_2),
|
||||
null_count: column.field_3.field_12.field_3,
|
||||
distinct_count: column.field_3.field_12.field_4,
|
||||
},
|
||||
@ -150,10 +154,10 @@ export function parquetMetadata(arrayBuffer) {
|
||||
})),
|
||||
}))
|
||||
const key_value_metadata = metadata.field_5?.map((/** @type {any} */ keyValue) => ({
|
||||
key: keyValue.field_1,
|
||||
value: keyValue.field_2,
|
||||
key: decode(keyValue.field_1),
|
||||
value: decode(keyValue.field_2),
|
||||
}))
|
||||
const created_by = metadata.field_6
|
||||
const created_by = decode(metadata.field_6)
|
||||
|
||||
return {
|
||||
version,
|
||||
@ -192,7 +196,7 @@ function logicalType(logicalType) {
|
||||
}
|
||||
}
|
||||
// TODO: TimestampType
|
||||
// TOFO: TimeType
|
||||
// TODO: TimeType
|
||||
if (logicalType?.field_10) {
|
||||
return {
|
||||
logicalType: 'INTEGER',
|
||||
|
||||
@ -70,11 +70,10 @@ function readElement(reader, type) {
|
||||
return value
|
||||
}
|
||||
case CompactType.BINARY: {
|
||||
// strings are encoded as utf-8, no \0 delimiter
|
||||
const stringLength = readVarInt(reader)
|
||||
const strBytes = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, stringLength)
|
||||
reader.offset += stringLength
|
||||
return new TextDecoder().decode(strBytes)
|
||||
return strBytes
|
||||
}
|
||||
case CompactType.LIST: {
|
||||
const [elemType, listSize] = readCollectionBegin(reader)
|
||||
|
||||
@ -10,6 +10,7 @@ export function toJson(obj) {
|
||||
if (obj === undefined) return null
|
||||
if (typeof obj === 'bigint') return Number(obj)
|
||||
if (Array.isArray(obj)) return obj.map(toJson)
|
||||
if (obj instanceof Uint8Array) return Array.from(obj)
|
||||
if (obj instanceof Object) {
|
||||
/** @type {Record<string, unknown>} */
|
||||
const newObj = {}
|
||||
|
||||
@ -75,7 +75,7 @@ describe('deserializeTCompactProtocol function', () => {
|
||||
expect(value.field_5).toBe(0x7fffffff) // I32
|
||||
expect(value.field_6).toBe(BigInt('0x7fffffffffffffff')) // I64
|
||||
expect(value.field_7).toBeCloseTo(123.456) // DOUBLE
|
||||
expect(value.field_8).toBe('Hello, Thrift!') // STRING
|
||||
expect(new TextDecoder().decode(value.field_8)).toBe('Hello, Thrift!') // STRING
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user