mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-01-05 10:56:38 +00:00
Parse logical types from metadata
This commit is contained in:
parent
595c87a82b
commit
8a98407734
@ -1,3 +1,7 @@
|
||||
/**
|
||||
* @typedef {import('./types.js').ParquetType} ParquetTypeType
|
||||
* @type {ParquetTypeType[]}
|
||||
*/
|
||||
export const ParquetType = [
|
||||
'BOOLEAN',
|
||||
'INT32',
|
||||
@ -28,6 +32,10 @@ export const FieldRepetitionType = [
|
||||
'REPEATED',
|
||||
]
|
||||
|
||||
/**
|
||||
* @typedef {import('./types.js').ConvertedType} ConvertedTypeType
|
||||
* @type {ConvertedTypeType[]}
|
||||
*/
|
||||
export const ConvertedType = [
|
||||
'UTF8',
|
||||
'MAP',
|
||||
@ -53,6 +61,28 @@ export const ConvertedType = [
|
||||
'INTERVAL',
|
||||
]
|
||||
|
||||
/**
 * Names of the Parquet logical types, in positional order.
 *
 * NOTE(review): the order appears to follow the field ids of the thrift
 * LogicalType union (5 = DECIMAL, 10 = INTEGER, ...), so entries must only
 * ever be appended, never reordered. Index 0 looks like a placeholder because
 * thrift field ids start at 1 -- confirm before relying on it.
 *
 * @typedef {import('./types.js').LogicalTypeType} LogicalTypeType
 * @type {LogicalTypeType[]}
 */
export const logicalTypeType = [
  'NULL', // 0: placeholder slot
  'STRING', // 1
  'MAP', // 2
  'LIST', // 3
  'ENUM', // 4
  'DECIMAL', // 5
  'DATE', // 6
  'TIME', // 7
  'TIMESTAMP', // 8
  'INTERVAL', // 9
  'INTEGER', // 10
  'NULL', // 11
  'JSON', // 12
  'BSON', // 13
  'UUID', // 14
  'FLOAT16', // 15: declared in the LogicalTypeType union, was missing here
]
|
||||
|
||||
export const CompressionCodec = [
|
||||
'UNCOMPRESSED',
|
||||
'SNAPPY',
|
||||
|
||||
@ -105,6 +105,7 @@ export function parquetMetadata(arrayBuffer) {
|
||||
scale: field.field_7,
|
||||
precision: field.field_8,
|
||||
field_id: field.field_9,
|
||||
logical_type: logicalType(field.field_10),
|
||||
}))
|
||||
const num_rows = metadata.field_3
|
||||
const row_groups = metadata.field_4.map((/** @type {any} */ rowGroup) => ({
|
||||
@ -171,3 +172,32 @@ export function parquetMetadata(arrayBuffer) {
|
||||
export function parquetSchema(metadata) {
|
||||
return schemaTree(metadata.schema, 0)
|
||||
}
|
||||
|
||||
/**
 * Logical types that carry no parameters, keyed by the `field_N` property
 * under which they show up in the decoded union.
 */
const PARAMETERLESS_LOGICAL_TYPES = Object.freeze({
  field_1: 'STRING',
  field_2: 'MAP',
  field_3: 'LIST',
  field_4: 'ENUM',
  field_6: 'DATE',
  field_11: 'NULL',
  field_12: 'JSON',
  field_13: 'BSON',
  field_14: 'UUID',
  field_15: 'FLOAT16',
})

/**
 * Convert a decoded logical type union into a named LogicalType.
 *
 * The input is expected to be an object with one `field_N` property set.
 * DECIMAL (field_5) and INTEGER (field_10) have their parameters unpacked;
 * parameterless types become `{ logicalType: NAME }`. Anything else that is
 * truthy (currently TIME and TIMESTAMP, see TODOs) is returned unchanged so
 * that its parameters are not dropped.
 *
 * @typedef {import("./types.d.ts").LogicalType} LogicalType
 * @param {any} logicalType
 * @returns {LogicalType | undefined} undefined when the input is falsy
 */
function logicalType(logicalType) {
  if (logicalType?.field_5) {
    return {
      logicalType: 'DECIMAL',
      scale: logicalType.field_5.field_1,
      precision: logicalType.field_5.field_2,
    }
  }
  // TODO: TimestampType
  // TODO: TimeType
  if (logicalType?.field_10) {
    return {
      logicalType: 'INTEGER',
      bitWidth: logicalType.field_10.field_1,
      isSigned: logicalType.field_10.field_2,
    }
  }
  if (logicalType) {
    for (const [key, name] of Object.entries(PARAMETERLESS_LOGICAL_TYPES)) {
      // Presence check rather than truthiness: these members have no content,
      // so how the decoder represents them is not known from here.
      if (logicalType[key] != null) {
        return { logicalType: name }
      }
    }
    // Not yet translated (e.g. TIME / TIMESTAMP): hand back the raw value.
    return logicalType
  }
}
|
||||
|
||||
35
src/types.d.ts
vendored
35
src/types.d.ts
vendored
@ -42,6 +42,7 @@ export interface SchemaElement {
|
||||
scale?: number
|
||||
precision?: number
|
||||
field_id?: number
|
||||
logicalType?: LogicalType
|
||||
}
|
||||
|
||||
export type ParquetType =
|
||||
@ -83,6 +84,40 @@ export type ConvertedType =
|
||||
'BSON' |
|
||||
'INTERVAL'
|
||||
|
||||
type LogicalDecimalType = {
|
||||
logicalType: 'DECIMAL'
|
||||
precision: number
|
||||
scale: number
|
||||
}
|
||||
|
||||
type LogicalIntType = {
|
||||
logicalType: 'INTEGER'
|
||||
bitWidth: number
|
||||
isSigned: boolean
|
||||
}
|
||||
|
||||
export type LogicalType =
|
||||
{ logicalType: LogicalTypeType } |
|
||||
LogicalDecimalType |
|
||||
LogicalIntType
|
||||
|
||||
export type LogicalTypeType =
|
||||
'STRING' | // convertedType UTF8
|
||||
'MAP' | // convertedType MAP
|
||||
'LIST' | // convertedType LIST
|
||||
'ENUM' | // convertedType ENUM
|
||||
'DECIMAL' | // convertedType DECIMAL + precision/scale
|
||||
'DATE' | // convertedType DATE
|
||||
'TIME' | // convertedType TIME_MILLIS or TIME_MICROS
|
||||
'TIMESTAMP' | // convertedType TIMESTAMP_MILLIS or TIMESTAMP_MICROS
|
||||
'INTEGER' | // convertedType INT or UINT
|
||||
'INTERVAL' | // convertedType INTERVAL
|
||||
'NULL' | // no convertedType
|
||||
'JSON' | // convertedType JSON
|
||||
'BSON' | // convertedType BSON
|
||||
'UUID' | // no convertedType
|
||||
'FLOAT16' // no convertedType
|
||||
|
||||
export interface RowGroup {
|
||||
columns: ColumnChunk[]
|
||||
total_byte_size: number
|
||||
|
||||
@ -36,6 +36,11 @@
|
||||
},
|
||||
{
|
||||
"converted_type": "UINT_64",
|
||||
"logical_type": {
|
||||
"logicalType": "INTEGER",
|
||||
"bitWidth": 64,
|
||||
"isSigned": false
|
||||
},
|
||||
"name": "long_col",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"type": "INT64"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user