hyparquet/src/types.ts

145 lines
2.6 KiB
TypeScript
Raw Normal View History

2024-01-03 17:56:17 +00:00
/**
* Represents a decoded value, and includes the number of bytes read.
* This is used to read data from the file and advance a virtual file pointer.
*/
export interface Decoded<T> {
value: T
byteLength: number
}
// Parquet file metadata types
export interface FileMetaData {
version: number
schema: SchemaElement[]
num_rows: number
row_groups: RowGroup[]
key_value_metadata?: KeyValue[]
created_by?: string
}
export interface SchemaElement {
type?: ParquetType
type_length?: number
repetition_type?: FieldRepetitionType
name: string
num_children?: number
converted_type?: ConvertedType
scale?: number
precision?: number
field_id?: number
}
export enum ParquetType {
BOOLEAN = 0,
INT32 = 1,
INT64 = 2,
INT96 = 3,
FLOAT = 4,
DOUBLE = 5,
BYTE_ARRAY = 6,
FIXED_LEN_BYTE_ARRAY = 7,
}
export enum FieldRepetitionType {
REQUIRED = 0,
OPTIONAL = 1,
REPEATED = 2,
}
enum ConvertedType {
UTF8 = 0,
MAP = 1,
MAP_KEY_VALUE = 2,
LIST = 3,
ENUM = 4,
DECIMAL = 5,
DATE = 6,
TIME_MILLIS = 7,
TIME_MICROS = 8,
TIMESTAMP_MILLIS = 9,
TIMESTAMP_MICROS = 10,
}
export interface RowGroup {
columns: ColumnChunk[]
total_byte_size: number
num_rows: number
sorting_columns?: SortingColumn[]
}
export interface ColumnChunk {
file_path?: string
file_offset: number
meta_data?: ColumnMetaData
}
export interface ColumnMetaData {
type: ParquetType
encodings: Encoding[]
path_in_schema: string[]
codec: CompressionCodec
num_values: number
total_uncompressed_size: number
total_compressed_size: number
key_value_metadata?: KeyValue[]
data_page_offset: number
index_page_offset?: number
dictionary_page_offset?: number
statistics?: Statistics
encoding_stats?: PageEncodingStats[]
}
export enum Encoding {
PLAIN = 0,
PLAIN_DICTIONARY = 2,
RLE = 3,
BIT_PACKED = 4,
DELTA_BINARY_PACKED = 5,
DELTA_LENGTH_BYTE_ARRAY = 6,
DELTA_BYTE_ARRAY = 7,
RLE_DICTIONARY = 8,
BYTE_STREAM_SPLIT = 9,
}
export enum CompressionCodec {
UNCOMPRESSED = 0,
SNAPPY = 1,
GZIP = 2,
LZO = 3,
BROTLI = 4,
LZ4 = 5,
ZSTD = 6,
LZ4_RAW = 7,
}
interface KeyValue {
key: string
value?: string
}
export interface Statistics {
max?: Uint8Array // binary representation
min?: Uint8Array // binary representation
null_count?: number
distinct_count?: number
}
interface PageEncodingStats {
page_type: PageType
encoding: Encoding
count: number
}
export enum PageType {
DATA_PAGE = 0,
INDEX_PAGE = 1,
DICTIONARY_PAGE = 2,
DATA_PAGE_V2 = 3,
}
interface SortingColumn {
column_idx: number
descending: boolean
nulls_first: boolean
}