/** * File-like object that can read slices of a file asynchronously. */ export interface AsyncBuffer { byteLength: number slice(start: number, end?: number): Promise } /** * Just like an ArrayBuffer, but an interface */ export interface ArrayBufferLike { byteLength: number slice(start: number, end?: number): ArrayBuffer } /** * Represents a decoded value, and includes the number of bytes read. * This is used to read data from the file and advance a virtual file pointer. */ export interface Decoded { value: T byteLength: number } // Parquet file metadata types export interface FileMetaData { version: number schema: SchemaElement[] num_rows: number row_groups: RowGroup[] key_value_metadata?: KeyValue[] created_by?: string metadata_length: number } export interface SchemaTree { element: SchemaElement children: SchemaTree[] count: number } export interface SchemaElement { type?: ParquetType type_length?: number repetition_type?: FieldRepetitionType name: string num_children?: number converted_type?: ConvertedType scale?: number precision?: number field_id?: number } export enum ParquetType { BOOLEAN = 0, INT32 = 1, INT64 = 2, INT96 = 3, // deprecated FLOAT = 4, DOUBLE = 5, BYTE_ARRAY = 6, FIXED_LEN_BYTE_ARRAY = 7, } export enum FieldRepetitionType { REQUIRED = 0, OPTIONAL = 1, REPEATED = 2, } export enum ConvertedType { UTF8 = 0, MAP = 1, MAP_KEY_VALUE = 2, LIST = 3, ENUM = 4, DECIMAL = 5, DATE = 6, TIME_MILLIS = 7, TIME_MICROS = 8, TIMESTAMP_MILLIS = 9, TIMESTAMP_MICROS = 10, UINT_8 = 11, UINT_16 = 12, UINT_32 = 13, UINT_64 = 14, INT_8 = 15, INT_16 = 16, INT_32 = 17, INT_64 = 18, JSON = 19, BSON = 20, INTERVAL = 21, } export interface RowGroup { columns: ColumnChunk[] total_byte_size: number num_rows: number sorting_columns?: SortingColumn[] } export interface ColumnChunk { file_path?: string file_offset: number meta_data?: ColumnMetaData } export interface ColumnMetaData { type: ParquetType encodings: Encoding[] path_in_schema: string[] codec: CompressionCodec num_values: number total_uncompressed_size: number total_compressed_size: number key_value_metadata?: KeyValue[] data_page_offset: number index_page_offset?: number dictionary_page_offset?: number statistics?: Statistics encoding_stats?: PageEncodingStats[] } export enum Encoding { PLAIN = 0, PLAIN_DICTIONARY = 2, RLE = 3, BIT_PACKED = 4, // deprecated DELTA_BINARY_PACKED = 5, DELTA_LENGTH_BYTE_ARRAY = 6, DELTA_BYTE_ARRAY = 7, RLE_DICTIONARY = 8, BYTE_STREAM_SPLIT = 9, } export enum CompressionCodec { UNCOMPRESSED = 0, SNAPPY = 1, GZIP = 2, LZO = 3, BROTLI = 4, LZ4 = 5, ZSTD = 6, LZ4_RAW = 7, } interface KeyValue { key: string value?: string } export interface Statistics { max?: Uint8Array // binary representation min?: Uint8Array // binary representation null_count?: number distinct_count?: number } interface PageEncodingStats { page_type: PageType encoding: Encoding count: number } export enum PageType { DATA_PAGE = 0, INDEX_PAGE = 1, DICTIONARY_PAGE = 2, DATA_PAGE_V2 = 3, } interface SortingColumn { column_idx: number descending: boolean nulls_first: boolean } // Parquet file header types export interface PageHeader { type: PageType uncompressed_page_size: number compressed_page_size: number crc?: number data_page_header?: DataPageHeader index_page_header?: IndexPageHeader dictionary_page_header?: DictionaryPageHeader data_page_header_v2?: DataPageHeaderV2 } export interface DataPageHeader { num_values: number encoding: Encoding definition_level_encoding: Encoding repetition_level_encoding: Encoding statistics?: Statistics } interface IndexPageHeader {} export interface DictionaryPageHeader { num_values: number encoding: Encoding is_sorted?: boolean } interface DataPageHeaderV2 { num_values: number num_nulls: number num_rows: number encoding: Encoding definition_levels_byte_length: number repetition_levels_byte_length: number is_compressed?: boolean statistics?: Statistics }