mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-01-05 10:56:38 +00:00
PageType enum to string
This commit is contained in:
parent
f826bff757
commit
86273b110c
@ -1,5 +1,4 @@
|
||||
import { assembleObjects } from './assemble.js'
|
||||
import { PageType } from './constants.js'
|
||||
import { convert } from './convert.js'
|
||||
import { readDataPage, readDictionaryPage } from './datapage.js'
|
||||
import { readDataPageV2 } from './datapageV2.js'
|
||||
@ -49,7 +48,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
)
|
||||
|
||||
// parse page data by type
|
||||
if (header.type === PageType.DATA_PAGE) {
|
||||
if (header.type === 'DATA_PAGE') {
|
||||
const daph = header.data_page_header
|
||||
if (!daph) throw new Error('parquet data page header is undefined')
|
||||
|
||||
@ -95,7 +94,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
// you need the total number of children, not the number of top-level values.
|
||||
|
||||
concat(rowData, values)
|
||||
} else if (header.type === PageType.DICTIONARY_PAGE) {
|
||||
} else if (header.type === 'DICTIONARY_PAGE') {
|
||||
const diph = header.dictionary_page_header
|
||||
if (!diph) throw new Error('parquet dictionary page header is undefined')
|
||||
|
||||
@ -103,7 +102,7 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
compressedBytes, Number(header.uncompressed_page_size), columnMetadata.codec, compressors
|
||||
)
|
||||
dictionary = readDictionaryPage(page, diph, schema, columnMetadata)
|
||||
} else if (header.type === PageType.DATA_PAGE_V2) {
|
||||
} else if (header.type === 'DATA_PAGE_V2') {
|
||||
const daph2 = header.data_page_header_v2
|
||||
if (!daph2) throw new Error('parquet data page header v2 is undefined')
|
||||
|
||||
@ -200,6 +199,7 @@ export function decompressPage(compressedBytes, uncompressed_page_size, codec, c
|
||||
|
||||
/**
|
||||
* Expand data page list with nulls and convert to utf8.
|
||||
*
|
||||
* @param {number[]} definitionLevels
|
||||
* @param {number} maxDefinitionLevel
|
||||
* @param {ArrayLike<any>} dataPage
|
||||
|
||||
@ -94,9 +94,13 @@ export const CompressionCodec = [
|
||||
'LZ4_RAW',
|
||||
]
|
||||
|
||||
export const PageType = {
|
||||
DATA_PAGE: 0,
|
||||
INDEX_PAGE: 1,
|
||||
DICTIONARY_PAGE: 2,
|
||||
DATA_PAGE_V2: 3,
|
||||
}
|
||||
/**
|
||||
* @typedef {import('./types.js').PageType} PageType
|
||||
* @type {PageType[]}
|
||||
*/
|
||||
export const PageType = [
|
||||
'DATA_PAGE',
|
||||
'INDEX_PAGE',
|
||||
'DICTIONARY_PAGE',
|
||||
'DATA_PAGE_V2',
|
||||
]
|
||||
|
||||
@ -1,26 +1,16 @@
|
||||
import { readData, readPlain, readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js'
|
||||
import {
|
||||
getMaxDefinitionLevel,
|
||||
getMaxRepetitionLevel,
|
||||
isRequired,
|
||||
schemaElement,
|
||||
skipDefinitionBytes,
|
||||
} from './schema.js'
|
||||
import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired, schemaElement, skipDefinitionBytes } from './schema.js'
|
||||
|
||||
const skipNulls = false // TODO
|
||||
|
||||
/**
|
||||
* Read a data page from the given Uint8Array.
|
||||
*
|
||||
* @typedef {{ definitionLevels: number[], numNulls: number }} DefinitionLevels
|
||||
* @typedef {import("./types.d.ts").DataPage} DataPage
|
||||
* @typedef {import("./types.d.ts").ColumnMetaData} ColumnMetaData
|
||||
* @typedef {import("./types.d.ts").DataPageHeader} DataPageHeader
|
||||
* @typedef {import("./types.d.ts").DictionaryPageHeader} DictionaryPageHeader
|
||||
* @typedef {import("./types.d.ts").SchemaElement} SchemaElement
|
||||
*/
|
||||
|
||||
/**
|
||||
* Read a data page from the given Uint8Array.
|
||||
*
|
||||
* @param {Uint8Array} bytes raw page data (should already be decompressed)
|
||||
* @param {DataPageHeader} daph data page header
|
||||
* @param {SchemaElement[]} schema schema for the file
|
||||
@ -92,6 +82,7 @@ export function readDataPage(bytes, daph, schema, columnMetadata) {
|
||||
/**
|
||||
* Read a page containing dictionary data.
|
||||
*
|
||||
* @typedef {import("./types.d.ts").DictionaryPageHeader} DictionaryPageHeader
|
||||
* @param {Uint8Array} bytes raw page data
|
||||
* @param {DictionaryPageHeader} diph dictionary page header
|
||||
* @param {SchemaElement[]} schema schema for the file
|
||||
|
||||
@ -331,9 +331,6 @@ function readBitPacked(reader, header, bitWidth, remaining) {
|
||||
reader.offset++
|
||||
left += 8
|
||||
} else {
|
||||
// otherwise, read bitWidth number of bits
|
||||
// don't write more than remaining number of rows
|
||||
// even if there are still bits to read
|
||||
if (remaining > 0) {
|
||||
// emit value by shifting off to the right and masking
|
||||
value.push((data >> right) & mask)
|
||||
@ -344,7 +341,6 @@ function readBitPacked(reader, header, bitWidth, remaining) {
|
||||
}
|
||||
}
|
||||
|
||||
// return values and number of bytes read
|
||||
return value
|
||||
}
|
||||
|
||||
|
||||
@ -1,10 +1,7 @@
|
||||
import { Encoding } from './constants.js'
|
||||
import { Encoding, PageType } from './constants.js'
|
||||
import { deserializeTCompactProtocol } from './thrift.js'
|
||||
|
||||
/**
|
||||
* Return type with bytes read.
|
||||
* This is useful to advance an offset through a buffer.
|
||||
*
|
||||
* @typedef {import("./types.d.ts").Decoded<T>} Decoded
|
||||
* @template T
|
||||
*/
|
||||
@ -21,7 +18,7 @@ export function parquetHeader(arrayBuffer, offset) {
|
||||
const { value: header, byteLength } = deserializeTCompactProtocol(arrayBuffer, offset)
|
||||
|
||||
// Parse parquet header from thrift data
|
||||
const type = header.field_1
|
||||
const type = PageType[header.field_1]
|
||||
const uncompressed_page_size = header.field_2
|
||||
const compressed_page_size = header.field_3
|
||||
const crc = header.field_4
|
||||
@ -52,7 +49,7 @@ export function parquetHeader(arrayBuffer, offset) {
|
||||
encoding: Encoding[header.field_8.field_4],
|
||||
definition_levels_byte_length: header.field_8.field_5,
|
||||
repetition_levels_byte_length: header.field_8.field_6,
|
||||
is_compressed: header.field_8.field_7 === undefined ? true : header.field_8.field_7, // default to true
|
||||
is_compressed: header.field_8.field_7 === undefined ? true : header.field_8.field_7, // default true
|
||||
statistics: header.field_8.field_8,
|
||||
}
|
||||
|
||||
|
||||
@ -67,7 +67,6 @@ export async function parquetRead(options) {
|
||||
|
||||
/**
|
||||
* Read a row group from a file-like object.
|
||||
* Reads the minimal number of columns to satisfy the request.
|
||||
*
|
||||
* @typedef {import('./types.js').RowGroup} RowGroup
|
||||
* @param {object} options read options
|
||||
|
||||
11
src/types.d.ts
vendored
11
src/types.d.ts
vendored
@ -195,12 +195,11 @@ interface PageEncodingStats {
|
||||
count: number
|
||||
}
|
||||
|
||||
export enum PageType {
|
||||
DATA_PAGE = 0,
|
||||
INDEX_PAGE = 1,
|
||||
DICTIONARY_PAGE = 2,
|
||||
DATA_PAGE_V2 = 3,
|
||||
}
|
||||
export type PageType =
|
||||
'DATA_PAGE' |
|
||||
'INDEX_PAGE' |
|
||||
'DICTIONARY_PAGE' |
|
||||
'DATA_PAGE_V2'
|
||||
|
||||
interface SortingColumn {
|
||||
column_idx: number
|
||||
|
||||
Loading…
Reference in New Issue
Block a user