diff --git a/src/column.js b/src/column.js index 52a66f9..008fedc 100644 --- a/src/column.js +++ b/src/column.js @@ -1,5 +1,5 @@ import { assembleLists } from './assemble.js' -import { Encoding, PageType } from './constants.js' +import { Encodings, PageTypes } from './constants.js' import { convert, convertWithDictionary } from './convert.js' import { decompressPage, readDataPage, readDataPageV2 } from './datapage.js' import { readPlain } from './plain.js' @@ -156,15 +156,15 @@ function parquetHeader(reader) { const header = deserializeTCompactProtocol(reader) // Parse parquet header from thrift data - const type = PageType[header.field_1] + const type = PageTypes[header.field_1] const uncompressed_page_size = header.field_2 const compressed_page_size = header.field_3 const crc = header.field_4 const data_page_header = header.field_5 && { num_values: header.field_5.field_1, - encoding: Encoding[header.field_5.field_2], - definition_level_encoding: Encoding[header.field_5.field_3], - repetition_level_encoding: Encoding[header.field_5.field_4], + encoding: Encodings[header.field_5.field_2], + definition_level_encoding: Encodings[header.field_5.field_3], + repetition_level_encoding: Encodings[header.field_5.field_4], statistics: header.field_5.field_5 && { max: header.field_5.field_5.field_1, min: header.field_5.field_5.field_2, @@ -177,14 +177,14 @@ function parquetHeader(reader) { const index_page_header = header.field_6 const dictionary_page_header = header.field_7 && { num_values: header.field_7.field_1, - encoding: Encoding[header.field_7.field_2], + encoding: Encodings[header.field_7.field_2], is_sorted: header.field_7.field_3, } const data_page_header_v2 = header.field_8 && { num_values: header.field_8.field_1, num_nulls: header.field_8.field_2, num_rows: header.field_8.field_3, - encoding: Encoding[header.field_8.field_4], + encoding: Encodings[header.field_8.field_4], definition_levels_byte_length: header.field_8.field_5, repetition_levels_byte_length: header.field_8.field_6, is_compressed: header.field_8.field_7 === undefined ? true : header.field_8.field_7, // default true diff --git a/src/constants.js b/src/constants.js index c926860..4c1f9fc 100644 --- a/src/constants.js +++ b/src/constants.js @@ -1,5 +1,6 @@ + /** @type {import('../src/types.d.ts').ParquetType[]} */ -export const ParquetType = [ +export const ParquetTypes = [ 'BOOLEAN', 'INT32', 'INT64', @@ -11,7 +12,7 @@ export const ParquetType = [ ] /** @type {import('../src/types.d.ts').Encoding[]} */ -export const Encoding = [ +export const Encodings = [ 'PLAIN', 'GROUP_VAR_INT', // deprecated 'PLAIN_DICTIONARY', @@ -25,14 +26,14 @@ export const Encoding = [ ] /** @type {import('../src/types.d.ts').FieldRepetitionType[]} */ -export const FieldRepetitionType = [ +export const FieldRepetitionTypes = [ 'REQUIRED', 'OPTIONAL', 'REPEATED', ] /** @type {import('../src/types.d.ts').ConvertedType[]} */ -export const ConvertedType = [ +export const ConvertedTypes = [ 'UTF8', 'MAP', 'MAP_KEY_VALUE', @@ -58,7 +59,7 @@ export const ConvertedType = [ ] /** @type {import('../src/types.d.ts').CompressionCodec[]} */ -export const CompressionCodec = [ +export const CompressionCodecs = [ 'UNCOMPRESSED', 'SNAPPY', 'GZIP', @@ -70,7 +71,7 @@ export const CompressionCodec = [ ] /** @type {import('../src/types.d.ts').PageType[]} */ -export const PageType = [ +export const PageTypes = [ 'DATA_PAGE', 'INDEX_PAGE', 'DICTIONARY_PAGE', @@ -78,14 +79,14 @@ export const PageType = [ ] /** @type {import('../src/types.d.ts').BoundaryOrder[]} */ -export const BoundaryOrder = [ +export const BoundaryOrders = [ 'UNORDERED', 'ASCENDING', 'DESCENDING', ] /** @type {import('../src/types.d.ts').EdgeInterpolationAlgorithm[]} */ -export const EdgeInterpolationAlgorithm = [ +export const EdgeInterpolationAlgorithms = [ 'SPHERICAL', 'VINCENTY', 'THOMAS', diff --git a/src/indexes.js b/src/indexes.js index e6f0933..c49647e 100644 --- a/src/indexes.js +++ b/src/indexes.js @@ -1,4 +1,4 @@ -import { BoundaryOrder } from './constants.js' +import { BoundaryOrders } from './constants.js' import { DEFAULT_PARSERS } from './convert.js' import { convertMetadata } from './metadata.js' import { deserializeTCompactProtocol } from './thrift.js' @@ -17,7 +17,7 @@ export function readColumnIndex(reader, schema, parsers = undefined) { null_pages: thrift.field_1, min_values: thrift.field_2.map((/** @type {any} */ m) => convertMetadata(m, schema, parsers)), max_values: thrift.field_3.map((/** @type {any} */ m) => convertMetadata(m, schema, parsers)), - boundary_order: BoundaryOrder[thrift.field_4], + boundary_order: BoundaryOrders[thrift.field_4], null_counts: thrift.field_5, repetition_level_histograms: thrift.field_6, definition_level_histograms: thrift.field_7, diff --git a/src/metadata.js b/src/metadata.js index 5e0d1ea..2f39852 100644 --- a/src/metadata.js +++ b/src/metadata.js @@ -1,4 +1,4 @@ -import { CompressionCodec, ConvertedType, EdgeInterpolationAlgorithm, Encoding, FieldRepetitionType, PageType, ParquetType } from './constants.js' +import { CompressionCodecs, ConvertedTypes, EdgeInterpolationAlgorithms, Encodings, FieldRepetitionTypes, PageTypes, ParquetTypes } from './constants.js' import { DEFAULT_PARSERS, parseDecimal, parseFloat16 } from './convert.js' import { getSchemaPath } from './schema.js' import { deserializeTCompactProtocol } from './thrift.js' @@ -112,12 +112,12 @@ export function parquetMetadata(arrayBuffer, { parsers, geoparquet = true } = {} const version = metadata.field_1 /** @type {SchemaElement[]} */ const schema = metadata.field_2.map((/** @type {any} */ field) => ({ - type: ParquetType[field.field_1], + type: ParquetTypes[field.field_1], type_length: field.field_2, - repetition_type: FieldRepetitionType[field.field_3], + repetition_type: FieldRepetitionTypes[field.field_3], name: decode(field.field_4), num_children: field.field_5, - converted_type: ConvertedType[field.field_6], + converted_type: ConvertedTypes[field.field_6], scale: field.field_7, precision: field.field_8, field_id: field.field_9, @@ -131,10 +131,10 @@ export function parquetMetadata(arrayBuffer, { parsers, geoparquet = true } = {} file_path: decode(column.field_1), file_offset: column.field_2, meta_data: column.field_3 && { - type: ParquetType[column.field_3.field_1], - encodings: column.field_3.field_2?.map((/** @type {number} */ e) => Encoding[e]), + type: ParquetTypes[column.field_3.field_1], + encodings: column.field_3.field_2?.map((/** @type {number} */ e) => Encodings[e]), path_in_schema: column.field_3.field_3.map(decode), - codec: CompressionCodec[column.field_3.field_4], + codec: CompressionCodecs[column.field_3.field_4], num_values: column.field_3.field_5, total_uncompressed_size: column.field_3.field_6, total_compressed_size: column.field_3.field_7, @@ -147,8 +147,8 @@ export function parquetMetadata(arrayBuffer, { parsers, geoparquet = true } = {} dictionary_page_offset: column.field_3.field_11, statistics: convertStats(column.field_3.field_12, columnSchema[columnIndex], parsers), encoding_stats: column.field_3.field_13?.map((/** @type {any} */ encodingStat) => ({ - page_type: PageType[encodingStat.field_1], - encoding: Encoding[encodingStat.field_2], + page_type: PageTypes[encodingStat.field_1], + encoding: Encodings[encodingStat.field_2], count: encodingStat.field_3, })), bloom_filter_offset: column.field_3.field_14, @@ -268,7 +268,7 @@ function logicalType(logicalType) { if (logicalType?.field_18) return { type: 'GEOGRAPHY', crs: decode(logicalType.field_18.field_1), - algorithm: EdgeInterpolationAlgorithm[logicalType.field_18.field_2], + algorithm: EdgeInterpolationAlgorithms[logicalType.field_18.field_2], } return logicalType }