From 2f00330527cd1a6411bf799202db8a67b89d5105 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Sat, 27 Sep 2025 16:31:16 -0700 Subject: [PATCH] Parse geospatial_statistics (#130) --- package.json | 2 +- src/metadata.js | 24 ++++++- src/types.d.ts | 187 +++++++++++++++++++++--------------------------- 3 files changed, 103 insertions(+), 110 deletions(-) diff --git a/package.json b/package.json index 3c0c55c..89afce1 100644 --- a/package.json +++ b/package.json @@ -57,7 +57,7 @@ "@types/node": "24.5.2", "@vitest/coverage-v8": "3.2.4", "eslint": "9.36.0", - "eslint-plugin-jsdoc": "60.4.0", + "eslint-plugin-jsdoc": "60.4.1", "hyparquet-compressors": "1.1.1", "typescript": "5.9.2", "vitest": "3.2.4" diff --git a/src/metadata.js b/src/metadata.js index 98c4e47..e4e4900 100644 --- a/src/metadata.js +++ b/src/metadata.js @@ -152,6 +152,19 @@ export function parquetMetadata(arrayBuffer, { parsers } = {}) { repetition_level_histogram: column.field_3.field_16.field_2, definition_level_histogram: column.field_3.field_16.field_3, }, + geospatial_statistics: column.field_3.field_17 && { + bbox: column.field_3.field_17.field_1 && { + xmin: column.field_3.field_17.field_1.field_1, + xmax: column.field_3.field_17.field_1.field_2, + ymin: column.field_3.field_17.field_1.field_3, + ymax: column.field_3.field_17.field_1.field_4, + zmin: column.field_3.field_17.field_1.field_5, + zmax: column.field_3.field_17.field_1.field_6, + mmin: column.field_3.field_17.field_1.field_7, + mmax: column.field_3.field_17.field_1.field_8, + }, + geospatial_types: column.field_3.field_17.field_2, + }, }, offset_index_offset: column.field_4, offset_index_length: column.field_5, @@ -234,8 +247,15 @@ function logicalType(logicalType) { if (logicalType?.field_14) return { type: 'UUID' } if (logicalType?.field_15) return { type: 'FLOAT16' } if (logicalType?.field_16) return { type: 'VARIANT' } - if (logicalType?.field_17) return { type: 'GEOMETRY' } - if (logicalType?.field_18) return { type: 'GEOGRAPHY' } + if (logicalType?.field_17) return { + type: 'GEOMETRY', + crs: logicalType.field_17.field_1, + } + if (logicalType?.field_18) return { + type: 'GEOGRAPHY', + crs: logicalType.field_18.field_1, + algorithm: logicalType.field_18.field_2, + } return logicalType } diff --git a/src/types.d.ts b/src/types.d.ts index 23a7b45..bc42633 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -1,3 +1,4 @@ + /** * Custom parsers for columns */ @@ -127,97 +128,69 @@ export interface SchemaElement { } export type ParquetType = - 'BOOLEAN' | - 'INT32' | - 'INT64' | - 'INT96' | // deprecated - 'FLOAT' | - 'DOUBLE' | - 'BYTE_ARRAY' | - 'FIXED_LEN_BYTE_ARRAY' + | 'BOOLEAN' + | 'INT32' + | 'INT64' + | 'INT96' // deprecated + | 'FLOAT' + | 'DOUBLE' + | 'BYTE_ARRAY' + | 'FIXED_LEN_BYTE_ARRAY' export type FieldRepetitionType = - 'REQUIRED' | - 'OPTIONAL' | - 'REPEATED' + | 'REQUIRED' + | 'OPTIONAL' + | 'REPEATED' export type ConvertedType = - 'UTF8' | - 'MAP' | - 'MAP_KEY_VALUE' | - 'LIST' | - 'ENUM' | - 'DECIMAL' | - 'DATE' | - 'TIME_MILLIS' | - 'TIME_MICROS' | - 'TIMESTAMP_MILLIS' | - 'TIMESTAMP_MICROS' | - 'UINT_8' | - 'UINT_16' | - 'UINT_32' | - 'UINT_64' | - 'INT_8' | - 'INT_16' | - 'INT_32' | - 'INT_64' | - 'JSON' | - 'BSON' | - 'INTERVAL' - -type LogicalDecimalType = { - type: 'DECIMAL' - precision: number - scale: number -} + | 'UTF8' + | 'MAP' + | 'MAP_KEY_VALUE' + | 'LIST' + | 'ENUM' + | 'DECIMAL' + | 'DATE' + | 'TIME_MILLIS' + | 'TIME_MICROS' + | 'TIMESTAMP_MILLIS' + | 'TIMESTAMP_MICROS' + | 'UINT_8' + | 'UINT_16' + | 'UINT_32' + | 'UINT_64' + | 'INT_8' + | 'INT_16' + | 'INT_32' + | 'INT_64' + | 'JSON' + | 'BSON' + | 'INTERVAL' export type TimeUnit = 'MILLIS' | 'MICROS' | 'NANOS' -type LogicalTimeType = { - type: 'TIME' - isAdjustedToUTC: boolean - unit: TimeUnit -} - -type LogicalTimestampType = { - type: 'TIMESTAMP' - isAdjustedToUTC: boolean - unit: TimeUnit -} - -type LogicalIntType = { - type: 'INTEGER' - bitWidth: number - isSigned: boolean -} +type EdgeInterpolationAlgorithm = 'SPHERICAL' | 'VINCENTY' | 'THOMAS' | 'ANDOYER' | 'KARNEY' export type LogicalType = - { type: LogicalTypeSimple } | - LogicalDecimalType | - LogicalTimeType | - LogicalTimestampType | - LogicalIntType + | { type: 'STRING' } + | { type: 'MAP' } + | { type: 'LIST' } + | { type: 'ENUM' } + | { type: 'DATE' } + | { type: 'INTERVAL' } + | { type: 'NULL' } + | { type: 'JSON' } + | { type: 'BSON' } + | { type: 'UUID' } + | { type: 'FLOAT16' } + | { type: 'VARIANT' } + | { type: 'DECIMAL', precision: number, scale: number } + | { type: 'TIME', isAdjustedToUTC: boolean, unit: TimeUnit } + | { type: 'TIMESTAMP', isAdjustedToUTC: boolean, unit: TimeUnit } + | { type: 'INTEGER', bitWidth: number, isSigned: boolean } + | { type: 'GEOMETRY', crs?: string } + | { type: 'GEOGRAPHY', crs?: string, algorithm?: EdgeInterpolationAlgorithm } -type LogicalTypeSimple = - 'STRING' | - 'MAP' | - 'LIST' | - 'ENUM' | - 'DATE' | - 'INTERVAL' | - 'NULL' | - 'JSON' | - 'BSON' | - 'UUID' | - 'FLOAT16' | - 'VARIANT' | - 'GEOMETRY' | - 'GEOGRAPHY' - -export type LogicalTypeType = LogicalTypeSimple | - 'TIME' | // convertedType TIME_MILLIS or TIME_MICROS - 'TIMESTAMP' | // convertedType TIMESTAMP_MILLIS or TIMESTAMP_MICROS - 'INTEGER' // convertedType INT or UINT +export type LogicalTypeType = LogicalType['type'] export interface RowGroup { columns: ColumnChunk[] @@ -263,26 +236,26 @@ export interface ColumnMetaData { type ColumnCryptoMetaData = Record export type Encoding = - 'PLAIN' | - 'GROUP_VAR_INT' | // deprecated - 'PLAIN_DICTIONARY' | - 'RLE' | - 'BIT_PACKED' | // deprecated - 'DELTA_BINARY_PACKED' | - 'DELTA_LENGTH_BYTE_ARRAY' | - 'DELTA_BYTE_ARRAY' | - 'RLE_DICTIONARY' | - 'BYTE_STREAM_SPLIT' + | 'PLAIN' + | 'GROUP_VAR_INT' // deprecated + | 'PLAIN_DICTIONARY' + | 'RLE' + | 'BIT_PACKED' // deprecated + | 'DELTA_BINARY_PACKED' + | 'DELTA_LENGTH_BYTE_ARRAY' + | 'DELTA_BYTE_ARRAY' + | 'RLE_DICTIONARY' + | 'BYTE_STREAM_SPLIT' export type CompressionCodec = - 'UNCOMPRESSED' | - 'SNAPPY' | - 'GZIP' | - 'LZO' | - 'BROTLI' | - 'LZ4' | - 'ZSTD' | - 'LZ4_RAW' + | 'UNCOMPRESSED' + | 'SNAPPY' + | 'GZIP' + | 'LZO' + | 'BROTLI' + | 'LZ4' + | 'ZSTD' + | 'LZ4_RAW' export type Compressors = { [K in CompressionCodec]?: (input: Uint8Array, outputLength: number) => Uint8Array @@ -376,14 +349,14 @@ interface DataPage { } export type DecodedArray = - Uint8Array | - Uint32Array | - Int32Array | - BigInt64Array | - BigUint64Array | - Float32Array | - Float64Array | - any[] + | Uint8Array + | Uint32Array + | Int32Array + | BigInt64Array + | BigUint64Array + | Float32Array + | Float64Array + | any[] export interface OffsetIndex { page_locations: PageLocation[]