mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Parse geospatial_statistics (#130)
This commit is contained in:
parent
49a3193c5c
commit
2f00330527
@ -57,7 +57,7 @@
|
||||
"@types/node": "24.5.2",
|
||||
"@vitest/coverage-v8": "3.2.4",
|
||||
"eslint": "9.36.0",
|
||||
"eslint-plugin-jsdoc": "60.4.0",
|
||||
"eslint-plugin-jsdoc": "60.4.1",
|
||||
"hyparquet-compressors": "1.1.1",
|
||||
"typescript": "5.9.2",
|
||||
"vitest": "3.2.4"
|
||||
|
||||
@ -152,6 +152,19 @@ export function parquetMetadata(arrayBuffer, { parsers } = {}) {
|
||||
repetition_level_histogram: column.field_3.field_16.field_2,
|
||||
definition_level_histogram: column.field_3.field_16.field_3,
|
||||
},
|
||||
geospatial_statistics: column.field_3.field_17 && {
|
||||
bbox: column.field_3.field_17.field_1 && {
|
||||
xmin: column.field_3.field_17.field_1.field_1,
|
||||
xmax: column.field_3.field_17.field_1.field_2,
|
||||
ymin: column.field_3.field_17.field_1.field_3,
|
||||
ymax: column.field_3.field_17.field_1.field_4,
|
||||
zmin: column.field_3.field_17.field_1.field_5,
|
||||
zmax: column.field_3.field_17.field_1.field_6,
|
||||
mmin: column.field_3.field_17.field_1.field_7,
|
||||
mmax: column.field_3.field_17.field_1.field_8,
|
||||
},
|
||||
geospatial_types: column.field_3.field_17.field_2,
|
||||
},
|
||||
},
|
||||
offset_index_offset: column.field_4,
|
||||
offset_index_length: column.field_5,
|
||||
@ -234,8 +247,15 @@ function logicalType(logicalType) {
|
||||
if (logicalType?.field_14) return { type: 'UUID' }
|
||||
if (logicalType?.field_15) return { type: 'FLOAT16' }
|
||||
if (logicalType?.field_16) return { type: 'VARIANT' }
|
||||
if (logicalType?.field_17) return { type: 'GEOMETRY' }
|
||||
if (logicalType?.field_18) return { type: 'GEOGRAPHY' }
|
||||
if (logicalType?.field_17) return {
|
||||
type: 'GEOMETRY',
|
||||
crs: logicalType.field_17.field_1,
|
||||
}
|
||||
if (logicalType?.field_18) return {
|
||||
type: 'GEOGRAPHY',
|
||||
crs: logicalType.field_18.field_1,
|
||||
algorithm: logicalType.field_18.field_2,
|
||||
}
|
||||
return logicalType
|
||||
}
|
||||
|
||||
|
||||
187
src/types.d.ts
vendored
187
src/types.d.ts
vendored
@ -1,3 +1,4 @@
|
||||
|
||||
/**
|
||||
* Custom parsers for columns
|
||||
*/
|
||||
@ -127,97 +128,69 @@ export interface SchemaElement {
|
||||
}
|
||||
|
||||
export type ParquetType =
|
||||
'BOOLEAN' |
|
||||
'INT32' |
|
||||
'INT64' |
|
||||
'INT96' | // deprecated
|
||||
'FLOAT' |
|
||||
'DOUBLE' |
|
||||
'BYTE_ARRAY' |
|
||||
'FIXED_LEN_BYTE_ARRAY'
|
||||
| 'BOOLEAN'
|
||||
| 'INT32'
|
||||
| 'INT64'
|
||||
| 'INT96' // deprecated
|
||||
| 'FLOAT'
|
||||
| 'DOUBLE'
|
||||
| 'BYTE_ARRAY'
|
||||
| 'FIXED_LEN_BYTE_ARRAY'
|
||||
|
||||
export type FieldRepetitionType =
|
||||
'REQUIRED' |
|
||||
'OPTIONAL' |
|
||||
'REPEATED'
|
||||
| 'REQUIRED'
|
||||
| 'OPTIONAL'
|
||||
| 'REPEATED'
|
||||
|
||||
export type ConvertedType =
|
||||
'UTF8' |
|
||||
'MAP' |
|
||||
'MAP_KEY_VALUE' |
|
||||
'LIST' |
|
||||
'ENUM' |
|
||||
'DECIMAL' |
|
||||
'DATE' |
|
||||
'TIME_MILLIS' |
|
||||
'TIME_MICROS' |
|
||||
'TIMESTAMP_MILLIS' |
|
||||
'TIMESTAMP_MICROS' |
|
||||
'UINT_8' |
|
||||
'UINT_16' |
|
||||
'UINT_32' |
|
||||
'UINT_64' |
|
||||
'INT_8' |
|
||||
'INT_16' |
|
||||
'INT_32' |
|
||||
'INT_64' |
|
||||
'JSON' |
|
||||
'BSON' |
|
||||
'INTERVAL'
|
||||
|
||||
type LogicalDecimalType = {
|
||||
type: 'DECIMAL'
|
||||
precision: number
|
||||
scale: number
|
||||
}
|
||||
| 'UTF8'
|
||||
| 'MAP'
|
||||
| 'MAP_KEY_VALUE'
|
||||
| 'LIST'
|
||||
| 'ENUM'
|
||||
| 'DECIMAL'
|
||||
| 'DATE'
|
||||
| 'TIME_MILLIS'
|
||||
| 'TIME_MICROS'
|
||||
| 'TIMESTAMP_MILLIS'
|
||||
| 'TIMESTAMP_MICROS'
|
||||
| 'UINT_8'
|
||||
| 'UINT_16'
|
||||
| 'UINT_32'
|
||||
| 'UINT_64'
|
||||
| 'INT_8'
|
||||
| 'INT_16'
|
||||
| 'INT_32'
|
||||
| 'INT_64'
|
||||
| 'JSON'
|
||||
| 'BSON'
|
||||
| 'INTERVAL'
|
||||
|
||||
export type TimeUnit = 'MILLIS' | 'MICROS' | 'NANOS'
|
||||
|
||||
type LogicalTimeType = {
|
||||
type: 'TIME'
|
||||
isAdjustedToUTC: boolean
|
||||
unit: TimeUnit
|
||||
}
|
||||
|
||||
type LogicalTimestampType = {
|
||||
type: 'TIMESTAMP'
|
||||
isAdjustedToUTC: boolean
|
||||
unit: TimeUnit
|
||||
}
|
||||
|
||||
type LogicalIntType = {
|
||||
type: 'INTEGER'
|
||||
bitWidth: number
|
||||
isSigned: boolean
|
||||
}
|
||||
type EdgeInterpolationAlgorithm = 'SPHERICAL' | 'VINCENTY' | 'THOMAS' | 'ANDOYER' | 'KARNEY'
|
||||
|
||||
export type LogicalType =
|
||||
{ type: LogicalTypeSimple } |
|
||||
LogicalDecimalType |
|
||||
LogicalTimeType |
|
||||
LogicalTimestampType |
|
||||
LogicalIntType
|
||||
| { type: 'STRING' }
|
||||
| { type: 'MAP' }
|
||||
| { type: 'LIST' }
|
||||
| { type: 'ENUM' }
|
||||
| { type: 'DATE' }
|
||||
| { type: 'INTERVAL' }
|
||||
| { type: 'NULL' }
|
||||
| { type: 'JSON' }
|
||||
| { type: 'BSON' }
|
||||
| { type: 'UUID' }
|
||||
| { type: 'FLOAT16' }
|
||||
| { type: 'VARIANT' }
|
||||
| { type: 'DECIMAL', precision: number, scale: number }
|
||||
| { type: 'TIME', isAdjustedToUTC: boolean, unit: TimeUnit }
|
||||
| { type: 'TIMESTAMP', isAdjustedToUTC: boolean, unit: TimeUnit }
|
||||
| { type: 'INTEGER', bitWidth: number, isSigned: boolean }
|
||||
| { type: 'GEOMETRY', crs?: string }
|
||||
| { type: 'GEOGRAPHY', crs?: string, algorithm?: EdgeInterpolationAlgorithm }
|
||||
|
||||
type LogicalTypeSimple =
|
||||
'STRING' |
|
||||
'MAP' |
|
||||
'LIST' |
|
||||
'ENUM' |
|
||||
'DATE' |
|
||||
'INTERVAL' |
|
||||
'NULL' |
|
||||
'JSON' |
|
||||
'BSON' |
|
||||
'UUID' |
|
||||
'FLOAT16' |
|
||||
'VARIANT' |
|
||||
'GEOMETRY' |
|
||||
'GEOGRAPHY'
|
||||
|
||||
export type LogicalTypeType = LogicalTypeSimple |
|
||||
'TIME' | // convertedType TIME_MILLIS or TIME_MICROS
|
||||
'TIMESTAMP' | // convertedType TIMESTAMP_MILLIS or TIMESTAMP_MICROS
|
||||
'INTEGER' // convertedType INT or UINT
|
||||
export type LogicalTypeType = LogicalType['type']
|
||||
|
||||
export interface RowGroup {
|
||||
columns: ColumnChunk[]
|
||||
@ -263,26 +236,26 @@ export interface ColumnMetaData {
|
||||
type ColumnCryptoMetaData = Record<string, never>
|
||||
|
||||
export type Encoding =
|
||||
'PLAIN' |
|
||||
'GROUP_VAR_INT' | // deprecated
|
||||
'PLAIN_DICTIONARY' |
|
||||
'RLE' |
|
||||
'BIT_PACKED' | // deprecated
|
||||
'DELTA_BINARY_PACKED' |
|
||||
'DELTA_LENGTH_BYTE_ARRAY' |
|
||||
'DELTA_BYTE_ARRAY' |
|
||||
'RLE_DICTIONARY' |
|
||||
'BYTE_STREAM_SPLIT'
|
||||
| 'PLAIN'
|
||||
| 'GROUP_VAR_INT' // deprecated
|
||||
| 'PLAIN_DICTIONARY'
|
||||
| 'RLE'
|
||||
| 'BIT_PACKED' // deprecated
|
||||
| 'DELTA_BINARY_PACKED'
|
||||
| 'DELTA_LENGTH_BYTE_ARRAY'
|
||||
| 'DELTA_BYTE_ARRAY'
|
||||
| 'RLE_DICTIONARY'
|
||||
| 'BYTE_STREAM_SPLIT'
|
||||
|
||||
export type CompressionCodec =
|
||||
'UNCOMPRESSED' |
|
||||
'SNAPPY' |
|
||||
'GZIP' |
|
||||
'LZO' |
|
||||
'BROTLI' |
|
||||
'LZ4' |
|
||||
'ZSTD' |
|
||||
'LZ4_RAW'
|
||||
| 'UNCOMPRESSED'
|
||||
| 'SNAPPY'
|
||||
| 'GZIP'
|
||||
| 'LZO'
|
||||
| 'BROTLI'
|
||||
| 'LZ4'
|
||||
| 'ZSTD'
|
||||
| 'LZ4_RAW'
|
||||
|
||||
export type Compressors = {
|
||||
[K in CompressionCodec]?: (input: Uint8Array, outputLength: number) => Uint8Array
|
||||
@ -376,14 +349,14 @@ interface DataPage {
|
||||
}
|
||||
|
||||
export type DecodedArray =
|
||||
Uint8Array |
|
||||
Uint32Array |
|
||||
Int32Array |
|
||||
BigInt64Array |
|
||||
BigUint64Array |
|
||||
Float32Array |
|
||||
Float64Array |
|
||||
any[]
|
||||
| Uint8Array
|
||||
| Uint32Array
|
||||
| Int32Array
|
||||
| BigInt64Array
|
||||
| BigUint64Array
|
||||
| Float32Array
|
||||
| Float64Array
|
||||
| any[]
|
||||
|
||||
export interface OffsetIndex {
|
||||
page_locations: PageLocation[]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user