Geospatial (#12)

This commit is contained in:
Kenny Daniel 2025-10-23 12:11:25 -07:00 committed by GitHub
parent a56c78de39
commit 4715a4a429
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 241 additions and 6 deletions

@ -33,7 +33,7 @@ export function writeColumn(writer, column, values, stats) {
const encodings = []
// Compute statistics
const statistics = stats ? getStatistics(values) : undefined
const statistics = stats ? getStatistics(values, element) : undefined
// dictionary encoding
let dictionary_page_offset
@ -135,9 +135,14 @@ function writeDictionaryPage(writer, column, dictionary) {
* @import {ColumnMetaData, DecodedArray, Encoding, ParquetType, SchemaElement, Statistics} from 'hyparquet'
* @import {ColumnEncoder, ListValues, Writer} from '../src/types.js'
* @param {DecodedArray} values
* @returns {Statistics}
* @param {SchemaElement} element
* @returns {Statistics | undefined}
*/
function getStatistics(values) {
function getStatistics(values, element) {
const ltype = element?.logical_type?.type
const isGeospatial = ltype === 'GEOMETRY' || ltype === 'GEOGRAPHY'
if (isGeospatial) return
let min_value = undefined
let max_value = undefined
let null_count = 0n

@ -144,8 +144,13 @@ export function logicalType(type) {
if (type.type === 'UUID') return { field_14: {} }
if (type.type === 'FLOAT16') return { field_15: {} }
if (type.type === 'VARIANT') return { field_16: {} }
if (type.type === 'GEOMETRY') return { field_17: {} }
if (type.type === 'GEOGRAPHY') return { field_18: {} }
if (type.type === 'GEOMETRY') return { field_17: {
field_1: type.crs,
} }
if (type.type === 'GEOGRAPHY') return { field_18: {
field_1: type.crs,
field_2: type.algorithm && edgeAlgorithm[type.algorithm],
} }
}
/**
@ -157,3 +162,15 @@ function timeUnit(unit) {
if (unit === 'MICROS') return { field_2: {} }
return { field_1: {} }
}
/**
 * Numeric codes for the edge interpolation algorithm names.
 * logicalType() looks a GEOGRAPHY type's `algorithm` up in this table and
 * writes the resulting number as field_2 of the GEOGRAPHY logical type.
 * NOTE(review): values presumably mirror the EdgeInterpolationAlgorithm enum
 * of the Parquet thrift definition — confirm before adding entries.
 *
 * @import {EdgeInterpolationAlgorithm} from 'hyparquet/src/types.js'
 * @type {Record<EdgeInterpolationAlgorithm, number>}
 */
const edgeAlgorithm = {
SPHERICAL: 0,
VINCENTY: 1,
THOMAS: 2,
ANDOYER: 3,
KARNEY: 4,
}

@ -66,6 +66,12 @@ function basicTypeToSchemaElement(name, type, nullable) {
if (type === 'FLOAT16') {
return { name, type: 'FIXED_LEN_BYTE_ARRAY', type_length: 2, logical_type: { type: 'FLOAT16' }, repetition_type }
}
if (type === 'GEOMETRY') {
return { name, type: 'BYTE_ARRAY', logical_type: { type: 'GEOMETRY' }, repetition_type }
}
if (type === 'GEOGRAPHY') {
return { name, type: 'BYTE_ARRAY', logical_type: { type: 'GEOGRAPHY' }, repetition_type }
}
return { name, type, repetition_type }
}

4
src/types.d.ts vendored

@ -12,7 +12,9 @@ export type BasicType =
'JSON' |
'TIMESTAMP' |
'UUID' |
'FLOAT16'
'FLOAT16' |
'GEOMETRY' |
'GEOGRAPHY'
export interface ParquetWriteOptions {
writer: Writer

@ -1,3 +1,5 @@
import { geojsonToWkb } from './wkb.js'
const dayMillis = 86400000 // 1 day in milliseconds
/**
@ -48,6 +50,10 @@ export function unconvert(element, values) {
if (element.type_length !== 16) throw new Error('UUID expected type_length to be 16 bytes')
return values.map(unconvertUuid)
}
if (ltype?.type === 'GEOMETRY' || ltype?.type === 'GEOGRAPHY') {
if (!Array.isArray(values)) throw new Error('geometry must be an array')
return values.map(v => v && geojsonToWkb(v))
}
return values
}

147
src/wkb.js Normal file

@ -0,0 +1,147 @@
import { ByteWriter } from './bytewriter.js'
/**
 * Encode a GeoJSON geometry as ISO WKB bytes.
 *
 * The geometry is written into a fresh ByteWriter and the writer's buffer is
 * returned wrapped in a Uint8Array.
 *
 * @import {Geometry, Position} from 'hyparquet/src/types.js'
 * @param {Geometry} geometry
 * @returns {Uint8Array}
 */
export function geojsonToWkb(geometry) {
  const bytes = new ByteWriter()
  writeGeometry(bytes, geometry)
  const buffer = bytes.getBuffer()
  return new Uint8Array(buffer)
}
/**
 * Append a single geometry to the writer in ISO WKB form.
 *
 * Layout: one byte-order byte (1, declaring little endian), a uint32 type
 * code, then a type-specific payload. The type code is the base code from
 * geometryTypeCode() plus 1000 when positions carry three ordinates or 3000
 * when they carry four. Multi-geometries and collections recurse through this
 * function, so every member is written with its own header and its own
 * inferred dimension.
 *
 * @param {ByteWriter} writer
 * @param {Geometry} geometry
 * @throws {Error} if the geometry type is unknown, if the inferred dimension
 *   is outside 2..4, or if a position is shorter than the inferred dimension
 */
function writeGeometry(writer, geometry) {
  const typeCode = geometryTypeCode(geometry.type)

  // infer dimensions
  const dim = inferGeometryDimensions(geometry)
  // A position needs at least two ordinates. Without the lower bound a
  // one-element position would be written under a 2D type code with a single
  // double, leaving the output unreadable. Validate before emitting any byte.
  if (dim < 2 || dim > 4) {
    throw new Error(`unsupported geometry dimensions: ${dim}`)
  }
  let flag = 0
  if (dim === 3) flag = 1
  else if (dim === 4) flag = 3

  writer.appendUint8(1) // little endian
  writer.appendUint32(typeCode + flag * 1000)

  switch (geometry.type) {
    case 'Point':
      writePosition(writer, geometry.coordinates, dim)
      break
    case 'LineString':
      writeLine(writer, geometry.coordinates, dim)
      break
    case 'Polygon':
      writePolygon(writer, geometry.coordinates, dim)
      break
    case 'MultiPoint':
      // member count, then each member as a complete Point geometry
      writer.appendUint32(geometry.coordinates.length)
      for (const coordinates of geometry.coordinates) {
        writeGeometry(writer, { type: 'Point', coordinates })
      }
      break
    case 'MultiLineString':
      writer.appendUint32(geometry.coordinates.length)
      for (const coordinates of geometry.coordinates) {
        writeGeometry(writer, { type: 'LineString', coordinates })
      }
      break
    case 'MultiPolygon':
      writer.appendUint32(geometry.coordinates.length)
      for (const coordinates of geometry.coordinates) {
        writeGeometry(writer, { type: 'Polygon', coordinates })
      }
      break
    case 'GeometryCollection':
      writer.appendUint32(geometry.geometries.length)
      for (const child of geometry.geometries) {
        writeGeometry(writer, child)
      }
      break
    default:
      // geometryTypeCode() already rejects unknown types; kept as a safety net
      throw new Error('unsupported geometry type')
  }
}
/**
 * Append the first `dim` ordinates of a position as float64 values.
 *
 * @param {ByteWriter} writer
 * @param {Position} position
 * @param {number} dim number of ordinates to write
 * @throws {Error} if the position has fewer than `dim` ordinates
 */
function writePosition(writer, position, dim) {
  const available = position.length
  if (dim > available) {
    throw new Error('geometry position dimensions mismatch')
  }
  // Extra ordinates beyond `dim` are ignored.
  let ordinate = 0
  while (ordinate < dim) {
    writer.appendFloat64(position[ordinate])
    ordinate += 1
  }
}
/**
 * Append a sequence of positions: a uint32 point count followed by each
 * position written with `dim` ordinates.
 *
 * @param {ByteWriter} writer
 * @param {Position[]} coordinates
 * @param {number} dim
 */
function writeLine(writer, coordinates, dim) {
  writer.appendUint32(coordinates.length)
  for (let index = 0; index < coordinates.length; index++) {
    const point = coordinates[index]
    writePosition(writer, point, dim)
  }
}
/**
 * Append a polygon: a uint32 ring count followed by every ring written as a
 * counted sequence of positions.
 *
 * @param {ByteWriter} writer
 * @param {Position[][]} rings
 * @param {number} dimensions
 */
function writePolygon(writer, rings, dimensions) {
  writer.appendUint32(rings.length)
  for (let index = 0; index < rings.length; index++) {
    const linearRing = rings[index]
    writeLine(writer, linearRing, dimensions)
  }
}
/**
 * Look up the base WKB type code for a GeoJSON geometry type name.
 * The caller adds any dimension offset on top of this value.
 *
 * @param {Geometry['type']} type
 * @returns {number}
 * @throws {Error} if the type name is not one of the seven known geometry types
 */
function geometryTypeCode(type) {
  switch (type) {
    case 'Point':
      return 1
    case 'LineString':
      return 2
    case 'Polygon':
      return 3
    case 'MultiPoint':
      return 4
    case 'MultiLineString':
      return 5
    case 'MultiPolygon':
      return 6
    case 'GeometryCollection':
      return 7
    default:
      throw new Error(`unknown geometry type: ${type}`)
  }
}
/**
 * Work out how many ordinates the positions of a geometry carry.
 *
 * For a GeometryCollection this is the largest value found among its members,
 * falling back to 2 when there are no members. Any other geometry is measured
 * from its coordinates.
 *
 * @param {Geometry} geometry
 * @returns {number}
 */
function inferGeometryDimensions(geometry) {
  if (geometry.type !== 'GeometryCollection') {
    return inferCoordinateDimensions(geometry.coordinates)
  }
  let widest = 0
  for (const member of geometry.geometries) {
    const memberDim = inferGeometryDimensions(member)
    widest = Math.max(widest, memberDim)
  }
  return widest || 2
}
/**
 * Find the largest position length inside an arbitrarily nested coordinate
 * array. An array whose first element is a number is treated as a position
 * and its length is returned; anything that is not an array, or is empty,
 * counts as 2.
 *
 * @param {any} value
 * @returns {number}
 */
function inferCoordinateDimensions(value) {
  if (!Array.isArray(value) || value.length === 0) return 2
  if (typeof value[0] === 'number') return value.length

  let widest = 0
  for (let index = 0; index < value.length; index++) {
    const nested = inferCoordinateDimensions(value[index])
    widest = Math.max(widest, nested)
  }
  return widest || 2
}

Binary file not shown.

52
test/wkb.test.js Normal file

@ -0,0 +1,52 @@
import { describe, expect, it } from 'vitest'
import { geojsonToWkb } from '../src/wkb.js'
import { wkbToGeojson } from 'hyparquet/src/wkb.js'
/** @import {Geometry} from 'hyparquet/src/types.js' */
describe('geojsonToWkb', () => {
  /**
   * Encode a geometry, decode the bytes again, and expect the input back.
   *
   * @param {Geometry} geometry
   */
  function expectRoundTrip(geometry) {
    const wkb = geojsonToWkb(geometry)
    expect(decode(wkb)).toEqual(geometry)
  }

  it('encodes point geometries', () => {
    expectRoundTrip({ type: 'Point', coordinates: [30, 10] })
  })

  it('encodes polygons with holes', () => {
    const outerRing = [[35, 10], [45, 45], [15, 40], [10, 20], [35, 10]]
    const hole = [[20, 30], [35, 35], [30, 20], [20, 30]]
    expectRoundTrip({ type: 'Polygon', coordinates: [outerRing, hole] })
  })

  it('encodes geometry collections with mixed dimensions', () => {
    expectRoundTrip({
      type: 'GeometryCollection',
      geometries: [
        { type: 'Point', coordinates: [30, 10, 5] },
        { type: 'LineString', coordinates: [[30, 10, 5], [40, 40, 5], [20, 40, 5], [10, 20, 5]] },
      ],
    })
  })
})
/**
 * Parse WKB bytes back into GeoJSON with the hyparquet reader, so encoded
 * output can be compared against the original geometry.
 *
 * @param {Uint8Array} wkb
 * @returns {Geometry}
 */
function decode(wkb) {
  const { buffer, byteOffset, byteLength } = wkb
  // View exactly the bytes of `wkb`, which may be a window into a larger buffer.
  const view = new DataView(buffer, byteOffset, byteLength)
  return wkbToGeojson({ view, offset: 0 })
}