mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Geospatial (#12)
This commit is contained in:
parent
a56c78de39
commit
4715a4a429
@ -33,7 +33,7 @@ export function writeColumn(writer, column, values, stats) {
|
||||
const encodings = []
|
||||
|
||||
// Compute statistics
|
||||
const statistics = stats ? getStatistics(values) : undefined
|
||||
const statistics = stats ? getStatistics(values, element) : undefined
|
||||
|
||||
// dictionary encoding
|
||||
let dictionary_page_offset
|
||||
@ -135,9 +135,14 @@ function writeDictionaryPage(writer, column, dictionary) {
|
||||
* @import {ColumnMetaData, DecodedArray, Encoding, ParquetType, SchemaElement, Statistics} from 'hyparquet'
|
||||
* @import {ColumnEncoder, ListValues, Writer} from '../src/types.js'
|
||||
* @param {DecodedArray} values
|
||||
* @returns {Statistics}
|
||||
* @param {SchemaElement} element
|
||||
* @returns {Statistics | undefined}
|
||||
*/
|
||||
function getStatistics(values) {
|
||||
function getStatistics(values, element) {
|
||||
const ltype = element?.logical_type?.type
|
||||
const isGeospatial = ltype === 'GEOMETRY' || ltype === 'GEOGRAPHY'
|
||||
if (isGeospatial) return
|
||||
|
||||
let min_value = undefined
|
||||
let max_value = undefined
|
||||
let null_count = 0n
|
||||
|
||||
@ -144,8 +144,13 @@ export function logicalType(type) {
|
||||
if (type.type === 'UUID') return { field_14: {} }
|
||||
if (type.type === 'FLOAT16') return { field_15: {} }
|
||||
if (type.type === 'VARIANT') return { field_16: {} }
|
||||
if (type.type === 'GEOMETRY') return { field_17: {} }
|
||||
if (type.type === 'GEOGRAPHY') return { field_18: {} }
|
||||
if (type.type === 'GEOMETRY') return { field_17: {
|
||||
field_1: type.crs,
|
||||
} }
|
||||
if (type.type === 'GEOGRAPHY') return { field_18: {
|
||||
field_1: type.crs,
|
||||
field_2: type.algorithm && edgeAlgorithm[type.algorithm],
|
||||
} }
|
||||
}
|
||||
|
||||
/**
|
||||
@ -157,3 +162,15 @@ function timeUnit(unit) {
|
||||
if (unit === 'MICROS') return { field_2: {} }
|
||||
return { field_1: {} }
|
||||
}
|
||||
|
||||
/**
 * Maps an edge interpolation algorithm name to the integer emitted as
 * field_2 of the GEOGRAPHY logical type.
 *
 * @import {EdgeInterpolationAlgorithm} from 'hyparquet/src/types.js'
 * @type {Record<EdgeInterpolationAlgorithm, number>}
 */
const edgeAlgorithm = {
  SPHERICAL: 0,
  VINCENTY: 1,
  THOMAS: 2,
  ANDOYER: 3,
  KARNEY: 4,
}
|
||||
|
||||
@ -66,6 +66,12 @@ function basicTypeToSchemaElement(name, type, nullable) {
|
||||
if (type === 'FLOAT16') {
|
||||
return { name, type: 'FIXED_LEN_BYTE_ARRAY', type_length: 2, logical_type: { type: 'FLOAT16' }, repetition_type }
|
||||
}
|
||||
if (type === 'GEOMETRY') {
|
||||
return { name, type: 'BYTE_ARRAY', logical_type: { type: 'GEOMETRY' }, repetition_type }
|
||||
}
|
||||
if (type === 'GEOGRAPHY') {
|
||||
return { name, type: 'BYTE_ARRAY', logical_type: { type: 'GEOGRAPHY' }, repetition_type }
|
||||
}
|
||||
return { name, type, repetition_type }
|
||||
}
|
||||
|
||||
|
||||
4
src/types.d.ts
vendored
4
src/types.d.ts
vendored
@ -12,7 +12,9 @@ export type BasicType =
|
||||
'JSON' |
|
||||
'TIMESTAMP' |
|
||||
'UUID' |
|
||||
'FLOAT16'
|
||||
'FLOAT16' |
|
||||
'GEOMETRY' |
|
||||
'GEOGRAPHY'
|
||||
|
||||
export interface ParquetWriteOptions {
|
||||
writer: Writer
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
import { geojsonToWkb } from './wkb.js'
|
||||
|
||||
const dayMillis = 86400000 // 1 day in milliseconds
|
||||
|
||||
/**
|
||||
@ -48,6 +50,10 @@ export function unconvert(element, values) {
|
||||
if (element.type_length !== 16) throw new Error('UUID expected type_length to be 16 bytes')
|
||||
return values.map(unconvertUuid)
|
||||
}
|
||||
if (ltype?.type === 'GEOMETRY' || ltype?.type === 'GEOGRAPHY') {
|
||||
if (!Array.isArray(values)) throw new Error('geometry must be an array')
|
||||
return values.map(v => v && geojsonToWkb(v))
|
||||
}
|
||||
return values
|
||||
}
|
||||
|
||||
|
||||
147
src/wkb.js
Normal file
147
src/wkb.js
Normal file
@ -0,0 +1,147 @@
|
||||
import { ByteWriter } from './bytewriter.js'
|
||||
|
||||
/**
 * Serialize a GeoJSON geometry into ISO WKB.
 *
 * The geometry is written into a fresh ByteWriter and the result is
 * returned as a Uint8Array constructed from writer.getBuffer().
 *
 * @import {Geometry, Position} from 'hyparquet/src/types.js'
 * @param {Geometry} geometry GeoJSON geometry to encode
 * @returns {Uint8Array} the encoded WKB bytes
 */
export function geojsonToWkb(geometry) {
  const writer = new ByteWriter()
  writeGeometry(writer, geometry)
  return new Uint8Array(writer.getBuffer())
}
|
||||
|
||||
/**
 * Write a single geometry as WKB: a byte-order marker, the type code, then
 * the payload. Multi-geometries and collections recurse through this
 * function, so every member is written with its own header.
 *
 * @param {ByteWriter} writer
 * @param {Geometry} geometry
 * @throws {Error} if the geometry type is not supported, or if the inferred
 *   coordinate dimension is not 2, 3 or 4
 */
function writeGeometry(writer, geometry) {
  const typeCode = geometryTypeCode(geometry.type)

  // infer dimensions
  const dim = inferGeometryDimensions(geometry)
  // A dimension below 2 cannot be expressed in WKB: without this guard a
  // one-element position would be emitted under an XY header with a single
  // double, producing malformed output.
  if (dim < 2 || dim > 4) throw new Error(`unsupported geometry dimensions: ${dim}`)

  // ISO WKB offsets the type code by 1000 for XYZ and by 3000 for XYZM
  let flag = 0
  if (dim === 3) flag = 1
  else if (dim === 4) flag = 3

  writer.appendUint8(1) // little endian
  writer.appendUint32(typeCode + flag * 1000)

  switch (geometry.type) {
    case 'Point':
      writePosition(writer, geometry.coordinates, dim)
      break
    case 'LineString':
      writeLine(writer, geometry.coordinates, dim)
      break
    case 'Polygon':
      writePolygon(writer, geometry.coordinates, dim)
      break
    case 'MultiPoint':
      writer.appendUint32(geometry.coordinates.length)
      for (const coordinates of geometry.coordinates) {
        writeGeometry(writer, { type: 'Point', coordinates })
      }
      break
    case 'MultiLineString':
      writer.appendUint32(geometry.coordinates.length)
      for (const coordinates of geometry.coordinates) {
        writeGeometry(writer, { type: 'LineString', coordinates })
      }
      break
    case 'MultiPolygon':
      writer.appendUint32(geometry.coordinates.length)
      for (const coordinates of geometry.coordinates) {
        writeGeometry(writer, { type: 'Polygon', coordinates })
      }
      break
    case 'GeometryCollection':
      writer.appendUint32(geometry.geometries.length)
      for (const child of geometry.geometries) {
        writeGeometry(writer, child)
      }
      break
    default:
      throw new Error('unsupported geometry type')
  }
}
|
||||
|
||||
/**
 * Append the first `dim` ordinates of a position as float64 values.
 *
 * @param {ByteWriter} writer
 * @param {Position} position
 * @param {number} dim number of ordinates to write
 * @throws {Error} if the position has fewer than `dim` ordinates
 */
function writePosition(writer, position, dim) {
  if (position.length < dim) {
    throw new Error('geometry position dimensions mismatch')
  }
  for (let i = 0; i < dim; i++) {
    writer.appendFloat64(position[i])
  }
}
|
||||
|
||||
/**
 * Write a sequence of positions: a uint32 count followed by each position.
 *
 * @param {ByteWriter} writer
 * @param {Position[]} coordinates
 * @param {number} dim number of ordinates written per position
 */
function writeLine(writer, coordinates, dim) {
  writer.appendUint32(coordinates.length)
  for (const position of coordinates) {
    writePosition(writer, position, dim)
  }
}
|
||||
|
||||
/**
 * Write polygon rings: a uint32 ring count followed by each ring written
 * as a line.
 *
 * @param {ByteWriter} writer
 * @param {Position[][]} rings
 * @param {number} dimensions number of ordinates written per position
 */
function writePolygon(writer, rings, dimensions) {
  writer.appendUint32(rings.length)
  for (const ring of rings) {
    writeLine(writer, ring, dimensions)
  }
}
|
||||
|
||||
/**
 * Map a GeoJSON geometry type name to its base WKB type code (1-7).
 *
 * @param {Geometry['type']} type
 * @returns {number}
 * @throws {Error} if the type name is not one of the seven supported types
 */
function geometryTypeCode(type) {
  switch (type) {
    case 'Point': return 1
    case 'LineString': return 2
    case 'Polygon': return 3
    case 'MultiPoint': return 4
    case 'MultiLineString': return 5
    case 'MultiPolygon': return 6
    case 'GeometryCollection': return 7
    default: throw new Error(`unknown geometry type: ${type}`)
  }
}
|
||||
|
||||
/**
 * Determine the maximum coordinate dimensions for the geometry.
 *
 * A GeometryCollection reports the largest dimension found among its
 * children, or 2 when it has none; any other geometry is measured from its
 * coordinates.
 *
 * @param {Geometry} geometry
 * @returns {number}
 */
function inferGeometryDimensions(geometry) {
  if (geometry.type !== 'GeometryCollection') {
    return inferCoordinateDimensions(geometry.coordinates)
  }
  let maxDim = 0
  for (const child of geometry.geometries) {
    maxDim = Math.max(maxDim, inferGeometryDimensions(child))
  }
  // an empty collection has no coordinates to measure, so default to 2
  return maxDim || 2
}
|
||||
|
||||
/**
 * Recursively find the largest position length inside a (possibly nested)
 * coordinates array. An array whose first element is a number is treated as
 * a position and contributes its length; non-arrays and empty arrays count
 * as 2.
 *
 * @param {any} value
 * @returns {number}
 */
function inferCoordinateDimensions(value) {
  if (!Array.isArray(value) || value.length === 0) return 2
  if (typeof value[0] === 'number') return value.length
  let maxDim = 0
  for (const item of value) {
    maxDim = Math.max(maxDim, inferCoordinateDimensions(item))
  }
  return maxDim || 2
}
|
||||
BIN
test/files/geospatial.parquet
Normal file
BIN
test/files/geospatial.parquet
Normal file
Binary file not shown.
52
test/wkb.test.js
Normal file
52
test/wkb.test.js
Normal file
@ -0,0 +1,52 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { geojsonToWkb } from '../src/wkb.js'
|
||||
import { wkbToGeojson } from 'hyparquet/src/wkb.js'
|
||||
|
||||
/** @import {Geometry} from 'hyparquet/src/types.js' */
|
||||
|
||||
// Round-trip checks: each geometry is encoded with geojsonToWkb, decoded
// again with the hyparquet reader, and compared with the input.
describe('geojsonToWkb', () => {
  it('encodes point geometries', () => {
    /** @type {Geometry} */
    const geometry = { type: 'Point', coordinates: [30, 10] }
    const decoded = decode(geojsonToWkb(geometry))
    expect(decoded).toEqual(geometry)
  })

  it('encodes polygons with holes', () => {
    /** @type {Geometry} */
    const geometry = {
      type: 'Polygon',
      coordinates: [
        [[35, 10], [45, 45], [15, 40], [10, 20], [35, 10]],
        [[20, 30], [35, 35], [30, 20], [20, 30]],
      ],
    }
    const decoded = decode(geojsonToWkb(geometry))
    expect(decoded).toEqual(geometry)
  })

  it('encodes geometry collections with mixed dimensions', () => {
    /** @type {Geometry} */
    const geometry = {
      type: 'GeometryCollection',
      geometries: [
        { type: 'Point', coordinates: [30, 10, 5] },
        { type: 'LineString', coordinates: [[30, 10, 5], [40, 40, 5], [20, 40, 5], [10, 20, 5]] },
      ],
    }
    const decoded = decode(geojsonToWkb(geometry))
    expect(decoded).toEqual(geometry)
  })
})
|
||||
|
||||
/**
 * Decode WKB using the hyparquet reader for verification.
 *
 * The DataView is built over the array's own byteOffset/byteLength window,
 * so a Uint8Array that is a view into a larger buffer is read correctly.
 *
 * @param {Uint8Array} wkb
 * @returns {Geometry}
 */
function decode(wkb) {
  const view = new DataView(wkb.buffer, wkb.byteOffset, wkb.byteLength)
  const reader = { view, offset: 0 }
  return wkbToGeojson(reader)
}
|
||||
Loading…
Reference in New Issue
Block a user