From d70190425326cde6eba7e844b7b5d9a76258ff07 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Tue, 30 Sep 2025 11:45:39 -0700 Subject: [PATCH] Add well-known-binary decoder for geometry and geography (#131) --- src/convert.js | 51 +- src/types.d.ts | 64 ++ src/wkb.js | 125 +++ test/convert.test.js | 68 +- test/files/geospatial.json | 1142 +++++++++++++++++++++++++++ test/files/geospatial.metadata.json | 221 ++++++ test/files/geospatial.parquet | Bin 0 -> 8016 bytes test/wkb.test.js | 211 +++++ 8 files changed, 1851 insertions(+), 31 deletions(-) create mode 100644 src/wkb.js create mode 100644 test/files/geospatial.json create mode 100644 test/files/geospatial.metadata.json create mode 100644 test/files/geospatial.parquet create mode 100644 test/wkb.test.js diff --git a/src/convert.js b/src/convert.js index 927741c..21555a3 100644 --- a/src/convert.js +++ b/src/convert.js @@ -1,5 +1,7 @@ +import { wkbToGeojson } from './wkb.js' + /** - * @import {ColumnDecoder, DecodedArray, Encoding, ParquetParsers} from '../src/types.d.ts' + * @import {ColumnDecoder, DecodedArray, Encoding, ParquetParsers} from '../src/types.js' */ const decoder = new TextDecoder() @@ -19,12 +21,17 @@ export const DEFAULT_PARSERS = { return new Date(Number(nanos / 1000000n)) }, dateFromDays(days) { - const dayInMillis = 86400000 - return new Date(days * dayInMillis) + return new Date(days * 86400000) }, stringFromBytes(bytes) { return bytes && decoder.decode(bytes) }, + geometryFromBytes(bytes) { + return bytes && wkbToGeojson({ view: new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength), offset: 0 }) + }, + geographyFromBytes(bytes) { + return bytes && wkbToGeojson({ view: new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength), offset: 0 }) + }, } /** @@ -76,35 +83,18 @@ export function convert(data, columnDecoder) { return arr } if (!ctype && type === 'INT96') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - arr[i] = 
parsers.timestampFromNanoseconds(parseInt96Nanos(data[i])) - } - return arr + return Array.from(data).map(v => parsers.timestampFromNanoseconds(parseInt96Nanos(v))) } if (ctype === 'DATE') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - arr[i] = parsers.dateFromDays(data[i]) - } - return arr + return Array.from(data).map(v => parsers.dateFromDays(v)) } if (ctype === 'TIMESTAMP_MILLIS') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - arr[i] = parsers.timestampFromMilliseconds(data[i]) - } - return arr + return Array.from(data).map(v => parsers.timestampFromMilliseconds(v)) } if (ctype === 'TIMESTAMP_MICROS') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - arr[i] = parsers.timestampFromMicroseconds(data[i]) - } - return arr + return Array.from(data).map(v => parsers.timestampFromMicroseconds(v)) } if (ctype === 'JSON') { - const decoder = new TextDecoder() return data.map(v => JSON.parse(decoder.decode(v))) } if (ctype === 'BSON') { @@ -113,13 +103,14 @@ export function convert(data, columnDecoder) { if (ctype === 'INTERVAL') { throw new Error('parquet interval not supported') } + if (ltype?.type === 'GEOMETRY') { + return data.map(v => parsers.geometryFromBytes(v)) + } + if (ltype?.type === 'GEOGRAPHY') { + return data.map(v => parsers.geographyFromBytes(v)) + } if (ctype === 'UTF8' || ltype?.type === 'STRING' || utf8 && type === 'BYTE_ARRAY') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - const value = data[i] - arr[i] = value instanceof Uint8Array ? 
parsers.stringFromBytes(value) : value - } - return arr + return data.map(v => parsers.stringFromBytes(v)) } if (ctype === 'UINT_64' || ltype?.type === 'INTEGER' && ltype.bitWidth === 64 && !ltype.isSigned) { if (data instanceof BigInt64Array) { diff --git a/src/types.d.ts b/src/types.d.ts index bc42633..b78d287 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -8,6 +8,8 @@ export interface ParquetParsers { timestampFromNanoseconds(nanos: bigint): any dateFromDays(days: number): any stringFromBytes(bytes: Uint8Array): any + geometryFromBytes(bytes: Uint8Array): any + geographyFromBytes(bytes: Uint8Array): any } /** @@ -432,3 +434,65 @@ export interface AsyncRowGroup { groupRows: number asyncColumns: AsyncColumn[] } + +/** + * Geometry types based on the GeoJSON specification (RFC 7946) + */ +export type Geometry = + | Point + | MultiPoint + | LineString + | MultiLineString + | Polygon + | MultiPolygon + | GeometryCollection + +/** + * Position is an array of at least two numbers. + * The order should be [longitude, latitude] with optional properties (eg- altitude). + */ +export type Position = number[] + +export interface Point { + type: 'Point' + coordinates: Position +} + +export interface MultiPoint { + type: 'MultiPoint' + coordinates: Position[] +} + +export interface LineString { + type: 'LineString' + coordinates: Position[] +} + +/** + * Each element is one LineString. + */ +export interface MultiLineString { + type: 'MultiLineString' + coordinates: Position[][] +} + +/** + * Each element is a linear ring. + */ +export interface Polygon { + type: 'Polygon' + coordinates: Position[][] +} + +/** + * Each element is one Polygon. 
// EWKB (PostGIS "extended" WKB) stores dimensionality and SRID presence in the
// top three bits of the 32-bit geometry type word.
const EWKB_Z = 0x80000000
const EWKB_M = 0x40000000
const EWKB_SRID = 0x20000000
// Low 29 bits of the type word: the geometry type code without EWKB flags.
const TYPE_CODE_MASK = 0x1fffffff

/**
 * WKB (Well-Known Binary) decoder for geometry objects.
 *
 * Decodes one geometry starting at `reader.offset` and advances
 * `reader.offset` past every byte it consumes. Both ISO WKB type codes
 * (1-7, offset by 1000/2000/3000 for Z/M/ZM) and EWKB flag bits in the top
 * bits of the type word are understood. An EWKB SRID is skipped because the
 * returned GeoJSON-style object has no field for it.
 *
 * @import {DataReader, Geometry} from '../src/types.js'
 * @param {DataReader} reader
 * @returns {Geometry} geometry object
 * @throws {Error} if the geometry type or dimension code is unsupported, or a
 *   Multi* geometry contains a member of the wrong type
 */
export function wkbToGeojson(reader) {
  const flags = getFlags(reader)

  switch (flags.type) {
    case 1: // Point
      return { type: 'Point', coordinates: readPosition(reader, flags) }
    case 2: // LineString
      return { type: 'LineString', coordinates: readLine(reader, flags) }
    case 3: // Polygon
      return { type: 'Polygon', coordinates: readPolygon(reader, flags) }
    case 4: // MultiPoint
      return { type: 'MultiPoint', coordinates: readMembers(reader, flags, 1, readPosition) }
    case 5: // MultiLineString
      return { type: 'MultiLineString', coordinates: readMembers(reader, flags, 2, readLine) }
    case 6: // MultiPolygon
      return { type: 'MultiPolygon', coordinates: readMembers(reader, flags, 3, readPolygon) }
    case 7: { // GeometryCollection
      // Members may be of any type, so each one is decoded recursively.
      const geometries = []
      for (let i = 0; i < flags.count; i++) {
        geometries.push(wkbToGeojson(reader))
      }
      return { type: 'GeometryCollection', geometries }
    }
    default:
      throw new Error(`Unsupported geometry type: ${flags.type}`)
  }
}

/**
 * Decoded geometry header.
 *
 * @typedef {object} WkbFlags
 * @property {boolean} littleEndian byte order of this geometry's numbers
 * @property {number} type base geometry type (1-7 when supported)
 * @property {number} dim number of ordinates per position (2, 3 or 4)
 * @property {number} count element count for types 2-7, otherwise 0
 */

/**
 * Read a geometry header: byte order, type word, optional EWKB SRID and,
 * for every type except Point, the element count that follows.
 *
 * @param {DataReader} reader
 * @returns {WkbFlags}
 * @throws {Error} if the ISO dimension prefix is not one of 0, 1, 2, 3
 */
function getFlags(reader) {
  const { view } = reader
  const littleEndian = view.getUint8(reader.offset++) === 1
  const rawType = view.getUint32(reader.offset, littleEndian)
  reader.offset += 4

  // Bitwise AND works on signed 32-bit values, so test against zero rather
  // than relying on the sign of the result.
  const ewkbZ = (rawType & EWKB_Z) !== 0
  const ewkbM = (rawType & EWKB_M) !== 0
  if ((rawType & EWKB_SRID) !== 0) {
    // The SRID sits between the type word and the payload; it is not needed.
    reader.offset += 4
  }

  const typeCode = rawType & TYPE_CODE_MASK
  const type = typeCode % 1000
  const isoDim = Math.floor(typeCode / 1000)
  if (isoDim > 3) {
    // Guessing an ordinate count here would silently misalign every read.
    throw new Error(`Unsupported geometry dimension code: ${typeCode}`)
  }

  // XY, XYZ, XYM, XYZM
  const hasZ = ewkbZ || isoDim === 1 || isoDim === 3
  const hasM = ewkbM || isoDim === 2 || isoDim === 3
  const dim = 2 + (hasZ ? 1 : 0) + (hasM ? 1 : 0)

  let count = 0
  if (type > 1 && type <= 7) {
    count = view.getUint32(reader.offset, littleEndian)
    reader.offset += 4
  }

  return { littleEndian, type, dim, count }
}

/**
 * Read the `flags.count` members of a MultiPoint, MultiLineString or
 * MultiPolygon. Every member carries its own header, so byte order and
 * dimensionality are taken from the member rather than from the parent.
 *
 * @template T
 * @param {DataReader} reader
 * @param {WkbFlags} flags header of the enclosing Multi* geometry
 * @param {number} memberType geometry type every member must have
 * @param {(reader: DataReader, flags: WkbFlags) => T} readMember
 * @returns {T[]}
 * @throws {Error} if a member header has a different geometry type
 */
function readMembers(reader, flags, memberType, readMember) {
  const members = []
  for (let i = 0; i < flags.count; i++) {
    const memberFlags = getFlags(reader)
    if (memberFlags.type !== memberType) {
      throw new Error(`Unexpected geometry type ${memberFlags.type} inside geometry type ${flags.type}`)
    }
    members.push(readMember(reader, memberFlags))
  }
  return members
}

/**
 * Read one position: `flags.dim` consecutive float64 ordinates.
 *
 * @param {DataReader} reader
 * @param {WkbFlags} flags
 * @returns {number[]}
 */
function readPosition(reader, flags) {
  const points = []
  for (let i = 0; i < flags.dim; i++) {
    const coord = reader.view.getFloat64(reader.offset, flags.littleEndian)
    reader.offset += 8
    points.push(coord)
  }
  return points
}

/**
 * Read `flags.count` consecutive positions.
 *
 * @param {DataReader} reader
 * @param {WkbFlags} flags
 * @returns {number[][]}
 */
function readLine(reader, flags) {
  const points = []
  for (let i = 0; i < flags.count; i++) {
    points.push(readPosition(reader, flags))
  }
  return points
}

/**
 * Read `flags.count` rings; each ring is a uint32 position count followed
 * by that many positions.
 *
 * @param {DataReader} reader
 * @param {WkbFlags} flags
 * @returns {number[][][]}
 */
function readPolygon(reader, flags) {
  const { view } = reader
  const rings = []
  for (let r = 0; r < flags.count; r++) {
    const count = view.getUint32(reader.offset, flags.littleEndian)
    reader.offset += 4
    rings.push(readLine(reader, { ...flags, count }))
  }
  return rings
}
Uint8Array([ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 128, 89, 64, 0, 0, 0, 0, 0, 0, 224, + 63, + ]) + const data = [pointWkb] + /** @type {SchemaElement} */ + const element = { name, type: 'BYTE_ARRAY', logical_type: { type: 'GEOMETRY' } } + expect(convert(data, { element, parsers })).toEqual([ + { type: 'Point', coordinates: [102, 0.5] }, + ]) + }) + + it('decodes geography logical type with default parser', () => { + const pointWkb = new Uint8Array([ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 128, 89, 64, 0, 0, 0, 0, 0, 0, 224, + 63, + ]) + const data = [pointWkb] + /** @type {SchemaElement} */ + const element = { name, type: 'BYTE_ARRAY', logical_type: { type: 'GEOGRAPHY' } } + expect(convert(data, { element, parsers })).toEqual([ + { type: 'Point', coordinates: [102, 0.5] }, + ]) + }) + it('converts numbers to DECIMAL', () => { const data = [100, 200] /** @type {SchemaElement} */ @@ -236,13 +262,53 @@ describe('convert function', () => { parsers: { ...parsers, stringFromBytes(/** @type {Uint8Array} */ bytes) { - return `custom-${new TextDecoder().decode(bytes)}` + return bytes && `custom-${new TextDecoder().decode(bytes)}` }, }, } expect(convert(data, columnParser)).toEqual(['custom-foo', undefined]) }) + + it('respects custom parsers - geometryFromBytes', () => { + const pointWkb = new Uint8Array([ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 128, 89, 64, 0, 0, 0, 0, 0, 0, 224, + 63, + ]) + const data = [pointWkb] + /** @type {SchemaElement} */ + const element = { name, type: 'BYTE_ARRAY', logical_type: { type: 'GEOMETRY' } } + /** @type {Pick} */ + const columnParser = { + element, + parsers: { + ...parsers, + geometryFromBytes: () => 'custom-geometry', + }, + } + + expect(convert(data, columnParser)).toEqual(['custom-geometry']) + }) + + it('respects custom parsers - geographyFromBytes', () => { + const pointWkb = new Uint8Array([ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 128, 89, 64, 0, 0, 0, 0, 0, 0, 224, + 63, + ]) + const data = [pointWkb] + /** @type {SchemaElement} */ + const element = { name, 
type: 'BYTE_ARRAY', logical_type: { type: 'GEOGRAPHY' } } + /** @type {Pick} */ + const columnParser = { + element, + parsers: { + ...parsers, + geographyFromBytes: () => 'custom-geojson', + }, + } + + expect(convert(data, columnParser)).toEqual(['custom-geojson']) + }) }) describe('parseFloat16', () => { diff --git a/test/files/geospatial.json b/test/files/geospatial.json new file mode 100644 index 0000000..0408b27 --- /dev/null +++ b/test/files/geospatial.json @@ -0,0 +1,1142 @@ +[ + [ + "all", + "POINT (30 10)", + { + "type": "Point", + "coordinates": [ + 30, + 10 + ] + } + ], + [ + "all", + "LINESTRING (30 10, 10 30, 40 40)", + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10 + ], + [ + 10, + 30 + ], + [ + 40, + 40 + ] + ] + } + ], + [ + "all", + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10 + ], + [ + 40, + 40 + ], + [ + 20, + 40 + ], + [ + 10, + 20 + ], + [ + 30, + 10 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOINT ((30 10))", + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10 + ] + ] + } + ], + [ + "all", + "MULTILINESTRING ((30 10, 10 30, 40 40))", + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10 + ], + [ + 10, + 30 + ], + [ + 40, + 40 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))", + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10 + ], + [ + 40, + 40 + ], + [ + 20, + 40 + ], + [ + 10, + 20 + ], + [ + 30, + 10 + ] + ] + ] + ] + } + ], + [ + "all", + "GEOMETRYCOLLECTION (POINT (30 10), LINESTRING (30 10, 10 30, 40 40), POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)), MULTIPOINT ((30 10)), MULTILINESTRING ((30 10, 10 30, 40 40)), MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10))))", + { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Point", + "coordinates": [ + 30, + 10 + ] + }, + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10 + ], + [ + 10, + 30 + ], + [ 
+ 40, + 40 + ] + ] + }, + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10 + ], + [ + 40, + 40 + ], + [ + 20, + 40 + ], + [ + 10, + 20 + ], + [ + 30, + 10 + ] + ] + ] + }, + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10 + ] + ] + }, + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10 + ], + [ + 10, + 30 + ], + [ + 40, + 40 + ] + ] + ] + }, + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10 + ], + [ + 40, + 40 + ], + [ + 20, + 40 + ], + [ + 10, + 20 + ], + [ + 30, + 10 + ] + ] + ] + ] + } + ] + } + ], + [ + "all", + "POINT Z (30 10 40)", + { + "type": "Point", + "coordinates": [ + 30, + 10, + 40 + ] + } + ], + [ + "all", + "LINESTRING Z (30 10 40, 10 30 40, 40 40 80)", + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 40 + ], + [ + 10, + 30, + 40 + ], + [ + 40, + 40, + 80 + ] + ] + } + ], + [ + "all", + "POLYGON Z ((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40))", + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 40 + ], + [ + 40, + 40, + 80 + ], + [ + 20, + 40, + 60 + ], + [ + 10, + 20, + 30 + ], + [ + 30, + 10, + 40 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOINT Z ((30 10 40))", + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 40 + ] + ] + } + ], + [ + "all", + "MULTILINESTRING Z ((30 10 40, 10 30 40, 40 40 80))", + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 40 + ], + [ + 10, + 30, + 40 + ], + [ + 40, + 40, + 80 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOLYGON Z (((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40)))", + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 40 + ], + [ + 40, + 40, + 80 + ], + [ + 20, + 40, + 60 + ], + [ + 10, + 20, + 30 + ], + [ + 30, + 10, + 40 + ] + ] + ] + ] + } + ], + [ + "all", + "GEOMETRYCOLLECTION Z (POINT Z (30 10 40), LINESTRING Z (30 10 40, 10 30 40, 40 40 80), POLYGON Z ((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40)), MULTIPOINT Z ((30 10 40)), 
MULTILINESTRING Z ((30 10 40, 10 30 40, 40 40 80)), MULTIPOLYGON Z (((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40))))", + { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Point", + "coordinates": [ + 30, + 10, + 40 + ] + }, + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 40 + ], + [ + 10, + 30, + 40 + ], + [ + 40, + 40, + 80 + ] + ] + }, + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 40 + ], + [ + 40, + 40, + 80 + ], + [ + 20, + 40, + 60 + ], + [ + 10, + 20, + 30 + ], + [ + 30, + 10, + 40 + ] + ] + ] + }, + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 40 + ] + ] + }, + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 40 + ], + [ + 10, + 30, + 40 + ], + [ + 40, + 40, + 80 + ] + ] + ] + }, + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 40 + ], + [ + 40, + 40, + 80 + ], + [ + 20, + 40, + 60 + ], + [ + 10, + 20, + 30 + ], + [ + 30, + 10, + 40 + ] + ] + ] + ] + } + ] + } + ], + [ + "all", + "POINT M (30 10 300)", + { + "type": "Point", + "coordinates": [ + 30, + 10, + 300 + ] + } + ], + [ + "all", + "LINESTRING M (30 10 300, 10 30 300, 40 40 1600)", + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 300 + ], + [ + 10, + 30, + 300 + ], + [ + 40, + 40, + 1600 + ] + ] + } + ], + [ + "all", + "POLYGON M ((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300))", + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 300 + ], + [ + 40, + 40, + 1600 + ], + [ + 20, + 40, + 800 + ], + [ + 10, + 20, + 200 + ], + [ + 30, + 10, + 300 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOINT M ((30 10 300))", + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 300 + ] + ] + } + ], + [ + "all", + "MULTILINESTRING M ((30 10 300, 10 30 300, 40 40 1600))", + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 300 + ], + [ + 10, + 30, + 300 + ], + [ + 40, + 40, + 1600 + ] + ] + ] + } + ], + [ + "all", + 
"MULTIPOLYGON M (((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300)))", + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 300 + ], + [ + 40, + 40, + 1600 + ], + [ + 20, + 40, + 800 + ], + [ + 10, + 20, + 200 + ], + [ + 30, + 10, + 300 + ] + ] + ] + ] + } + ], + [ + "all", + "GEOMETRYCOLLECTION M (POINT M (30 10 300), LINESTRING M (30 10 300, 10 30 300, 40 40 1600), POLYGON M ((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300)), MULTIPOINT M ((30 10 300)), MULTILINESTRING M ((30 10 300, 10 30 300, 40 40 1600)), MULTIPOLYGON M (((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300))))", + { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Point", + "coordinates": [ + 30, + 10, + 300 + ] + }, + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 300 + ], + [ + 10, + 30, + 300 + ], + [ + 40, + 40, + 1600 + ] + ] + }, + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 300 + ], + [ + 40, + 40, + 1600 + ], + [ + 20, + 40, + 800 + ], + [ + 10, + 20, + 200 + ], + [ + 30, + 10, + 300 + ] + ] + ] + }, + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 300 + ] + ] + }, + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 300 + ], + [ + 10, + 30, + 300 + ], + [ + 40, + 40, + 1600 + ] + ] + ] + }, + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 300 + ], + [ + 40, + 40, + 1600 + ], + [ + 20, + 40, + 800 + ], + [ + 10, + 20, + 200 + ], + [ + 30, + 10, + 300 + ] + ] + ] + ] + } + ] + } + ], + [ + "all", + "POINT ZM (30 10 40 300)", + { + "type": "Point", + "coordinates": [ + 30, + 10, + 40, + 300 + ] + } + ], + [ + "all", + "LINESTRING ZM (30 10 40 300, 10 30 40 300, 40 40 80 1600)", + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 40, + 300 + ], + [ + 10, + 30, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ] + ] + } + ], + [ + "all", + "POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 
300))", + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ], + [ + 20, + 40, + 60, + 800 + ], + [ + 10, + 20, + 30, + 200 + ], + [ + 30, + 10, + 40, + 300 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOINT ZM ((30 10 40 300))", + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 40, + 300 + ] + ] + } + ], + [ + "all", + "MULTILINESTRING ZM ((30 10 40 300, 10 30 40 300, 40 40 80 1600))", + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 10, + 30, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOLYGON ZM (((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300)))", + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ], + [ + 20, + 40, + 60, + 800 + ], + [ + 10, + 20, + 30, + 200 + ], + [ + 30, + 10, + 40, + 300 + ] + ] + ] + ] + } + ], + [ + "all", + "GEOMETRYCOLLECTION ZM (POINT ZM (30 10 40 300), LINESTRING ZM (30 10 40 300, 10 30 40 300, 40 40 80 1600), POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300)), MULTIPOINT ZM ((30 10 40 300)), MULTILINESTRING ZM ((30 10 40 300, 10 30 40 300, 40 40 80 1600)), MULTIPOLYGON ZM (((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300))))", + { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Point", + "coordinates": [ + 30, + 10, + 40, + 300 + ] + }, + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 40, + 300 + ], + [ + 10, + 30, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ] + ] + }, + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ], + [ + 20, + 40, + 60, + 800 + ], + [ + 10, + 20, + 30, + 200 + ], + [ + 30, + 10, + 40, + 300 + ] + ] + ] + }, + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 40, + 300 + ] + ] + }, + { + 
"type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 10, + 30, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ] + ] + ] + }, + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ], + [ + 20, + 40, + 60, + 800 + ], + [ + 10, + 20, + 30, + 200 + ], + [ + 30, + 10, + 40, + 300 + ] + ] + ] + ] + } + ] + } + ] +] diff --git a/test/files/geospatial.metadata.json b/test/files/geospatial.metadata.json new file mode 100644 index 0000000..a37300e --- /dev/null +++ b/test/files/geospatial.metadata.json @@ -0,0 +1,221 @@ +{ + "version": 2, + "schema": [ + { + "repetition_type": "REQUIRED", + "name": "schema", + "num_children": 3 + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "group", + "converted_type": "UTF8", + "logical_type": { + "type": "STRING" + } + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "wkt", + "converted_type": "UTF8", + "logical_type": { + "type": "STRING" + } + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "geometry", + "logical_type": { + "type": "GEOMETRY" + } + } + ], + "num_rows": 28, + "row_groups": [ + { + "columns": [ + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN", + "RLE", + "RLE_DICTIONARY" + ], + "path_in_schema": [ + "group" + ], + "codec": "UNCOMPRESSED", + "num_values": 28, + "total_uncompressed_size": 61, + "total_compressed_size": 61, + "data_page_offset": 25, + "dictionary_page_offset": 4, + "statistics": { + "null_count": 0, + "max_value": "all", + "min_value": "all" + }, + "encoding_stats": [ + { + "page_type": "DICTIONARY_PAGE", + "encoding": "PLAIN", + "count": 1 + }, + { + "page_type": "DATA_PAGE", + "encoding": "RLE_DICTIONARY", + "count": 1 + } + ], + "size_statistics": { + "unencoded_byte_array_data_bytes": 84, + "repetition_level_histogram": [], + "definition_level_histogram": [ + 0, + 28 + ] + } + } + 
}, + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN", + "RLE", + "RLE_DICTIONARY" + ], + "path_in_schema": [ + "wkt" + ], + "codec": "UNCOMPRESSED", + "num_values": 28, + "total_uncompressed_size": 2841, + "total_compressed_size": 2841, + "data_page_offset": 2536, + "dictionary_page_offset": 65, + "statistics": { + "null_count": 0, + "max_value": "POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300))", + "min_value": "GEOMETRYCOLLECTION (POINT (30 10), LINESTRING (30 10, 10 30, 40 40), POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)), MULTIPOINT ((30 10)), MULTILINESTRING ((30 10, 10 30, 40 40)), MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10))))" + }, + "encoding_stats": [ + { + "page_type": "DICTIONARY_PAGE", + "encoding": "PLAIN", + "count": 1 + }, + { + "page_type": "DATA_PAGE", + "encoding": "RLE_DICTIONARY", + "count": 1 + } + ], + "size_statistics": { + "unencoded_byte_array_data_bytes": 2343, + "repetition_level_histogram": [], + "definition_level_histogram": [ + 0, + 28 + ] + } + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN", + "RLE", + "RLE_DICTIONARY" + ], + "path_in_schema": [ + "geometry" + ], + "codec": "UNCOMPRESSED", + "num_values": 28, + "total_uncompressed_size": 4315, + "total_compressed_size": 4315, + "data_page_offset": 7174, + "dictionary_page_offset": 2906, + "encoding_stats": [ + { + "page_type": "DICTIONARY_PAGE", + "encoding": "PLAIN", + "count": 1 + }, + { + "page_type": "DATA_PAGE", + "encoding": "RLE_DICTIONARY", + "count": 1 + } + ], + "size_statistics": { + "unencoded_byte_array_data_bytes": 4140, + "repetition_level_histogram": [], + "definition_level_histogram": [ + 0, + 28 + ] + }, + "geospatial_statistics": { + "bbox": { + "xmin": 10, + "xmax": 40, + "ymin": 10, + "ymax": 40, + "zmin": 30, + "zmax": 80, + "mmin": 200, + "mmax": 1600 + }, + "geospatial_types": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 1001, + 
1002, + 1003, + 1004, + 1005, + 1006, + 1007, + 2001, + 2002, + 2003, + 2004, + 2005, + 2006, + 2007, + 3001, + 3002, + 3003, + 3004, + 3005, + 3006, + 3007 + ] + } + } + } + ], + "total_byte_size": 7217, + "num_rows": 28, + "file_offset": 4, + "total_compressed_size": 7217 + } + ], + "created_by": "parquet-cpp-arrow version 21.0.0", + "metadata_length": 787 +} diff --git a/test/files/geospatial.parquet b/test/files/geospatial.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b13f057f69ad2556f2a286b1f73167760e8be18 GIT binary patch literal 8016 zcmeHMTWB0r7(SEB8f(mk)LFB#x`S+{Ik;ITn>I`#glV!NEN(8w^`Q|TLWwn)nsu96 zj1Ph!SP|dTM-i#FJ`@RFTJ^;jA4F@dMZ^c6s`Ub*A|m4dpL715%U-g%vO$O0nc4H- zzW@Kfb8<2}ePpIzZ_zvSj-1}4vo6M(8Dn#ci#pT0^sc0C>z#T?55)#qZ!-*c;{`OD z;A3l=&A|A)31<#8(fn2rOiyJe^G5H0W%OIzfPq|ga_oWpOm=cyjV3{23|L9ypam!I z6GpkC<5QDHue4L5q>)Aqp`V7E^vQ1ngNX-o`K$`1V(|ks?u|$eeIfZlQk|Znz@#WZ zRf){6gWdSp)WleR=IH2DE;lxs&mwGJ(n-S$K!6UUoHX3HN?t2$(x{ZK8uiAa9Q?_a z%r}%OhkmMN&Z@>?x9)*aPe_kQc{IuInKywH)W7*9GV0wDHO-q9L2DmW%HfY(PvCRG9(G;`hR^r-5D>;Q=+uzJCx* zGQyMO73ixH1+=J2HW~vFU^PkOqpT-J=6ASMP_SHAoDY)JkIq%829MuImQ*tY>9vy_ z!(8e>$ww*^4V+4%c}oV}^205qTMeztwyl)X$2p&)N~H5CR09?y9vW@Fi2Qaz8Zjln z6U{21gT)Vn$Wyi1fSdqA*E}4v;cpz0F)#e8Mb1sEqB3=R(1K}Pq9tJQFjE?GewRxk z0_{i59nuJ3_o>we&mSTmL{&_l%Dm+q_%oHMi$T;P8K||NzZ>g>%Vd#+x5c!4sHSbC z4?bp>Ij+R+GNpBTP>SFoQJ{y=2{<=_o^#mF*}Co9&gFg0-ljTrU)TOCnpGx>gP=KL@#%2?F z1RaFP%T8vr0ywpsAd-kyDWgqyV@Eqty9-SM*uF`}{9AeM>Gag`QF zU?P1~l*`0rPyoiLqg+V}X!a>N7|ZcY;JzxCi7&uu&1Is71L-)pOq@}H2nfj+)Oa~? z6C7z*6cDXe9mF#6C24MVLBuj~mAp4XWyCV^6(9;>iE33w+wNj$jgWtRtk8Ch5n4mB zp$AFu?k#W?M_?ksZbA&hgk69K?LAPex?Fgag;BNLMb+6)n}k*8Ee!TP1gp7>lm%31 zX`HzZB_LWhNRM_OXByksPArdQs6+lr~ubh&mvk}B8%NB>>5V>fTHfD z3ZKQ@&4++Igela+*j;1mo?`08Si1WR-J9$Tz8wON;{WlsR_)bBG{L+c}<>FI|?RSGt#E!LouQ|#? 
zEQYV=;Yc{@Awv)4Bf})h)58KitU4_Ed#tG?)V8m^<8t^)_-goC`0MaD;cvq)$6txR z8b23*J^p6=eEiqcZ>c|0f2RIQ{hj&;{|H9DD@1Iw4Q{O0D9ja~I5ofAcdSt8n=2NV hP8(0o7f&uMJ#M7?Q&!4~4gu5U$@as(Vj6x+{}+JK9B2Rl literal 0 HcmV?d00001 diff --git a/test/wkb.test.js b/test/wkb.test.js new file mode 100644 index 0000000..dce7377 --- /dev/null +++ b/test/wkb.test.js @@ -0,0 +1,211 @@ +import { describe, expect, it } from 'vitest' +import { wkbToGeojson } from '../src/wkb.js' + +/** + * @param {Uint8Array} buffer + * @returns {import('../src/types.d.ts').DataReader} + */ +function makeReader(buffer) { + return { + view: new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength), + offset: 0, + } +} + +describe('wkbToGeojson', () => { + it('decodes little-endian Point', () => { + const buffer = new Uint8Array([ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 128, 89, 64, 0, 0, 0, 0, 0, 0, 224, + 63, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [102, 0.5], + }) + }) + + it('decodes big-endian LineString', () => { + const buffer = new Uint8Array([ + 0, 0, 0, 0, 2, 0, 0, 0, 2, 63, 248, 0, 0, 0, 0, 0, 0, 192, 12, 0, + 0, 0, 0, 0, 0, 64, 17, 0, 0, 0, 0, 0, 0, 64, 23, 0, 0, 0, 0, 0, + 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'LineString', + coordinates: [ + [1.5, -3.5], + [4.25, 5.75], + ], + }) + }) + + it('decodes little-endian Polygon', () => { + const buffer = new Uint8Array([ + 1, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 63, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 63, 0, 0, 0, 0, 0, 0, 240, + 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Polygon', + coordinates: [ + [ + [0, 0], + [1, 0], + [1, 1], + [0, 0], + ], + ], + }) + }) + + it('decodes little-endian MultiLineString', () => { + const buffer = new Uint8Array([ + 1, 5, 0, 0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 
0, 240, 63, 0, 0, 0, 0, 0, 0, 240, 63, 0, 0, 0, 0, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 1, 2, 0, 0, 0, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, + 0, 16, 64, 0, 0, 0, 0, 0, 0, 16, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'MultiLineString', + coordinates: [ + [ + [1, 1], + [2, 2], + ], + [ + [3, 3], + [4, 4], + ], + ], + }) + }) + + it('decodes mixed-endian MultiPoint', () => { + const buffer = new Uint8Array([ + 1, 4, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 1, 191, 240, 0, 0, 0, + 0, 0, 0, 63, 224, 0, 0, 0, 0, 0, 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'MultiPoint', + coordinates: [ + [2, 3], + [-1, 0.5], + ], + }) + }) + + it('decodes nested MultiPolygon', () => { + const buffer = new Uint8Array([ + 1, 6, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, + 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'MultiPolygon', + coordinates: [ + [ + [ + [0, 0], + [0, 2], + [2, 2], + [0, 0], + ], + ], + ], + }) + }) + + it('decodes GeometryCollection', () => { + const buffer = new Uint8Array([ + 1, 7, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, + 63, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 2, 0, 0, 0, 2, 64, 8, 0, + 0, 0, 0, 0, 0, 64, 16, 0, 0, 0, 0, 0, 0, 64, 20, 0, 0, 0, 0, 0, 0, + 64, 24, 0, 0, 0, 0, 0, 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'GeometryCollection', + geometries: [ + { type: 'Point', coordinates: [1, 2] }, + { + type: 'LineString', + coordinates: [ + [3, 4], + [5, 6], + ], + }, + ], + }) + }) + + it('throws on unsupported geometry type', () => { + const buffer = new Uint8Array([ + 1, 99, 0, 0, 0, + ]) + + expect(() => 
wkbToGeojson(makeReader(buffer))).toThrowError('Unsupported geometry type: 99') + }) + + it('decodes ISO WKB Point with Z/M flags', () => { + const buffer = new Uint8Array([ + 1, + 185, 11, 0, 0, + 0, 0, 0, 0, 0, 0, 240, 63, + 0, 0, 0, 0, 0, 0, 0, 64, + 0, 0, 0, 0, 0, 0, 8, 64, + 0, 0, 0, 0, 0, 0, 16, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [1, 2, 3, 4], + }) + }) + + it('decodes point encoded with dimensional offsets', () => { + const buffer = new Uint8Array([ + 1, 185, 11, 0, 0, 0, 0, 0, 0, 0, 0, 20, 64, 0, 0, 0, 0, 0, + 0, 24, 64, 0, 0, 0, 0, 0, 0, 28, 64, 0, 0, 0, 0, 0, 0, 32, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [5, 6, 7, 8], + }) + }) + + it('decodes point with M-only dimensional offset', () => { + const buffer = new Uint8Array([ + 1, 209, 7, 0, 0, 0, 0, 0, 0, 0, 0, 34, 64, 0, 0, 0, 0, 0, + 0, 36, 64, 0, 0, 0, 0, 0, 0, 38, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [9, 10, 11], + }) + }) + + it('decodes point with Z-only dimensional offset', () => { + const buffer = new Uint8Array([ + 1, 233, 3, 0, 0, 0, 0, 0, 0, 0, 0, 40, 64, 0, 0, 0, 0, 0, + 0, 42, 64, 0, 0, 0, 0, 0, 0, 44, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [12, 13, 14], + }) + }) +})