diff --git a/src/convert.js b/src/convert.js index 927741c..21555a3 100644 --- a/src/convert.js +++ b/src/convert.js @@ -1,5 +1,7 @@ +import { wkbToGeojson } from './wkb.js' + /** - * @import {ColumnDecoder, DecodedArray, Encoding, ParquetParsers} from '../src/types.d.ts' + * @import {ColumnDecoder, DecodedArray, Encoding, ParquetParsers} from '../src/types.js' */ const decoder = new TextDecoder() @@ -19,12 +21,17 @@ export const DEFAULT_PARSERS = { return new Date(Number(nanos / 1000000n)) }, dateFromDays(days) { - const dayInMillis = 86400000 - return new Date(days * dayInMillis) + return new Date(days * 86400000) }, stringFromBytes(bytes) { return bytes && decoder.decode(bytes) }, + geometryFromBytes(bytes) { + return bytes && wkbToGeojson({ view: new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength), offset: 0 }) + }, + geographyFromBytes(bytes) { + return bytes && wkbToGeojson({ view: new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength), offset: 0 }) + }, } /** @@ -76,35 +83,18 @@ export function convert(data, columnDecoder) { return arr } if (!ctype && type === 'INT96') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - arr[i] = parsers.timestampFromNanoseconds(parseInt96Nanos(data[i])) - } - return arr + return Array.from(data).map(v => parsers.timestampFromNanoseconds(parseInt96Nanos(v))) } if (ctype === 'DATE') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - arr[i] = parsers.dateFromDays(data[i]) - } - return arr + return Array.from(data).map(v => parsers.dateFromDays(v)) } if (ctype === 'TIMESTAMP_MILLIS') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - arr[i] = parsers.timestampFromMilliseconds(data[i]) - } - return arr + return Array.from(data).map(v => parsers.timestampFromMilliseconds(v)) } if (ctype === 'TIMESTAMP_MICROS') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - arr[i] = 
parsers.timestampFromMicroseconds(data[i]) - } - return arr + return Array.from(data).map(v => parsers.timestampFromMicroseconds(v)) } if (ctype === 'JSON') { - const decoder = new TextDecoder() return data.map(v => JSON.parse(decoder.decode(v))) } if (ctype === 'BSON') { @@ -113,13 +103,14 @@ export function convert(data, columnDecoder) { if (ctype === 'INTERVAL') { throw new Error('parquet interval not supported') } + if (ltype?.type === 'GEOMETRY') { + return data.map(v => parsers.geometryFromBytes(v)) + } + if (ltype?.type === 'GEOGRAPHY') { + return data.map(v => parsers.geographyFromBytes(v)) + } if (ctype === 'UTF8' || ltype?.type === 'STRING' || utf8 && type === 'BYTE_ARRAY') { - const arr = new Array(data.length) - for (let i = 0; i < arr.length; i++) { - const value = data[i] - arr[i] = value instanceof Uint8Array ? parsers.stringFromBytes(value) : value - } - return arr + return data.map(v => parsers.stringFromBytes(v)) } if (ctype === 'UINT_64' || ltype?.type === 'INTEGER' && ltype.bitWidth === 64 && !ltype.isSigned) { if (data instanceof BigInt64Array) { diff --git a/src/types.d.ts b/src/types.d.ts index bc42633..b78d287 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -8,6 +8,8 @@ export interface ParquetParsers { timestampFromNanoseconds(nanos: bigint): any dateFromDays(days: number): any stringFromBytes(bytes: Uint8Array): any + geometryFromBytes(bytes: Uint8Array): any + geographyFromBytes(bytes: Uint8Array): any } /** @@ -432,3 +434,65 @@ export interface AsyncRowGroup { groupRows: number asyncColumns: AsyncColumn[] } + +/** + * Geometry types based on the GeoJSON specification (RFC 7946) + */ +export type Geometry = + | Point + | MultiPoint + | LineString + | MultiLineString + | Polygon + | MultiPolygon + | GeometryCollection + +/** + * Position is an array of at least two numbers. + * The order should be [longitude, latitude] with optional properties (eg- altitude). 
/**
 * WKB (Well-Known Binary) decoder for geometry objects.
 *
 * Decodes the single ISO WKB geometry that starts at `reader.offset` and
 * advances `reader.offset` past the bytes it consumed, which is what lets
 * GeometryCollection members be decoded by recursive calls on the same reader.
 *
 * Positions carry every ordinate present in the WKB, in encoding order:
 * [x, y], [x, y, z], [x, y, m] or [x, y, z, m].
 *
 * @import {DataReader, Geometry} from '../src/types.js'
 * @param {DataReader} reader
 * @returns {Geometry} geometry object
 * @throws {Error} if the geometry type or dimension code is not supported, or
 *   if a Multi* member is not of the member type its container requires
 */
export function wkbToGeojson(reader) {
  const flags = getFlags(reader)

  if (flags.type === 1) { // Point
    return { type: 'Point', coordinates: readPosition(reader, flags) }
  } else if (flags.type === 2) { // LineString
    return { type: 'LineString', coordinates: readLine(reader, flags) }
  } else if (flags.type === 3) { // Polygon
    return { type: 'Polygon', coordinates: readPolygon(reader, flags) }
  } else if (flags.type === 4) { // MultiPoint
    // every member is a full WKB Point with its own byte order and type header
    const points = []
    for (let i = 0; i < flags.count; i++) {
      points.push(readPosition(reader, getMemberFlags(reader, 1)))
    }
    return { type: 'MultiPoint', coordinates: points }
  } else if (flags.type === 5) { // MultiLineString
    const lines = []
    for (let i = 0; i < flags.count; i++) {
      lines.push(readLine(reader, getMemberFlags(reader, 2)))
    }
    return { type: 'MultiLineString', coordinates: lines }
  } else if (flags.type === 6) { // MultiPolygon
    const polygons = []
    for (let i = 0; i < flags.count; i++) {
      polygons.push(readPolygon(reader, getMemberFlags(reader, 3)))
    }
    return { type: 'MultiPolygon', coordinates: polygons }
  } else if (flags.type === 7) { // GeometryCollection
    // members may be of any type, so recurse through the public entry point
    const geometries = []
    for (let i = 0; i < flags.count; i++) {
      geometries.push(wkbToGeojson(reader))
    }
    return { type: 'GeometryCollection', geometries }
  } else {
    throw new Error(`Unsupported geometry type: ${flags.type}`)
  }
}

/**
 * Decoded WKB geometry header.
 *
 * @typedef {object} WkbFlags
 * @property {boolean} littleEndian true when the byte-order marker is 1
 * @property {number} type base geometry type (type code modulo 1000)
 * @property {number} dim number of ordinates per position: 2, 3 or 4
 * @property {number} count element count stored after the type code:
 *   points for a LineString, rings for a Polygon, members for Multi* and
 *   GeometryCollection; 0 for a Point, which stores no count
 */

/**
 * Extract ISO WKB flags and base geometry type.
 *
 * The ISO type code is `dimension * 1000 + baseType`, where dimension is
 * 0 (XY), 1 (XYZ), 2 (XYM) or 3 (XYZM). Any other dimension value is rejected
 * instead of being decoded as if it were three-dimensional.
 *
 * @param {DataReader} reader
 * @returns {WkbFlags}
 * @throws {Error} if the dimension part of the type code is greater than 3
 */
function getFlags(reader) {
  const { view } = reader
  // byte-order marker: 1 is little-endian, anything else is read as big-endian
  const littleEndian = view.getUint8(reader.offset++) === 1
  const rawType = view.getUint32(reader.offset, littleEndian)
  reader.offset += 4

  const type = rawType % 1000
  const flags = Math.floor(rawType / 1000)
  if (flags > 3) {
    // not an ISO dimension code, report the untouched code to ease debugging
    throw new Error(`Unsupported geometry type: ${rawType}`)
  }

  // every supported type except Point is followed by a uint32 element count
  let count = 0
  if (type > 1 && type <= 7) {
    count = view.getUint32(reader.offset, littleEndian)
    reader.offset += 4
  }

  // XY, XYZ, XYM, XYZM
  let dim = 2
  if (flags) dim++
  if (flags === 3) dim++

  return { littleEndian, type, dim, count }
}

/**
 * Read the header of a Multi* member and check that it has the member type
 * required by its container.
 *
 * @param {DataReader} reader
 * @param {number} type required base geometry type of the member
 * @returns {WkbFlags}
 * @throws {Error} if the member header declares a different base type
 */
function getMemberFlags(reader, type) {
  const flags = getFlags(reader)
  if (flags.type !== type) {
    throw new Error(`Unexpected geometry type: ${flags.type}, expected ${type}`)
  }
  return flags
}

/**
 * Read one position: `flags.dim` consecutive float64 ordinates.
 *
 * @param {DataReader} reader
 * @param {WkbFlags} flags
 * @returns {number[]}
 */
function readPosition(reader, flags) {
  const points = []
  for (let i = 0; i < flags.dim; i++) {
    const coord = reader.view.getFloat64(reader.offset, flags.littleEndian)
    reader.offset += 8
    points.push(coord)
  }
  return points
}

/**
 * Read `flags.count` consecutive positions.
 *
 * @param {DataReader} reader
 * @param {WkbFlags} flags
 * @returns {number[][]}
 */
function readLine(reader, flags) {
  const points = []
  for (let i = 0; i < flags.count; i++) {
    points.push(readPosition(reader, flags))
  }
  return points
}

/**
 * Read `flags.count` rings, each stored as a uint32 point count followed by
 * that many positions.
 *
 * @param {DataReader} reader
 * @param {WkbFlags} flags
 * @returns {number[][][]}
 */
function readPolygon(reader, flags) {
  const { view } = reader
  const rings = []
  for (let r = 0; r < flags.count; r++) {
    const count = view.getUint32(reader.offset, flags.littleEndian)
    reader.offset += 4
    // rings share the polygon's byte order and dimension, only count differs
    rings.push(readLine(reader, { ...flags, count }))
  }
  return rings
}
@type {SchemaElement} */ + const element = { name, type: 'BYTE_ARRAY', logical_type: { type: 'GEOMETRY' } } + /** @type {Pick} */ + const columnParser = { + element, + parsers: { + ...parsers, + geometryFromBytes: () => 'custom-geometry', + }, + } + + expect(convert(data, columnParser)).toEqual(['custom-geometry']) + }) + + it('respects custom parsers - geographyFromBytes', () => { + const pointWkb = new Uint8Array([ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 128, 89, 64, 0, 0, 0, 0, 0, 0, 224, + 63, + ]) + const data = [pointWkb] + /** @type {SchemaElement} */ + const element = { name, type: 'BYTE_ARRAY', logical_type: { type: 'GEOGRAPHY' } } + /** @type {Pick} */ + const columnParser = { + element, + parsers: { + ...parsers, + geographyFromBytes: () => 'custom-geojson', + }, + } + + expect(convert(data, columnParser)).toEqual(['custom-geojson']) + }) }) describe('parseFloat16', () => { diff --git a/test/files/geospatial.json b/test/files/geospatial.json new file mode 100644 index 0000000..0408b27 --- /dev/null +++ b/test/files/geospatial.json @@ -0,0 +1,1142 @@ +[ + [ + "all", + "POINT (30 10)", + { + "type": "Point", + "coordinates": [ + 30, + 10 + ] + } + ], + [ + "all", + "LINESTRING (30 10, 10 30, 40 40)", + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10 + ], + [ + 10, + 30 + ], + [ + 40, + 40 + ] + ] + } + ], + [ + "all", + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10 + ], + [ + 40, + 40 + ], + [ + 20, + 40 + ], + [ + 10, + 20 + ], + [ + 30, + 10 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOINT ((30 10))", + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10 + ] + ] + } + ], + [ + "all", + "MULTILINESTRING ((30 10, 10 30, 40 40))", + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10 + ], + [ + 10, + 30 + ], + [ + 40, + 40 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))", + { + "type": "MultiPolygon", + "coordinates": [ + [ 
+ [ + [ + 30, + 10 + ], + [ + 40, + 40 + ], + [ + 20, + 40 + ], + [ + 10, + 20 + ], + [ + 30, + 10 + ] + ] + ] + ] + } + ], + [ + "all", + "GEOMETRYCOLLECTION (POINT (30 10), LINESTRING (30 10, 10 30, 40 40), POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)), MULTIPOINT ((30 10)), MULTILINESTRING ((30 10, 10 30, 40 40)), MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10))))", + { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Point", + "coordinates": [ + 30, + 10 + ] + }, + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10 + ], + [ + 10, + 30 + ], + [ + 40, + 40 + ] + ] + }, + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10 + ], + [ + 40, + 40 + ], + [ + 20, + 40 + ], + [ + 10, + 20 + ], + [ + 30, + 10 + ] + ] + ] + }, + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10 + ] + ] + }, + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10 + ], + [ + 10, + 30 + ], + [ + 40, + 40 + ] + ] + ] + }, + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10 + ], + [ + 40, + 40 + ], + [ + 20, + 40 + ], + [ + 10, + 20 + ], + [ + 30, + 10 + ] + ] + ] + ] + } + ] + } + ], + [ + "all", + "POINT Z (30 10 40)", + { + "type": "Point", + "coordinates": [ + 30, + 10, + 40 + ] + } + ], + [ + "all", + "LINESTRING Z (30 10 40, 10 30 40, 40 40 80)", + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 40 + ], + [ + 10, + 30, + 40 + ], + [ + 40, + 40, + 80 + ] + ] + } + ], + [ + "all", + "POLYGON Z ((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40))", + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 40 + ], + [ + 40, + 40, + 80 + ], + [ + 20, + 40, + 60 + ], + [ + 10, + 20, + 30 + ], + [ + 30, + 10, + 40 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOINT Z ((30 10 40))", + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 40 + ] + ] + } + ], + [ + "all", + "MULTILINESTRING Z ((30 10 40, 10 30 40, 40 40 80))", + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 
30, + 10, + 40 + ], + [ + 10, + 30, + 40 + ], + [ + 40, + 40, + 80 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOLYGON Z (((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40)))", + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 40 + ], + [ + 40, + 40, + 80 + ], + [ + 20, + 40, + 60 + ], + [ + 10, + 20, + 30 + ], + [ + 30, + 10, + 40 + ] + ] + ] + ] + } + ], + [ + "all", + "GEOMETRYCOLLECTION Z (POINT Z (30 10 40), LINESTRING Z (30 10 40, 10 30 40, 40 40 80), POLYGON Z ((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40)), MULTIPOINT Z ((30 10 40)), MULTILINESTRING Z ((30 10 40, 10 30 40, 40 40 80)), MULTIPOLYGON Z (((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40))))", + { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Point", + "coordinates": [ + 30, + 10, + 40 + ] + }, + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 40 + ], + [ + 10, + 30, + 40 + ], + [ + 40, + 40, + 80 + ] + ] + }, + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 40 + ], + [ + 40, + 40, + 80 + ], + [ + 20, + 40, + 60 + ], + [ + 10, + 20, + 30 + ], + [ + 30, + 10, + 40 + ] + ] + ] + }, + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 40 + ] + ] + }, + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 40 + ], + [ + 10, + 30, + 40 + ], + [ + 40, + 40, + 80 + ] + ] + ] + }, + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 40 + ], + [ + 40, + 40, + 80 + ], + [ + 20, + 40, + 60 + ], + [ + 10, + 20, + 30 + ], + [ + 30, + 10, + 40 + ] + ] + ] + ] + } + ] + } + ], + [ + "all", + "POINT M (30 10 300)", + { + "type": "Point", + "coordinates": [ + 30, + 10, + 300 + ] + } + ], + [ + "all", + "LINESTRING M (30 10 300, 10 30 300, 40 40 1600)", + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 300 + ], + [ + 10, + 30, + 300 + ], + [ + 40, + 40, + 1600 + ] + ] + } + ], + [ + "all", + "POLYGON M ((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300))", + { + 
"type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 300 + ], + [ + 40, + 40, + 1600 + ], + [ + 20, + 40, + 800 + ], + [ + 10, + 20, + 200 + ], + [ + 30, + 10, + 300 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOINT M ((30 10 300))", + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 300 + ] + ] + } + ], + [ + "all", + "MULTILINESTRING M ((30 10 300, 10 30 300, 40 40 1600))", + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 300 + ], + [ + 10, + 30, + 300 + ], + [ + 40, + 40, + 1600 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOLYGON M (((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300)))", + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 300 + ], + [ + 40, + 40, + 1600 + ], + [ + 20, + 40, + 800 + ], + [ + 10, + 20, + 200 + ], + [ + 30, + 10, + 300 + ] + ] + ] + ] + } + ], + [ + "all", + "GEOMETRYCOLLECTION M (POINT M (30 10 300), LINESTRING M (30 10 300, 10 30 300, 40 40 1600), POLYGON M ((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300)), MULTIPOINT M ((30 10 300)), MULTILINESTRING M ((30 10 300, 10 30 300, 40 40 1600)), MULTIPOLYGON M (((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300))))", + { + "type": "GeometryCollection", + "geometries": [ + { + "type": "Point", + "coordinates": [ + 30, + 10, + 300 + ] + }, + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 300 + ], + [ + 10, + 30, + 300 + ], + [ + 40, + 40, + 1600 + ] + ] + }, + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 300 + ], + [ + 40, + 40, + 1600 + ], + [ + 20, + 40, + 800 + ], + [ + 10, + 20, + 200 + ], + [ + 30, + 10, + 300 + ] + ] + ] + }, + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 300 + ] + ] + }, + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 300 + ], + [ + 10, + 30, + 300 + ], + [ + 40, + 40, + 1600 + ] + ] + ] + }, + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 300 + ], + [ + 40, + 40, + 1600 + ], 
+ [ + 20, + 40, + 800 + ], + [ + 10, + 20, + 200 + ], + [ + 30, + 10, + 300 + ] + ] + ] + ] + } + ] + } + ], + [ + "all", + "POINT ZM (30 10 40 300)", + { + "type": "Point", + "coordinates": [ + 30, + 10, + 40, + 300 + ] + } + ], + [ + "all", + "LINESTRING ZM (30 10 40 300, 10 30 40 300, 40 40 80 1600)", + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 40, + 300 + ], + [ + 10, + 30, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ] + ] + } + ], + [ + "all", + "POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300))", + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ], + [ + 20, + 40, + 60, + 800 + ], + [ + 10, + 20, + 30, + 200 + ], + [ + 30, + 10, + 40, + 300 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOINT ZM ((30 10 40 300))", + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 40, + 300 + ] + ] + } + ], + [ + "all", + "MULTILINESTRING ZM ((30 10 40 300, 10 30 40 300, 40 40 80 1600))", + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 10, + 30, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ] + ] + ] + } + ], + [ + "all", + "MULTIPOLYGON ZM (((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300)))", + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ], + [ + 20, + 40, + 60, + 800 + ], + [ + 10, + 20, + 30, + 200 + ], + [ + 30, + 10, + 40, + 300 + ] + ] + ] + ] + } + ], + [ + "all", + "GEOMETRYCOLLECTION ZM (POINT ZM (30 10 40 300), LINESTRING ZM (30 10 40 300, 10 30 40 300, 40 40 80 1600), POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300)), MULTIPOINT ZM ((30 10 40 300)), MULTILINESTRING ZM ((30 10 40 300, 10 30 40 300, 40 40 80 1600)), MULTIPOLYGON ZM (((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300))))", + { + "type": "GeometryCollection", + 
"geometries": [ + { + "type": "Point", + "coordinates": [ + 30, + 10, + 40, + 300 + ] + }, + { + "type": "LineString", + "coordinates": [ + [ + 30, + 10, + 40, + 300 + ], + [ + 10, + 30, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ] + ] + }, + { + "type": "Polygon", + "coordinates": [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ], + [ + 20, + 40, + 60, + 800 + ], + [ + 10, + 20, + 30, + 200 + ], + [ + 30, + 10, + 40, + 300 + ] + ] + ] + }, + { + "type": "MultiPoint", + "coordinates": [ + [ + 30, + 10, + 40, + 300 + ] + ] + }, + { + "type": "MultiLineString", + "coordinates": [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 10, + 30, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ] + ] + ] + }, + { + "type": "MultiPolygon", + "coordinates": [ + [ + [ + [ + 30, + 10, + 40, + 300 + ], + [ + 40, + 40, + 80, + 1600 + ], + [ + 20, + 40, + 60, + 800 + ], + [ + 10, + 20, + 30, + 200 + ], + [ + 30, + 10, + 40, + 300 + ] + ] + ] + ] + } + ] + } + ] +] diff --git a/test/files/geospatial.metadata.json b/test/files/geospatial.metadata.json new file mode 100644 index 0000000..a37300e --- /dev/null +++ b/test/files/geospatial.metadata.json @@ -0,0 +1,221 @@ +{ + "version": 2, + "schema": [ + { + "repetition_type": "REQUIRED", + "name": "schema", + "num_children": 3 + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "group", + "converted_type": "UTF8", + "logical_type": { + "type": "STRING" + } + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "wkt", + "converted_type": "UTF8", + "logical_type": { + "type": "STRING" + } + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "geometry", + "logical_type": { + "type": "GEOMETRY" + } + } + ], + "num_rows": 28, + "row_groups": [ + { + "columns": [ + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN", + "RLE", + "RLE_DICTIONARY" + ], + "path_in_schema": [ + "group" + ], + "codec": "UNCOMPRESSED", + 
"num_values": 28, + "total_uncompressed_size": 61, + "total_compressed_size": 61, + "data_page_offset": 25, + "dictionary_page_offset": 4, + "statistics": { + "null_count": 0, + "max_value": "all", + "min_value": "all" + }, + "encoding_stats": [ + { + "page_type": "DICTIONARY_PAGE", + "encoding": "PLAIN", + "count": 1 + }, + { + "page_type": "DATA_PAGE", + "encoding": "RLE_DICTIONARY", + "count": 1 + } + ], + "size_statistics": { + "unencoded_byte_array_data_bytes": 84, + "repetition_level_histogram": [], + "definition_level_histogram": [ + 0, + 28 + ] + } + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN", + "RLE", + "RLE_DICTIONARY" + ], + "path_in_schema": [ + "wkt" + ], + "codec": "UNCOMPRESSED", + "num_values": 28, + "total_uncompressed_size": 2841, + "total_compressed_size": 2841, + "data_page_offset": 2536, + "dictionary_page_offset": 65, + "statistics": { + "null_count": 0, + "max_value": "POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 300))", + "min_value": "GEOMETRYCOLLECTION (POINT (30 10), LINESTRING (30 10, 10 30, 40 40), POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)), MULTIPOINT ((30 10)), MULTILINESTRING ((30 10, 10 30, 40 40)), MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10))))" + }, + "encoding_stats": [ + { + "page_type": "DICTIONARY_PAGE", + "encoding": "PLAIN", + "count": 1 + }, + { + "page_type": "DATA_PAGE", + "encoding": "RLE_DICTIONARY", + "count": 1 + } + ], + "size_statistics": { + "unencoded_byte_array_data_bytes": 2343, + "repetition_level_histogram": [], + "definition_level_histogram": [ + 0, + 28 + ] + } + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "PLAIN", + "RLE", + "RLE_DICTIONARY" + ], + "path_in_schema": [ + "geometry" + ], + "codec": "UNCOMPRESSED", + "num_values": 28, + "total_uncompressed_size": 4315, + "total_compressed_size": 4315, + "data_page_offset": 7174, + "dictionary_page_offset": 2906, + 
"encoding_stats": [ + { + "page_type": "DICTIONARY_PAGE", + "encoding": "PLAIN", + "count": 1 + }, + { + "page_type": "DATA_PAGE", + "encoding": "RLE_DICTIONARY", + "count": 1 + } + ], + "size_statistics": { + "unencoded_byte_array_data_bytes": 4140, + "repetition_level_histogram": [], + "definition_level_histogram": [ + 0, + 28 + ] + }, + "geospatial_statistics": { + "bbox": { + "xmin": 10, + "xmax": 40, + "ymin": 10, + "ymax": 40, + "zmin": 30, + "zmax": 80, + "mmin": 200, + "mmax": 1600 + }, + "geospatial_types": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 1001, + 1002, + 1003, + 1004, + 1005, + 1006, + 1007, + 2001, + 2002, + 2003, + 2004, + 2005, + 2006, + 2007, + 3001, + 3002, + 3003, + 3004, + 3005, + 3006, + 3007 + ] + } + } + } + ], + "total_byte_size": 7217, + "num_rows": 28, + "file_offset": 4, + "total_compressed_size": 7217 + } + ], + "created_by": "parquet-cpp-arrow version 21.0.0", + "metadata_length": 787 +} diff --git a/test/files/geospatial.parquet b/test/files/geospatial.parquet new file mode 100644 index 0000000..9b13f05 Binary files /dev/null and b/test/files/geospatial.parquet differ diff --git a/test/wkb.test.js b/test/wkb.test.js new file mode 100644 index 0000000..dce7377 --- /dev/null +++ b/test/wkb.test.js @@ -0,0 +1,211 @@ +import { describe, expect, it } from 'vitest' +import { wkbToGeojson } from '../src/wkb.js' + +/** + * @param {Uint8Array} buffer + * @returns {import('../src/types.d.ts').DataReader} + */ +function makeReader(buffer) { + return { + view: new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength), + offset: 0, + } +} + +describe('wkbToGeojson', () => { + it('decodes little-endian Point', () => { + const buffer = new Uint8Array([ + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 128, 89, 64, 0, 0, 0, 0, 0, 0, 224, + 63, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [102, 0.5], + }) + }) + + it('decodes big-endian LineString', () => { + const buffer = new Uint8Array([ + 0, 0, 0, 0, 2, 0, 0, 
0, 2, 63, 248, 0, 0, 0, 0, 0, 0, 192, 12, 0, + 0, 0, 0, 0, 0, 64, 17, 0, 0, 0, 0, 0, 0, 64, 23, 0, 0, 0, 0, 0, + 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'LineString', + coordinates: [ + [1.5, -3.5], + [4.25, 5.75], + ], + }) + }) + + it('decodes little-endian Polygon', () => { + const buffer = new Uint8Array([ + 1, 3, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 63, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 63, 0, 0, 0, 0, 0, 0, 240, + 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Polygon', + coordinates: [ + [ + [0, 0], + [1, 0], + [1, 1], + [0, 0], + ], + ], + }) + }) + + it('decodes little-endian MultiLineString', () => { + const buffer = new Uint8Array([ + 1, 5, 0, 0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 240, 63, 0, 0, 0, 0, 0, 0, 240, 63, 0, 0, 0, 0, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 1, 2, 0, 0, 0, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, + 0, 16, 64, 0, 0, 0, 0, 0, 0, 16, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'MultiLineString', + coordinates: [ + [ + [1, 1], + [2, 2], + ], + [ + [3, 3], + [4, 4], + ], + ], + }) + }) + + it('decodes mixed-endian MultiPoint', () => { + const buffer = new Uint8Array([ + 1, 4, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 1, 191, 240, 0, 0, 0, + 0, 0, 0, 63, 224, 0, 0, 0, 0, 0, 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'MultiPoint', + coordinates: [ + [2, 3], + [-1, 0.5], + ], + }) + }) + + it('decodes nested MultiPolygon', () => { + const buffer = new Uint8Array([ + 1, 6, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, + 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, + 0, 0, 64, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'MultiPolygon', + coordinates: [ + [ + [ + [0, 0], + [0, 2], + [2, 2], + [0, 0], + ], + ], + ], + }) + }) + + it('decodes GeometryCollection', () => { + const buffer = new Uint8Array([ + 1, 7, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, + 63, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 2, 0, 0, 0, 2, 64, 8, 0, + 0, 0, 0, 0, 0, 64, 16, 0, 0, 0, 0, 0, 0, 64, 20, 0, 0, 0, 0, 0, 0, + 64, 24, 0, 0, 0, 0, 0, 0, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'GeometryCollection', + geometries: [ + { type: 'Point', coordinates: [1, 2] }, + { + type: 'LineString', + coordinates: [ + [3, 4], + [5, 6], + ], + }, + ], + }) + }) + + it('throws on unsupported geometry type', () => { + const buffer = new Uint8Array([ + 1, 99, 0, 0, 0, + ]) + + expect(() => wkbToGeojson(makeReader(buffer))).toThrowError('Unsupported geometry type: 99') + }) + + it('decodes ISO WKB Point with Z/M flags', () => { + const buffer = new Uint8Array([ + 1, + 185, 11, 0, 0, + 0, 0, 0, 0, 0, 0, 240, 63, + 0, 0, 0, 0, 0, 0, 0, 64, + 0, 0, 0, 0, 0, 0, 8, 64, + 0, 0, 0, 0, 0, 0, 16, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [1, 2, 3, 4], + }) + }) + + it('decodes point encoded with dimensional offsets', () => { + const buffer = new Uint8Array([ + 1, 185, 11, 0, 0, 0, 0, 0, 0, 0, 0, 20, 64, 0, 0, 0, 0, 0, + 0, 24, 64, 0, 0, 0, 0, 0, 0, 28, 64, 0, 0, 0, 0, 0, 0, 32, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [5, 6, 7, 8], + }) + }) + + it('decodes point with M-only dimensional offset', () => { + const buffer = new Uint8Array([ + 1, 209, 7, 0, 0, 0, 0, 0, 0, 0, 0, 34, 64, 0, 0, 0, 0, 0, + 0, 36, 64, 0, 0, 0, 0, 0, 0, 38, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [9, 10, 11], + }) + }) + + it('decodes 
point with Z-only dimensional offset', () => { + const buffer = new Uint8Array([ + 1, 233, 3, 0, 0, 0, 0, 0, 0, 0, 0, 40, 64, 0, 0, 0, 0, 0, + 0, 42, 64, 0, 0, 0, 0, 0, 0, 44, 64, + ]) + + expect(wkbToGeojson(makeReader(buffer))).toEqual({ + type: 'Point', + coordinates: [12, 13, 14], + }) + }) +})