mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Geospatial stats (#13)
This commit is contained in:
parent
3a2e0203aa
commit
bfb1d74bf8
10
README.md
10
README.md
@ -6,7 +6,7 @@
|
||||
[](https://www.npmjs.com/package/hyparquet-writer)
|
||||
[](https://github.com/hyparam/hyparquet-writer/actions)
|
||||
[](https://opensource.org/licenses/MIT)
|
||||

|
||||

|
||||
[](https://www.npmjs.com/package/hyparquet-writer?activeTab=dependencies)
|
||||
|
||||
Hyparquet Writer is a JavaScript library for writing [Apache Parquet](https://parquet.apache.org) files. It is designed to be lightweight, fast and store data very efficiently. It is a companion to the [hyparquet](https://github.com/hyparam/hyparquet) library, which is a JavaScript library for reading parquet files.
|
||||
@ -30,8 +30,10 @@ const arrayBuffer = parquetWriteBuffer({
|
||||
})
|
||||
```
|
||||
|
||||
Note: if `type` is not provided, the type will be guessed from the data. The supported types are a superset of the parquet types:
|
||||
Note: if `type` is not provided, the type will be guessed from the data. The supported `BasicType` are a superset of the parquet primitive types:
|
||||
|
||||
| Basic Type | Equivalent Schema Element |
|
||||
|------|----------------|
|
||||
| `BOOLEAN` | `{ type: 'BOOLEAN' }` |
|
||||
| `INT32` | `{ type: 'INT32' }` |
|
||||
| `INT64` | `{ type: 'INT64' }` |
|
||||
@ -43,10 +45,12 @@ Note: if `type` is not provided, the type will be guessed from the data. The sup
|
||||
| `TIMESTAMP` | `{ type: 'INT64', converted_type: 'TIMESTAMP_MILLIS' }` |
|
||||
| `UUID` | `{ type: 'FIXED_LEN_BYTE_ARRAY', type_length: 16, logical_type: { type: 'UUID' } }` |
|
||||
| `FLOAT16` | `{ type: 'FIXED_LEN_BYTE_ARRAY', type_length: 2, logical_type: { type: 'FLOAT16' } }` |
|
||||
| `GEOMETRY` | `{ type: 'BYTE_ARRAY', logical_type: { type: 'GEOMETRY' } }` |
|
||||
| `GEOGRAPHY` | `{ type: 'BYTE_ARRAY', logical_type: { type: 'GEOGRAPHY' } }` |
|
||||
|
||||
More types are supported but require defining the `schema` explicitly. See the [advanced usage](#advanced-usage) section for more details.
|
||||
|
||||
### Node.js Write to Local Parquet File
|
||||
### Write to Local Parquet File (nodejs)
|
||||
|
||||
To write a local parquet file in node.js use `parquetWriteFile` with arguments `filename` and `columnData`:
|
||||
|
||||
|
||||
@ -52,15 +52,15 @@
|
||||
"test": "vitest run"
|
||||
},
|
||||
"dependencies": {
|
||||
"hyparquet": "1.20.0"
|
||||
"hyparquet": "1.20.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@babel/eslint-parser": "7.28.5",
|
||||
"@types/node": "24.9.1",
|
||||
"@vitest/coverage-v8": "4.0.2",
|
||||
"@vitest/coverage-v8": "4.0.3",
|
||||
"eslint": "9.38.0",
|
||||
"eslint-plugin-jsdoc": "61.1.7",
|
||||
"eslint-plugin-jsdoc": "61.1.9",
|
||||
"typescript": "5.9.3",
|
||||
"vitest": "4.0.2"
|
||||
"vitest": "4.0.3"
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import { ByteWriter } from './bytewriter.js'
|
||||
import { writeDataPageV2, writePageHeader } from './datapage.js'
|
||||
import { encodeListValues } from './dremel.js'
|
||||
import { geospatialStatistics } from './geospatial.js'
|
||||
import { writePlain } from './plain.js'
|
||||
import { snappyCompress } from './snappy.js'
|
||||
import { unconvert } from './unconvert.js'
|
||||
@ -32,8 +33,11 @@ export function writeColumn(writer, column, values, stats) {
|
||||
/** @type {Encoding[]} */
|
||||
const encodings = []
|
||||
|
||||
const isGeospatial = element?.logical_type?.type === 'GEOMETRY' || element?.logical_type?.type === 'GEOGRAPHY'
|
||||
|
||||
// Compute statistics
|
||||
const statistics = stats ? getStatistics(values, element) : undefined
|
||||
const statistics = stats ? getStatistics(values) : undefined
|
||||
const geospatial_statistics = stats && isGeospatial ? geospatialStatistics(values) : undefined
|
||||
|
||||
// dictionary encoding
|
||||
let dictionary_page_offset
|
||||
@ -80,6 +84,7 @@ export function writeColumn(writer, column, values, stats) {
|
||||
data_page_offset,
|
||||
dictionary_page_offset,
|
||||
statistics,
|
||||
geospatial_statistics,
|
||||
}
|
||||
}
|
||||
|
||||
@ -135,14 +140,9 @@ function writeDictionaryPage(writer, column, dictionary) {
|
||||
* @import {ColumnMetaData, DecodedArray, Encoding, ParquetType, SchemaElement, Statistics} from 'hyparquet'
|
||||
* @import {ColumnEncoder, ListValues, Writer} from '../src/types.js'
|
||||
* @param {DecodedArray} values
|
||||
* @param {SchemaElement} element
|
||||
* @returns {Statistics | undefined}
|
||||
* @returns {Statistics}
|
||||
*/
|
||||
function getStatistics(values, element) {
|
||||
const ltype = element?.logical_type?.type
|
||||
const isGeospatial = ltype === 'GEOMETRY' || ltype === 'GEOGRAPHY'
|
||||
if (isGeospatial) return
|
||||
|
||||
function getStatistics(values) {
|
||||
let min_value = undefined
|
||||
let max_value = undefined
|
||||
let null_count = 0n
|
||||
@ -151,12 +151,9 @@ function getStatistics(values, element) {
|
||||
null_count++
|
||||
continue
|
||||
}
|
||||
if (min_value === undefined || value < min_value) {
|
||||
min_value = value
|
||||
}
|
||||
if (max_value === undefined || value > max_value) {
|
||||
max_value = value
|
||||
}
|
||||
if (typeof value === 'object') continue // skip objects
|
||||
if (min_value === undefined || value < min_value) min_value = value
|
||||
if (max_value === undefined || value > max_value) max_value = value
|
||||
}
|
||||
return { min_value, max_value, null_count }
|
||||
}
|
||||
|
||||
149
src/geospatial.js
Normal file
149
src/geospatial.js
Normal file
@ -0,0 +1,149 @@
|
||||
/**
 * Compute geospatial statistics for GEOMETRY and GEOGRAPHY columns.
 *
 * Scans GeoJSON geometry values, accumulating a bounding box and the set of
 * geometry type codes that occur. Returns undefined when nothing could be
 * derived (e.g. every value is null-like).
 *
 * @import {BoundingBox, DecodedArray, Geometry, GeospatialStatistics} from 'hyparquet/src/types.js'
 * @param {DecodedArray} values
 * @returns {GeospatialStatistics | undefined}
 */
export function geospatialStatistics(values) {
  /** @type {Set<number>} */
  const codes = new Set()
  /** @type {BoundingBox | undefined} */
  let bounds

  for (const geometry of values) {
    if (geometry == null) continue // nulls contribute no statistics
    if (typeof geometry !== 'object') {
      throw new Error('geospatial column expects GeoJSON geometries')
    }
    bounds = extendBoundsFromGeometry(bounds, geometry)
    codes.add(geometryTypeCodeWithDimension(geometry))
  }

  // no non-null geometry seen: omit geospatial statistics entirely
  if (!codes.size && !bounds) return undefined

  return {
    bbox: bounds,
    // geospatial type codes of all instances (numeric sort), or empty list if not known
    geospatial_types: codes.size ? [...codes].sort((a, b) => a - b) : [],
  }
}
|
||||
|
||||
/**
 * Extend bbox to cover one GeoJSON geometry, recursing into collections.
 * @param {BoundingBox | undefined} bbox
 * @param {Geometry} geometry
 * @returns {BoundingBox | undefined}
 */
function extendBoundsFromGeometry(bbox, geometry) {
  if (geometry.type !== 'GeometryCollection') {
    return extendBoundsFromCoordinates(bbox, geometry.coordinates)
  }
  // fold each child geometry of the collection into the accumulated box
  const children = geometry.geometries || []
  return children.reduce((acc, child) => extendBoundsFromGeometry(acc, child), bbox)
}
|
||||
|
||||
/**
 * Extend bbox from a (possibly nested) GeoJSON coordinates array.
 * A leaf array whose first element is a number is treated as one position.
 * @param {BoundingBox | undefined} bbox
 * @param {any[]} coordinates
 * @returns {BoundingBox | undefined}
 */
function extendBoundsFromCoordinates(bbox, coordinates) {
  const isPosition = typeof coordinates[0] === 'number'
  if (isPosition) return grow(bbox, coordinates)
  let acc = bbox
  for (const nested of coordinates) {
    acc = extendBoundsFromCoordinates(acc, nested)
  }
  return acc
}
|
||||
|
||||
/**
 * Initialize or expand bbox with a single position [x,y,(z),(m)].
 * Positions with a non-finite x or y are ignored entirely.
 * @param {BoundingBox | undefined} bbox
 * @param {number[]} position
 * @returns {BoundingBox | undefined}
 */
function grow(bbox, position) {
  const [x, y] = position
  if (!Number.isFinite(x) || !Number.isFinite(y)) return bbox

  // first finite position seeds the box; subsequent ones widen each axis
  const box = bbox || { xmin: x, ymin: y, xmax: x, ymax: y }
  if (box === bbox) {
    updateAxis(box, 'xmin', 'xmax', x)
    updateAxis(box, 'ymin', 'ymax', y)
  }

  if (position.length > 2) updateAxis(box, 'zmin', 'zmax', position[2])
  if (position.length > 3) updateAxis(box, 'mmin', 'mmax', position[3])
  return box
}
|
||||
|
||||
/**
 * Widen one axis of bbox to include value; non-finite values are ignored.
 * @param {BoundingBox} bbox
 * @param {'xmin' | 'ymin' | 'zmin' | 'mmin'} minKey
 * @param {'xmax' | 'ymax' | 'zmax' | 'mmax'} maxKey
 * @param {number | undefined} value
 */
function updateAxis(bbox, minKey, maxKey, value) {
  if (!Number.isFinite(value)) return // also rejects undefined
  const lo = bbox[minKey]
  const hi = bbox[maxKey]
  if (lo === undefined || value < lo) bbox[minKey] = value
  if (hi === undefined || value > hi) bbox[maxKey] = value
}
|
||||
|
||||
/**
 * Map a GeoJSON geometry to its numeric geospatial type code, offset by
 * coordinate dimensionality (2D: +0, 3D: +1000, 4D: +3000).
 * @param {Geometry} geometry
 * @returns {number}
 */
function geometryTypeCodeWithDimension(geometry) {
  const base = geometryTypeCodes[geometry.type]
  if (base === undefined) throw new Error(`unknown geometry type: ${geometry.type}`)
  const dim = inferGeometryDimensions(geometry)
  switch (dim) {
    case 2: return base
    case 3: return base + 1000
    case 4: return base + 3000
    default: throw new Error(`unsupported geometry dimensions: ${dim}`)
  }
}
|
||||
|
||||
// Base numeric type code for each GeoJSON geometry type (codes 1..7);
// dimension offsets are applied in geometryTypeCodeWithDimension.
const geometryTypeCodes = Object.fromEntries([
  'Point',
  'LineString',
  'Polygon',
  'MultiPoint',
  'MultiLineString',
  'MultiPolygon',
  'GeometryCollection',
].map((type, index) => [type, index + 1]))
|
||||
|
||||
/**
 * Determine the maximum coordinate dimensions for the geometry.
 * Falls back to 2 when no coordinates are available.
 * @param {Geometry} geometry
 * @returns {number}
 */
function inferGeometryDimensions(geometry) {
  if (geometry.type !== 'GeometryCollection') {
    return inferCoordinateDimensions(geometry.coordinates)
  }
  const children = geometry.geometries || []
  const maxDim = children.reduce((acc, child) => Math.max(acc, inferGeometryDimensions(child)), 0)
  return maxDim || 2
}
|
||||
|
||||
/**
 * Deepest position length found in a nested coordinates array (2 if empty).
 * @param {any[]} value
 * @returns {number}
 */
function inferCoordinateDimensions(value) {
  if (!value.length) return 2
  // a leaf position: its length is the number of dimensions
  if (typeof value[0] === 'number') return value.length
  const deepest = value.reduce((acc, item) => Math.max(acc, inferCoordinateDimensions(item)), 0)
  return deepest || 2
}
|
||||
@ -61,6 +61,19 @@ export function writeMetadata(writer, metadata) {
|
||||
field_2: c.meta_data.size_statistics.repetition_level_histogram,
|
||||
field_3: c.meta_data.size_statistics.definition_level_histogram,
|
||||
},
|
||||
field_17: c.meta_data.geospatial_statistics && {
|
||||
field_1: c.meta_data.geospatial_statistics.bbox && {
|
||||
field_1: c.meta_data.geospatial_statistics.bbox.xmin,
|
||||
field_2: c.meta_data.geospatial_statistics.bbox.xmax,
|
||||
field_3: c.meta_data.geospatial_statistics.bbox.ymin,
|
||||
field_4: c.meta_data.geospatial_statistics.bbox.ymax,
|
||||
field_5: c.meta_data.geospatial_statistics.bbox.zmin,
|
||||
field_6: c.meta_data.geospatial_statistics.bbox.zmax,
|
||||
field_7: c.meta_data.geospatial_statistics.bbox.mmin,
|
||||
field_8: c.meta_data.geospatial_statistics.bbox.mmax,
|
||||
},
|
||||
field_2: c.meta_data.geospatial_statistics.geospatial_types,
|
||||
},
|
||||
},
|
||||
field_4: c.offset_index_offset,
|
||||
field_5: c.offset_index_length,
|
||||
|
||||
17
src/wkb.js
17
src/wkb.js
@ -35,7 +35,10 @@ function writeGeometry(writer, geometry) {
|
||||
} else if (geometry.type === 'LineString') {
|
||||
writeLine(writer, geometry.coordinates, dim)
|
||||
} else if (geometry.type === 'Polygon') {
|
||||
writePolygon(writer, geometry.coordinates, dim)
|
||||
writer.appendUint32(geometry.coordinates.length)
|
||||
for (const ring of geometry.coordinates) {
|
||||
writeLine(writer, ring, dim)
|
||||
}
|
||||
} else if (geometry.type === 'MultiPoint') {
|
||||
writer.appendUint32(geometry.coordinates.length)
|
||||
for (const coordinates of geometry.coordinates) {
|
||||
@ -87,18 +90,6 @@ function writeLine(writer, coordinates, dim) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {ByteWriter} writer
|
||||
* @param {Position[][]} rings
|
||||
* @param {number} dimensions
|
||||
*/
|
||||
function writePolygon(writer, rings, dimensions) {
|
||||
writer.appendUint32(rings.length)
|
||||
for (const ring of rings) {
|
||||
writeLine(writer, ring, dimensions)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Geometry['type']} type
|
||||
* @returns {number}
|
||||
|
||||
93
test/geospatial.test.js
Normal file
93
test/geospatial.test.js
Normal file
@ -0,0 +1,93 @@
|
||||
import { describe, expect, it } from 'vitest'
import { geospatialStatistics } from '../src/geospatial.js'

// Unit tests for geospatialStatistics: bbox accumulation, type-code
// collection with dimension offsets, null handling, and error cases.
describe('geospatialStatistics', () => {
  it('computes bounding boxes and geospatial type codes for nested inputs', () => {
    const result = geospatialStatistics([
      null,
      undefined,
      { type: 'Point', coordinates: [1, 2] },
      {
        type: 'LineString',
        coordinates: [
          [5, -1, 10],
          [0, 3, -5],
          [2, 2, undefined], // undefined z must be skipped, not poison the bbox
          [6, 1, Infinity], // non-finite z must be skipped
        ],
      },
      {
        type: 'Polygon',
        coordinates: [
          [
            [9, 9, 1, 5],
            [9, 10, 3, 5],
            [8, 9, -4, 8],
            [7, 8, Infinity, Infinity],
          ],
        ],
      },
      {
        type: 'MultiPoint',
        coordinates: [
          [-5, -5, 0, -10],
          [4, 4, 12, undefined],
        ],
      },
      { type: 'MultiPolygon', coordinates: [] },
      {
        type: 'MultiLineString',
        coordinates: [
          [
            [
              // non-finite x: position contributes no bounds, only a type code
              [Infinity, 0],
            ],
          ],
        ],
      },
      {
        type: 'GeometryCollection',
        geometries: [
          { type: 'Point', coordinates: [2, -3, 7, 9] },
          { type: 'MultiPoint', coordinates: [[60, 10, 0, 11], [3, 6]] },
        ],
      },
      { type: 'GeometryCollection', geometries: [] },
    ])

    expect(result).toEqual({
      bbox: {
        xmin: -5,
        xmax: 60,
        ymin: -5,
        ymax: 10,
        zmin: -5,
        zmax: 12,
        mmin: -10,
        mmax: 11,
      },
      // codes are base type (1..7) plus +1000 for 3D, +3000 for 4D
      geospatial_types: [1, 5, 6, 7, 1002, 3003, 3004, 3007],
    })
  })

  it('omits geospatial statistics when only null-like values are present', () => {
    const result = geospatialStatistics([null, undefined, null])
    expect(result).toBeUndefined()
  })

  it('tracks type codes even when coordinates are empty', () => {
    const result = geospatialStatistics([
      { type: 'Point', coordinates: [] },
    ])
    expect(result).toEqual({
      bbox: undefined,
      geospatial_types: [1],
    })
  })

  it('throws on invalid value types and geometry definitions', () => {
    expect(() => geospatialStatistics(['oops'])).toThrow('geospatial column expects GeoJSON geometries')
    expect(() => geospatialStatistics([{ type: 'Unknown', coordinates: [] }])).toThrow('unknown geometry type: Unknown')
    expect(() => geospatialStatistics([{ type: 'Point', coordinates: [0, 0, 0, 0, 0] }])).toThrow('unsupported geometry dimensions: 5')
  })
})
|
||||
@ -36,6 +36,94 @@ describe('writeMetadata', () => {
|
||||
|
||||
expect(outputMetadata).toEqual(withKvMetadata)
|
||||
})
|
||||
|
||||
it('writes extended column metadata fields', () => {
|
||||
const writer = new ByteWriter()
|
||||
writer.appendUint32(0x31524150)
|
||||
|
||||
/** @type {FileMetaData} */
|
||||
const extendedMetadata = {
|
||||
version: 2,
|
||||
created_by: 'hyparquet',
|
||||
schema: [
|
||||
{ name: 'root', num_children: 1 },
|
||||
{
|
||||
name: 'geo',
|
||||
type: 'BYTE_ARRAY',
|
||||
repetition_type: 'REQUIRED',
|
||||
logical_type: { type: 'GEOGRAPHY', crs: 'EPSG:4326', algorithm: 'KARNEY' },
|
||||
},
|
||||
],
|
||||
num_rows: 1n,
|
||||
row_groups: [{
|
||||
columns: [{
|
||||
file_path: 'part-0.parquet',
|
||||
file_offset: 4n,
|
||||
meta_data: {
|
||||
type: 'BYTE_ARRAY',
|
||||
encodings: ['PLAIN', 'RLE'],
|
||||
path_in_schema: [],
|
||||
codec: 'SNAPPY',
|
||||
num_values: 1n,
|
||||
total_uncompressed_size: 10n,
|
||||
total_compressed_size: 8n,
|
||||
key_value_metadata: [{ key: 'chunk', value: 'value' }],
|
||||
data_page_offset: 4n,
|
||||
index_page_offset: 12n,
|
||||
dictionary_page_offset: 20n,
|
||||
statistics: {
|
||||
null_count: 0n,
|
||||
min_value: 'a',
|
||||
max_value: 'z',
|
||||
},
|
||||
encoding_stats: [{ page_type: 'DATA_PAGE', encoding: 'PLAIN', count: 1 }],
|
||||
bloom_filter_offset: 30n,
|
||||
bloom_filter_length: 4,
|
||||
size_statistics: {
|
||||
unencoded_byte_array_data_bytes: 5n,
|
||||
repetition_level_histogram: [1n, 0n],
|
||||
definition_level_histogram: [2n, 0n],
|
||||
},
|
||||
geospatial_statistics: {
|
||||
bbox: {
|
||||
xmin: 0,
|
||||
xmax: 10,
|
||||
ymin: -5,
|
||||
ymax: 5,
|
||||
zmin: 1,
|
||||
zmax: 2,
|
||||
mmin: 3,
|
||||
mmax: 4,
|
||||
},
|
||||
geospatial_types: [0, 1],
|
||||
},
|
||||
},
|
||||
offset_index_offset: 40n,
|
||||
offset_index_length: 16,
|
||||
column_index_offset: 60n,
|
||||
column_index_length: 24,
|
||||
encrypted_column_metadata: new Uint8Array([7, 8, 9]),
|
||||
}],
|
||||
total_byte_size: 64n,
|
||||
num_rows: 1n,
|
||||
sorting_columns: [{
|
||||
column_idx: 0,
|
||||
descending: true,
|
||||
nulls_first: false,
|
||||
}],
|
||||
file_offset: 4n,
|
||||
total_compressed_size: 8n,
|
||||
}],
|
||||
key_value_metadata: [{ key: 'meta', value: 'data' }],
|
||||
metadata_length: 223,
|
||||
}
|
||||
|
||||
writeMetadata(writer, extendedMetadata)
|
||||
writer.appendUint32(0x31524150)
|
||||
|
||||
const outputMetadata = parquetMetadata(writer.getBuffer())
|
||||
expect(outputMetadata).toEqual(extendedMetadata)
|
||||
})
|
||||
})
|
||||
|
||||
describe('logicalType', () => {
|
||||
|
||||
61
test/write.geospatial.test.js
Normal file
61
test/write.geospatial.test.js
Normal file
@ -0,0 +1,61 @@
|
||||
import { parquetMetadata } from 'hyparquet'
import { describe, expect, it } from 'vitest'
import { parquetWriteBuffer } from '../src/index.js'

/**
 * @import {ColumnSource} from '../src/types.js'
 */

// End-to-end check: writing a GEOMETRY column produces geospatial_statistics
// (not ordinary min/max statistics) in the parquet column metadata.
describe('geospatial statistics', () => {
  it('writes geospatial statistics into column metadata', () => {
    /** @type {ColumnSource[]} */
    const columnData = [{
      name: 'geometry',
      type: 'GEOMETRY',
      data: [
        { type: 'Point', coordinates: [10, 5, 100, 2] },
        null,
        {
          type: 'LineString',
          coordinates: [
            [-20, -10, 50, 5],
            [40, 30, 75, -5],
          ],
        },
        {
          type: 'GeometryCollection',
          geometries: [
            { type: 'Point', coordinates: [5, 15] },
            {
              type: 'MultiPoint',
              coordinates: [
                [0, -5],
                [60, 10],
              ],
            },
          ],
        },
      ],
    }]

    const buffer = parquetWriteBuffer({ columnData })
    const metadata = parquetMetadata(buffer)
    const columnMeta = metadata.row_groups[0].columns[0].meta_data

    // geometry values get no value-ordering min/max, only a null count
    expect(columnMeta?.statistics).toEqual({ null_count: 1n })
    expect(columnMeta?.geospatial_statistics).toEqual({
      bbox: {
        xmin: -20,
        xmax: 60,
        ymin: -10,
        ymax: 30,
        zmin: 50,
        zmax: 100,
        mmin: -5,
        mmax: 5,
      },
      // sort numerically not by string order
      geospatial_types: [7, 3001, 3002],
    })
  })
})
|
||||
Loading…
Reference in New Issue
Block a user