mirror of
https://github.com/asadbek064/hyparquet.git
synced 2026-02-22 20:31:32 +00:00
Minimal support for GeoParquet (#133)
* Initial support for GeoParquet * pr comments * convert crs * add test file + expected JSON files * add sentence to README * Apply suggestion from @platypii Co-authored-by: Kenny Daniel <platypii@gmail.com> * PR comments * update README * review comment --------- Co-authored-by: Kenny Daniel <platypii@gmail.com>
This commit is contained in:
parent
c427924668
commit
e8b1c8e570
@ -226,6 +226,8 @@ Hyparquet [respects](https://parquet.apache.org/docs/file-format/implementations
|
||||
|
||||
This behavior can be changed by setting the `utf8` option to `false` in functions such as `parquetRead`. Note that this option only affects `BYTE_ARRAY` columns without an annotation. Columns with a `STRING`, `ENUM` or `UUID` logical type, for example, will be decoded as expected by the specification.
|
||||
|
||||
Note also that, by default, hyparquet checks whether the file metadata contains a `geo` key in order to detect [GeoParquet](https://geoparquet.org/) files. If it does, every geospatial column is marked with the `GEOMETRY` or `GEOGRAPHY` logical type and its WKB binary data is decoded into GeoJSON geometries, regardless of the `utf8` option. Set the `geoparquet` option to `false` to disable this behavior.
|
||||
|
||||
## Supported Parquet Files
|
||||
|
||||
The parquet format is known to be a sprawling format which includes options for a wide array of compression schemes, encoding types, and data structures.
|
||||
|
||||
37
src/geoparquet.js
Normal file
37
src/geoparquet.js
Normal file
@ -0,0 +1,37 @@
|
||||
/**
 * Mark GeoParquet columns in a parquet schema with a GEOMETRY or GEOGRAPHY
 * logical type, based on the JSON stored under the `geo` key of the file's
 * key-value metadata.
 *
 * The schema elements are mutated in place. Only root-level BYTE_ARRAY
 * columns that are not REPEATED and have no logical type yet are touched.
 * Columns whose GeoParquet encoding is not 'WKB' are ignored.
 *
 * @import {KeyValue, LogicalType, SchemaElement} from '../src/types.d.ts'
 * @param {SchemaElement[]} schema flattened schema, root element first
 * @param {KeyValue[] | undefined} key_value_metadata file key-value metadata
 * @returns {void}
 */
export function markGeoColumns(schema, key_value_metadata) {
  // Prepare the list of GeoParquet columns
  /** @type {Map<string, LogicalType>} */
  const columns = new Map()
  const geo = key_value_metadata?.find(({ key }) => key === 'geo')?.value
  const decodedColumns = (geo && JSON.parse(geo)?.columns) ?? {}
  for (const [name, column] of Object.entries(decodedColumns)) {
    if (column.encoding !== 'WKB') {
      continue
    }
    const type = column.edges === 'spherical' ? 'GEOGRAPHY' : 'GEOMETRY'
    // The crs object may carry a single `id` or a list of `ids`
    const id = column.crs?.id ?? column.crs?.ids?.[0]
    const crs = id ? `${id.authority}:${id.code.toString()}` : undefined
    // Note: we can't infer GEOGRAPHY's algorithm from GeoParquet
    columns.set(name, { type, crs })
  }

  // Mark schema elements with logical type
  // Only look at root-level columns of type BYTE_ARRAY without existing logical_type
  for (let i = 1; i < schema.length; i++) { // skip root
    const element = schema[i]
    const { logical_type, name, num_children, repetition_type, type } = element
    if (num_children) {
      // The schema is a depth-first flattened tree and num_children only counts
      // direct children, so walk the subtree to skip every descendant.
      // Otherwise a deeply nested leaf would be mistaken for a root-level column.
      let pending = num_children
      while (pending > 0 && i + 1 < schema.length) {
        i++
        pending += (schema[i].num_children ?? 0) - 1
      }
      continue // skip the element and its whole subtree
    }
    if (type === 'BYTE_ARRAY' && logical_type === undefined && repetition_type !== 'REPEATED') {
      const geoType = columns.get(name)
      // Only assign on a match, to avoid adding a `logical_type: undefined` key
      if (geoType) {
        element.logical_type = geoType
      }
    }
  }
}
|
||||
@ -2,6 +2,7 @@ import { CompressionCodec, ConvertedType, EdgeInterpolationAlgorithm, Encoding,
|
||||
import { DEFAULT_PARSERS, parseDecimal, parseFloat16 } from './convert.js'
|
||||
import { getSchemaPath } from './schema.js'
|
||||
import { deserializeTCompactProtocol } from './thrift.js'
|
||||
import { markGeoColumns } from './geoparquet.js'
|
||||
|
||||
export const defaultInitialFetchSize = 1 << 19 // 512kb
|
||||
|
||||
@ -34,7 +35,7 @@ function decode(/** @type {Uint8Array} */ value) {
|
||||
* @param {MetadataOptions & { initialFetchSize?: number }} options initial fetch size in bytes (default 512kb)
|
||||
* @returns {Promise<FileMetaData>} parquet metadata object
|
||||
*/
|
||||
export async function parquetMetadataAsync(asyncBuffer, { parsers, initialFetchSize = defaultInitialFetchSize } = {}) {
|
||||
export async function parquetMetadataAsync(asyncBuffer, { parsers, initialFetchSize = defaultInitialFetchSize, geoparquet = true } = {}) {
|
||||
if (!asyncBuffer || !(asyncBuffer.byteLength >= 0)) throw new Error('parquet expected AsyncBuffer')
|
||||
|
||||
// fetch last bytes (footer) of the file
|
||||
@ -64,21 +65,22 @@ export async function parquetMetadataAsync(asyncBuffer, { parsers, initialFetchS
|
||||
const combinedView = new Uint8Array(combinedBuffer)
|
||||
combinedView.set(new Uint8Array(metadataBuffer))
|
||||
combinedView.set(new Uint8Array(footerBuffer), footerOffset - metadataOffset)
|
||||
return parquetMetadata(combinedBuffer, { parsers })
|
||||
return parquetMetadata(combinedBuffer, { parsers, geoparquet })
|
||||
} else {
|
||||
// parse metadata from the footer
|
||||
return parquetMetadata(footerBuffer, { parsers })
|
||||
return parquetMetadata(footerBuffer, { parsers, geoparquet })
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read parquet metadata from a buffer synchronously.
|
||||
*
|
||||
* @import {KeyValue} from '../src/types.d.ts'
|
||||
* @param {ArrayBuffer} arrayBuffer parquet file footer
|
||||
* @param {MetadataOptions} options metadata parsing options
|
||||
* @returns {FileMetaData} parquet metadata object
|
||||
*/
|
||||
export function parquetMetadata(arrayBuffer, { parsers } = {}) {
|
||||
export function parquetMetadata(arrayBuffer, { parsers, geoparquet = true } = {}) {
|
||||
if (!(arrayBuffer instanceof ArrayBuffer)) throw new Error('parquet expected ArrayBuffer')
|
||||
const view = new DataView(arrayBuffer)
|
||||
|
||||
@ -185,12 +187,17 @@ export function parquetMetadata(arrayBuffer, { parsers } = {}) {
|
||||
total_compressed_size: rowGroup.field_6,
|
||||
ordinal: rowGroup.field_7,
|
||||
}))
|
||||
/** @type {KeyValue[] | undefined} */
|
||||
const key_value_metadata = metadata.field_5?.map((/** @type {any} */ keyValue) => ({
|
||||
key: decode(keyValue.field_1),
|
||||
value: decode(keyValue.field_2),
|
||||
}))
|
||||
const created_by = decode(metadata.field_6)
|
||||
|
||||
if (geoparquet) {
|
||||
markGeoColumns(schema, key_value_metadata)
|
||||
}
|
||||
|
||||
return {
|
||||
version,
|
||||
schema,
|
||||
|
||||
@ -18,7 +18,7 @@ export async function parquetQuery(options) {
|
||||
if (!options.file || !(options.file.byteLength >= 0)) {
|
||||
throw new Error('parquet expected AsyncBuffer')
|
||||
}
|
||||
options.metadata ??= await parquetMetadataAsync(options.file)
|
||||
options.metadata ??= await parquetMetadataAsync(options.file, options)
|
||||
|
||||
const { metadata, rowStart = 0, columns, orderBy, filter } = options
|
||||
if (rowStart < 0) throw new Error('parquet rowStart must be positive')
|
||||
@ -122,7 +122,7 @@ export async function parquetQuery(options) {
|
||||
*/
|
||||
async function parquetReadRows(options) {
|
||||
const { file, rows } = options
|
||||
options.metadata ||= await parquetMetadataAsync(file)
|
||||
options.metadata ||= await parquetMetadataAsync(file, options)
|
||||
const { row_groups: rowGroups } = options.metadata
|
||||
// Compute row groups to fetch
|
||||
const groupIncluded = Array(rowGroups.length).fill(false)
|
||||
|
||||
@ -20,7 +20,7 @@ import { concat, flatten } from './utils.js'
|
||||
*/
|
||||
export async function parquetRead(options) {
|
||||
// load metadata if not provided
|
||||
options.metadata ??= await parquetMetadataAsync(options.file)
|
||||
options.metadata ??= await parquetMetadataAsync(options.file, options)
|
||||
|
||||
// read row groups
|
||||
const asyncGroups = parquetReadAsync(options)
|
||||
@ -109,7 +109,7 @@ export async function parquetReadColumn(options) {
|
||||
if (options.columns?.length !== 1) {
|
||||
throw new Error('parquetReadColumn expected columns: [columnName]')
|
||||
}
|
||||
options.metadata ??= await parquetMetadataAsync(options.file)
|
||||
options.metadata ??= await parquetMetadataAsync(options.file, options)
|
||||
const asyncGroups = parquetReadAsync(options)
|
||||
|
||||
// assemble struct columns
|
||||
|
||||
2
src/types.d.ts
vendored
2
src/types.d.ts
vendored
@ -17,6 +17,7 @@ export interface ParquetParsers {
|
||||
*/
|
||||
export interface MetadataOptions {
|
||||
parsers?: ParquetParsers // custom parsers to decode advanced types
|
||||
geoparquet?: boolean // parse geoparquet metadata and set logical type to geometry/geography for geospatial columns (default true)
|
||||
}
|
||||
|
||||
/**
|
||||
@ -33,6 +34,7 @@ export interface BaseParquetReadOptions {
|
||||
compressors?: Compressors // custom decompressors
|
||||
utf8?: boolean // decode byte arrays as utf8 strings (default true)
|
||||
parsers?: ParquetParsers // custom parsers to decode advanced types
|
||||
geoparquet?: boolean // parse geoparquet metadata and set logical type to geometry/geography for geospatial columns (default true)
|
||||
}
|
||||
|
||||
interface ArrayRowFormat {
|
||||
|
||||
1611
test/files/geoparquet.json
Normal file
1611
test/files/geoparquet.json
Normal file
File diff suppressed because it is too large
Load Diff
469
test/files/geoparquet.metadata.json
Normal file
469
test/files/geoparquet.metadata.json
Normal file
@ -0,0 +1,469 @@
|
||||
{
|
||||
"version": 2,
|
||||
"schema": [
|
||||
{
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "schema",
|
||||
"num_children": 7
|
||||
},
|
||||
{
|
||||
"type": "DOUBLE",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "pop_est"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "continent",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "name",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "iso_a3",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "gdp_md_est"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "geometry",
|
||||
"logical_type": {
|
||||
"type": "GEOMETRY",
|
||||
"crs": "OGC:CRS84"
|
||||
}
|
||||
},
|
||||
{
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "bbox",
|
||||
"num_children": 4
|
||||
},
|
||||
{
|
||||
"type": "DOUBLE",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "xmax"
|
||||
},
|
||||
{
|
||||
"type": "DOUBLE",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "xmin"
|
||||
},
|
||||
{
|
||||
"type": "DOUBLE",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "ymax"
|
||||
},
|
||||
{
|
||||
"type": "DOUBLE",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "ymin"
|
||||
}
|
||||
],
|
||||
"num_rows": 5,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "DOUBLE",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["pop_est"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 126,
|
||||
"total_compressed_size": 128,
|
||||
"data_page_offset": 58,
|
||||
"dictionary_page_offset": 4,
|
||||
"statistics": {
|
||||
"max": 328239523,
|
||||
"min": 603253,
|
||||
"null_count": 0,
|
||||
"max_value": 328239523,
|
||||
"min_value": 603253
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 5]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["continent"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 100,
|
||||
"total_compressed_size": 104,
|
||||
"data_page_offset": 186,
|
||||
"dictionary_page_offset": 132,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "Oceania",
|
||||
"min_value": "Africa"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"unencoded_byte_array_data_bytes": 45,
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 5]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["name"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 138,
|
||||
"total_compressed_size": 143,
|
||||
"data_page_offset": 326,
|
||||
"dictionary_page_offset": 236,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "W. Sahara",
|
||||
"min_value": "Canada"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"unencoded_byte_array_data_bytes": 51,
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 5]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["iso_a3"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 91,
|
||||
"total_compressed_size": 93,
|
||||
"data_page_offset": 428,
|
||||
"dictionary_page_offset": 379,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "USA",
|
||||
"min_value": "CAN"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"unencoded_byte_array_data_bytes": 15,
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 5]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["gdp_md_est"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 126,
|
||||
"total_compressed_size": 123,
|
||||
"data_page_offset": 521,
|
||||
"dictionary_page_offset": 472,
|
||||
"statistics": {
|
||||
"max": 21433226,
|
||||
"min": 907,
|
||||
"null_count": 0,
|
||||
"max_value": 21433226,
|
||||
"min_value": 907
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 5]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["geometry"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 22634,
|
||||
"total_compressed_size": 20669,
|
||||
"data_page_offset": 20766,
|
||||
"dictionary_page_offset": 595,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"min_value": "\u0001\u0003\u0000\u0000\u0000\u0001\u0000\u0000\u0000\u001c\u0000\u0000\u0000<30>_<EFBFBD>'<27>T!<21>\u0000\f<><66>\u000b<30>;@<40><>\r2<72>T!<21><>\u0004<30>\u0019<31>;@<40>w\u001b<31>i^!<21>8<EFBFBD><38>|Oe;@<40>,<2C><><EFBFBD>_!<21>,ʂ<><CA82>9@H5<48>W<EFBFBD>'<27>K\u00045<34><35>9@h8<68><38><EFBFBD><EFBFBD>'<27>@<40>7h<37>_7@<40>!\u0007<30><37><EFBFBD>)<29><><EFBFBD>`<60><>H7@<40>Z<EFBFBD>a<EFBFBD><*<2A>\u0014<31><34>n<EFBFBD>6@<40>6<EFBFBD>?<3F><>)<29>$<24><><EFBFBD>S5@t<>p<EFBFBD>^<5E>0<EFBFBD><30><EFBFBD><EFBFBD>TU5@<40>[\"<22><\u00101<30>X<EFBFBD><58><EFBFBD><EFBFBD><EFBFBD>4@@@:<3A>:\u00051<35><31><EFBFBD><EFBFBD>\u001cl5@\f<><66>\u001a<31>\u00001<30><31>\u000b<30>;<3B>k5@<40>$<24>\u001d}<7D>-<2D><><EFBFBD><EFBFBD>S'<27>5@蹏<><E8B98F>B-<2D><>Íf<C38D>5@<40>\u001c<31><63><q,<2C><>y<EFBFBD><79>fO6@P\u0005H<35>?<3F>+<2B><>ѓ<EFBFBD><D193>7@\u0010\"<22>.~\u0000)<29>P<EFBFBD>&W&<26>8@\u0000<30>G<EFBFBD><47>\u000f(<28>\u0000<30><30><EFBFBD><EFBFBD>\u0007:@H<><48><EFBFBD><EFBFBD>o'<27><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>\u001a:@<40>D<EFBFBD><44><EFBFBD><EFBFBD>&<26>`a<>\u0012(<28>:@h_<68>\u0016?\u001a%<25><>y-<2D><><EFBFBD>:@(<28>w5<77>`$<24>\r.<2E><>f<EFBFBD>:@<40><><EFBFBD><EFBFBD>~x#<23>\r.<2E><>f<EFBFBD>:@`~\u001b<31>y<EFBFBD>\"<22>\u0000G<30>]<5D>\u0016;@\u0018\u000f\u0018\t<><74>!<21>y\u0010*<2A><>\u001e;@\u0010\u0005Ae<41><65>!<21>\u0000\f<><66>\u000b<30>;@<40>_<EFBFBD>'<27>T!<21>\u0000\f<><66>\u000b<30>;@"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"unencoded_byte_array_data_bytes": 22100,
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 5]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "DOUBLE",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["bbox", "xmax"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 126,
|
||||
"total_compressed_size": 129,
|
||||
"data_page_offset": 21319,
|
||||
"dictionary_page_offset": 21264,
|
||||
"statistics": {
|
||||
"max": 180,
|
||||
"min": -66.96465999999998,
|
||||
"null_count": 0,
|
||||
"max_value": 180,
|
||||
"min_value": -66.96465999999998
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 0, 5]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "DOUBLE",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["bbox", "xmin"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 126,
|
||||
"total_compressed_size": 129,
|
||||
"data_page_offset": 21448,
|
||||
"dictionary_page_offset": 21393,
|
||||
"statistics": {
|
||||
"max": 29.339997592900346,
|
||||
"min": -180,
|
||||
"null_count": 0,
|
||||
"max_value": 29.339997592900346,
|
||||
"min_value": -180
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 0, 5]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "DOUBLE",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["bbox", "ymax"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 126,
|
||||
"total_compressed_size": 129,
|
||||
"data_page_offset": 21577,
|
||||
"dictionary_page_offset": 21522,
|
||||
"statistics": {
|
||||
"max": 83.23324000000001,
|
||||
"min": -16.020882256741224,
|
||||
"null_count": 0,
|
||||
"max_value": 83.23324000000001,
|
||||
"min_value": -16.020882256741224
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 0, 5]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "DOUBLE",
|
||||
"encodings": ["PLAIN", "RLE", "RLE_DICTIONARY"],
|
||||
"path_in_schema": ["bbox", "ymin"],
|
||||
"codec": "SNAPPY",
|
||||
"num_values": 5,
|
||||
"total_uncompressed_size": 126,
|
||||
"total_compressed_size": 130,
|
||||
"data_page_offset": 21707,
|
||||
"dictionary_page_offset": 21651,
|
||||
"statistics": {
|
||||
"max": 41.675105088867326,
|
||||
"min": -18.28799,
|
||||
"null_count": 0,
|
||||
"max_value": 41.675105088867326,
|
||||
"min_value": -18.28799
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
},
|
||||
{
|
||||
"page_type": "DATA_PAGE",
|
||||
"encoding": "RLE_DICTIONARY",
|
||||
"count": 1
|
||||
}
|
||||
],
|
||||
"size_statistics": {
|
||||
"repetition_level_histogram": [],
|
||||
"definition_level_histogram": [0, 0, 5]
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"total_byte_size": 23719,
|
||||
"num_rows": 5,
|
||||
"file_offset": 4,
|
||||
"total_compressed_size": 21777
|
||||
}
|
||||
],
|
||||
"key_value_metadata": [
|
||||
{
|
||||
"key": "geo",
|
||||
"value": "{\"version\": \"1.2.0-dev\", \"primary_column\": \"geometry\", \"columns\": {\"geometry\": {\"encoding\": \"WKB\", \"geometry_types\": [\"Polygon\", \"MultiPolygon\"], \"crs\": {\"$schema\": \"https://proj.org/schemas/v0.7/projjson.schema.json\", \"type\": \"GeographicCRS\", \"name\": \"WGS 84 (CRS84)\", \"datum_ensemble\": {\"name\": \"World Geodetic System 1984 ensemble\", \"members\": [{\"name\": \"World Geodetic System 1984 (Transit)\", \"id\": {\"authority\": \"EPSG\", \"code\": 1166}}, {\"name\": \"World Geodetic System 1984 (G730)\", \"id\": {\"authority\": \"EPSG\", \"code\": 1152}}, {\"name\": \"World Geodetic System 1984 (G873)\", \"id\": {\"authority\": \"EPSG\", \"code\": 1153}}, {\"name\": \"World Geodetic System 1984 (G1150)\", \"id\": {\"authority\": \"EPSG\", \"code\": 1154}}, {\"name\": \"World Geodetic System 1984 (G1674)\", \"id\": {\"authority\": \"EPSG\", \"code\": 1155}}, {\"name\": \"World Geodetic System 1984 (G1762)\", \"id\": {\"authority\": \"EPSG\", \"code\": 1156}}, {\"name\": \"World Geodetic System 1984 (G2139)\", \"id\": {\"authority\": \"EPSG\", \"code\": 1309}}, {\"name\": \"World Geodetic System 1984 (G2296)\", \"id\": {\"authority\": \"EPSG\", \"code\": 1383}}], \"ellipsoid\": {\"name\": \"WGS 84\", \"semi_major_axis\": 6378137, \"inverse_flattening\": 298.257223563}, \"accuracy\": \"2.0\", \"id\": {\"authority\": \"EPSG\", \"code\": 6326}}, \"coordinate_system\": {\"subtype\": \"ellipsoidal\", \"axis\": [{\"name\": \"Geodetic longitude\", \"abbreviation\": \"Lon\", \"direction\": \"east\", \"unit\": \"degree\"}, {\"name\": \"Geodetic latitude\", \"abbreviation\": \"Lat\", \"direction\": \"north\", \"unit\": \"degree\"}]}, \"scope\": \"Not known.\", \"area\": \"World.\", \"bbox\": {\"south_latitude\": -90, \"west_longitude\": -180, \"north_latitude\": 90, \"east_longitude\": 180}, \"id\": {\"authority\": \"OGC\", \"code\": \"CRS84\"}}, \"edges\": \"planar\", \"bbox\": [-180.0, -18.288, 180.0, 83.2332], \"covering\": 
{\"bbox\": {\"xmin\": [\"bbox\", \"xmin\"], \"ymin\": [\"bbox\", \"ymin\"], \"xmax\": [\"bbox\", \"xmax\"], \"ymax\": [\"bbox\", \"ymax\"]}}}}}"
|
||||
},
|
||||
{
|
||||
"key": "ARROW:schema",
|
||||
"value": "/////+AJAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABBAAQAAAAAAAKAAwAAAAEAAgACgAAAHwHAAAEAAAAAQAAAAwAAAAIAAwABAAIAAgAAABYBwAABAAAAEkHAAB7InZlcnNpb24iOiAiMS4yLjAtZGV2IiwgInByaW1hcnlfY29sdW1uIjogImdlb21ldHJ5IiwgImNvbHVtbnMiOiB7Imdlb21ldHJ5IjogeyJlbmNvZGluZyI6ICJXS0IiLCAiZ2VvbWV0cnlfdHlwZXMiOiBbIlBvbHlnb24iLCAiTXVsdGlQb2x5Z29uIl0sICJjcnMiOiB7IiRzY2hlbWEiOiAiaHR0cHM6Ly9wcm9qLm9yZy9zY2hlbWFzL3YwLjcvcHJvampzb24uc2NoZW1hLmpzb24iLCAidHlwZSI6ICJHZW9ncmFwaGljQ1JTIiwgIm5hbWUiOiAiV0dTIDg0IChDUlM4NCkiLCAiZGF0dW1fZW5zZW1ibGUiOiB7Im5hbWUiOiAiV29ybGQgR2VvZGV0aWMgU3lzdGVtIDE5ODQgZW5zZW1ibGUiLCAibWVtYmVycyI6IFt7Im5hbWUiOiAiV29ybGQgR2VvZGV0aWMgU3lzdGVtIDE5ODQgKFRyYW5zaXQpIiwgImlkIjogeyJhdXRob3JpdHkiOiAiRVBTRyIsICJjb2RlIjogMTE2Nn19LCB7Im5hbWUiOiAiV29ybGQgR2VvZGV0aWMgU3lzdGVtIDE5ODQgKEc3MzApIiwgImlkIjogeyJhdXRob3JpdHkiOiAiRVBTRyIsICJjb2RlIjogMTE1Mn19LCB7Im5hbWUiOiAiV29ybGQgR2VvZGV0aWMgU3lzdGVtIDE5ODQgKEc4NzMpIiwgImlkIjogeyJhdXRob3JpdHkiOiAiRVBTRyIsICJjb2RlIjogMTE1M319LCB7Im5hbWUiOiAiV29ybGQgR2VvZGV0aWMgU3lzdGVtIDE5ODQgKEcxMTUwKSIsICJpZCI6IHsiYXV0aG9yaXR5IjogIkVQU0ciLCAiY29kZSI6IDExNTR9fSwgeyJuYW1lIjogIldvcmxkIEdlb2RldGljIFN5c3RlbSAxOTg0IChHMTY3NCkiLCAiaWQiOiB7ImF1dGhvcml0eSI6ICJFUFNHIiwgImNvZGUiOiAxMTU1fX0sIHsibmFtZSI6ICJXb3JsZCBHZW9kZXRpYyBTeXN0ZW0gMTk4NCAoRzE3NjIpIiwgImlkIjogeyJhdXRob3JpdHkiOiAiRVBTRyIsICJjb2RlIjogMTE1Nn19LCB7Im5hbWUiOiAiV29ybGQgR2VvZGV0aWMgU3lzdGVtIDE5ODQgKEcyMTM5KSIsICJpZCI6IHsiYXV0aG9yaXR5IjogIkVQU0ciLCAiY29kZSI6IDEzMDl9fSwgeyJuYW1lIjogIldvcmxkIEdlb2RldGljIFN5c3RlbSAxOTg0IChHMjI5NikiLCAiaWQiOiB7ImF1dGhvcml0eSI6ICJFUFNHIiwgImNvZGUiOiAxMzgzfX1dLCAiZWxsaXBzb2lkIjogeyJuYW1lIjogIldHUyA4NCIsICJzZW1pX21ham9yX2F4aXMiOiA2Mzc4MTM3LCAiaW52ZXJzZV9mbGF0dGVuaW5nIjogMjk4LjI1NzIyMzU2M30sICJhY2N1cmFjeSI6ICIyLjAiLCAiaWQiOiB7ImF1dGhvcml0eSI6ICJFUFNHIiwgImNvZGUiOiA2MzI2fX0sICJjb29yZGluYXRlX3N5c3RlbSI6IHsic3VidHlwZSI6ICJlbGxpcHNvaWRhbCIsICJheGlzIjogW3sibmFtZSI6ICJHZW9kZXRpYyBsb25naXR1ZGUiLCAiYWJicmV2aWF0aW9uIjogIkxvbiIsICJkaXJlY3Rpb24iOiAiZWFzdCIsICJ1bml0IjogImRlZ3JlZSJ9LCB7Im5hbWUiOiAiR2
VvZGV0aWMgbGF0aXR1ZGUiLCAiYWJicmV2aWF0aW9uIjogIkxhdCIsICJkaXJlY3Rpb24iOiAibm9ydGgiLCAidW5pdCI6ICJkZWdyZWUifV19LCAic2NvcGUiOiAiTm90IGtub3duLiIsICJhcmVhIjogIldvcmxkLiIsICJiYm94IjogeyJzb3V0aF9sYXRpdHVkZSI6IC05MCwgIndlc3RfbG9uZ2l0dWRlIjogLTE4MCwgIm5vcnRoX2xhdGl0dWRlIjogOTAsICJlYXN0X2xvbmdpdHVkZSI6IDE4MH0sICJpZCI6IHsiYXV0aG9yaXR5IjogIk9HQyIsICJjb2RlIjogIkNSUzg0In19LCAiZWRnZXMiOiAicGxhbmFyIiwgImJib3giOiBbLTE4MC4wLCAtMTguMjg4LCAxODAuMCwgODMuMjMzMl0sICJjb3ZlcmluZyI6IHsiYmJveCI6IHsieG1pbiI6IFsiYmJveCIsICJ4bWluIl0sICJ5bWluIjogWyJiYm94IiwgInltaW4iXSwgInhtYXgiOiBbImJib3giLCAieG1heCJdLCAieW1heCI6IFsiYmJveCIsICJ5bWF4Il19fX19fQAAAAMAAABnZW8ABwAAAPwBAAC4AQAAjAEAAGABAAAgAQAA8AAAAAQAAAAw/v//AAABDSAAAAAoAAAABAAAAAQAAACkAAAAdAAAAEQAAAAUAAAABAAAAGJib3gAAAAAbP7//2j+//8AAAEDEAAAABgAAAAEAAAAAAAAAAQAAAB5bWluAAAAAFb+//8AAAIAlP7//wAAAQMQAAAAGAAAAAQAAAAAAAAABAAAAHltYXgAAAAAgv7//wAAAgDA/v//AAABAxAAAAAYAAAABAAAAAAAAAAEAAAAeG1pbgAAAACu/v//AAACAOz+//8AAAEDEAAAABgAAAAEAAAAAAAAAAQAAAB4bWF4AAAAANr+//8AAAIAGP///wAAAQQQAAAAHAAAAAQAAAAAAAAACAAAAGdlb21ldHJ5AAAAAEj///9E////AAABAhAAAAAkAAAABAAAAAAAAAAKAAAAZ2RwX21kX2VzdAAACAAMAAgABwAIAAAAAAAAAUAAAACA////AAABBRAAAAAYAAAABAAAAAAAAAAGAAAAaXNvX2EzAACs////qP///wAAAQUQAAAAGAAAAAQAAAAAAAAABAAAAG5hbWUAAAAA1P///9D///8AAAEFEAAAACAAAAAEAAAAAAAAAAkAAABjb250aW5lbnQAAAAEAAQABAAAABAAFAAIAAYABwAMAAAAEAAQAAAAAAABAxAAAAAgAAAABAAAAAAAAAAHAAAAcG9wX2VzdAAAAAYACAAGAAYAAAAAAAIAAAAAAA=="
|
||||
}
|
||||
],
|
||||
"created_by": "parquet-cpp-arrow version 20.0.0",
|
||||
"metadata_length": 6983
|
||||
}
|
||||
BIN
test/files/geoparquet.parquet
Normal file
BIN
test/files/geoparquet.parquet
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user