mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Delta byte array encoding
This commit is contained in:
parent
48d79e6a1d
commit
d4341b803e
@ -181,7 +181,7 @@ Parquet encodings:
|
||||
- [X] RLE
|
||||
- [X] BIT_PACKED
|
||||
- [X] DELTA_BINARY_PACKED
|
||||
- [ ] DELTA_BYTE_ARRAY
|
||||
- [X] DELTA_BYTE_ARRAY
|
||||
- [ ] DELTA_LENGTH_BYTE_ARRAY
|
||||
- [ ] BYTE_STREAM_SPLIT
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import { decompressPage } from './column.js'
|
||||
import { deltaBinaryUnpack } from './delta.js'
|
||||
import { deltaBinaryUnpack, deltaByteArray } from './delta.js'
|
||||
import { readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js'
|
||||
import { readPlain } from './plain.js'
|
||||
import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
|
||||
@ -12,7 +12,7 @@ import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
|
||||
* @typedef {import("./types.d.ts").Compressors} Compressors
|
||||
* @typedef {import("./types.d.ts").DataPageHeaderV2} DataPageHeaderV2
|
||||
* @typedef {import("./types.d.ts").SchemaTree} SchemaTree
|
||||
* @param {Uint8Array} compressedBytes raw page data (should already be decompressed)
|
||||
* @param {Uint8Array} compressedBytes raw page data
|
||||
* @param {import("./types.d.ts").PageHeader} ph page header
|
||||
* @param {SchemaTree[]} schemaPath
|
||||
* @param {ColumnMetaData} columnMetadata
|
||||
@ -46,7 +46,7 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata,
|
||||
|
||||
// read values based on encoding
|
||||
/** @type {import('./types.d.ts').DecodedArray} */
|
||||
let dataPage = []
|
||||
let dataPage
|
||||
const nValues = daph2.num_values - daph2.num_nulls
|
||||
if (daph2.encoding === 'PLAIN') {
|
||||
const { type_length } = schemaPath[schemaPath.length - 1].element
|
||||
@ -67,6 +67,9 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata,
|
||||
const int32 = columnMetadata.type === 'INT32'
|
||||
dataPage = int32 ? new Int32Array(nValues) : new BigInt64Array(nValues)
|
||||
deltaBinaryUnpack(pageReader, nValues, dataPage)
|
||||
} else if (daph2.encoding === 'DELTA_BYTE_ARRAY') {
|
||||
dataPage = new Array(nValues)
|
||||
deltaByteArray(pageReader, nValues, dataPage)
|
||||
} else {
|
||||
throw new Error(`parquet unsupported encoding: ${daph2.encoding}`)
|
||||
}
|
||||
|
||||
25
src/delta.js
25
src/delta.js
@ -60,3 +60,28 @@ export function deltaBinaryUnpack(reader, nValues, output) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decode byte arrays that are stored as a shared-prefix length plus a suffix.
 *
 * Two arrays of nValues lengths are read with deltaBinaryUnpack: first the
 * prefix lengths, then the suffix lengths. The suffix bytes are then taken
 * sequentially from the reader, and reader.offset is advanced past each one.
 * Value i is the first prefixData[i] bytes of value i - 1 followed by its
 * suffix.
 *
 * Values with a zero prefix length are views onto the reader's underlying
 * buffer (no copy is made); values with a prefix are newly allocated.
 *
 * @param {DataReader} reader
 * @param {number} nValues number of values to decode
 * @param {Uint8Array[]} output filled in place at indices 0..nValues-1
 * @throws {Error} if a prefix length is negative, or exceeds the length of
 *   the previous value (including a non-zero prefix on the first value)
 */
export function deltaByteArray(reader, nValues, output) {
  const prefixData = new Int32Array(nValues)
  deltaBinaryUnpack(reader, nValues, prefixData)
  const suffixData = new Int32Array(nValues)
  deltaBinaryUnpack(reader, nValues, suffixData)

  for (let i = 0; i < nValues; i++) {
    const prefixLength = prefixData[i]
    const suffixLength = suffixData[i]
    const suffix = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, suffixLength)
    if (prefixLength) {
      // A prefix can only be borrowed from an existing previous value and
      // cannot be longer than it. Without this check subarray() would clamp
      // silently and leave zero bytes in the decoded value.
      const previous = i > 0 ? output[i - 1] : undefined
      if (prefixLength < 0 || !previous || prefixLength > previous.length) {
        throw new Error(`parquet invalid delta byte array prefix length: ${prefixLength}`)
      }
      // copy from previous value
      const value = new Uint8Array(prefixLength + suffixLength)
      value.set(previous.subarray(0, prefixLength))
      value.set(suffix, prefixLength)
      output[i] = value
    } else {
      output[i] = suffix
    }
    reader.offset += suffixLength
  }
}
|
||||
|
||||
@ -59,7 +59,7 @@ export async function parquetMetadataAsync(asyncBuffer, initialFetchSize = 1 <<
|
||||
// combine initial fetch with the new slice
|
||||
const combinedBuffer = new ArrayBuffer(metadataLength + 8)
|
||||
const combinedView = new Uint8Array(combinedBuffer)
|
||||
combinedView.set(new Uint8Array(metadataBuffer), 0)
|
||||
combinedView.set(new Uint8Array(metadataBuffer))
|
||||
combinedView.set(new Uint8Array(footerBuffer), footerOffset - metadataOffset)
|
||||
return parquetMetadata(combinedBuffer)
|
||||
} else {
|
||||
|
||||
@ -19,6 +19,7 @@ const CompactType = {
|
||||
/**
|
||||
* Parse TCompactProtocol
|
||||
*
|
||||
* @typedef {import("./types.d.ts").DataReader} DataReader
|
||||
* @param {DataReader} reader
|
||||
* @returns {Record<string, any>}
|
||||
*/
|
||||
@ -119,7 +120,6 @@ function readElement(reader, type) {
|
||||
* 7-bit group with the 0 bit, prefixing the remaining 7-bit groups with the
|
||||
* 1 bit and encode the resulting bit-string as Little Endian.
|
||||
*
|
||||
* @typedef {import("./types.d.ts").DataReader} DataReader
|
||||
* @param {DataReader} reader
|
||||
* @returns {number} value
|
||||
*/
|
||||
|
||||
11002
test/files/delta_byte_array.json
Normal file
11002
test/files/delta_byte_array.json
Normal file
File diff suppressed because it is too large
Load Diff
333
test/files/delta_byte_array.metadata.json
Normal file
333
test/files/delta_byte_array.metadata.json
Normal file
@ -0,0 +1,333 @@
|
||||
{
|
||||
"version": 1,
|
||||
"schema": [
|
||||
{
|
||||
"name": "hive_schema",
|
||||
"num_children": 9
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_customer_id",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_salutation",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_first_name",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_last_name",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_preferred_cust_flag",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_birth_country",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_login",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_email_address",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_last_review_date",
|
||||
"converted_type": "UTF8"
|
||||
}
|
||||
],
|
||||
"num_rows": 1000,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"file_offset": 4,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_customer_id"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 8248,
|
||||
"total_compressed_size": 8248,
|
||||
"data_page_offset": 4,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "AAAAAAAAPPCAAAAA",
|
||||
"min_value": "AAAAAAAAAABAAAAA"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 8252,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_salutation"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 3362,
|
||||
"total_compressed_size": 3362,
|
||||
"data_page_offset": 8252,
|
||||
"statistics": {
|
||||
"null_count": 30,
|
||||
"max_value": "Sir",
|
||||
"min_value": "Dr."
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 11614,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_first_name"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 6595,
|
||||
"total_compressed_size": 6595,
|
||||
"data_page_offset": 11614,
|
||||
"statistics": {
|
||||
"null_count": 32,
|
||||
"max_value": "Zachary",
|
||||
"min_value": "Aaron"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 18209,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_last_name"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 6955,
|
||||
"total_compressed_size": 6955,
|
||||
"data_page_offset": 18209,
|
||||
"statistics": {
|
||||
"null_count": 24,
|
||||
"max_value": "Zamora",
|
||||
"min_value": "Adams"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 25164,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_preferred_cust_flag"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 1220,
|
||||
"total_compressed_size": 1220,
|
||||
"data_page_offset": 25164,
|
||||
"statistics": {
|
||||
"null_count": 29,
|
||||
"max_value": "Y",
|
||||
"min_value": "N"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 26384,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_birth_country"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 9599,
|
||||
"total_compressed_size": 9599,
|
||||
"data_page_offset": 26384,
|
||||
"statistics": {
|
||||
"null_count": 31,
|
||||
"max_value": "ZIMBABWE",
|
||||
"min_value": "AFGHANISTAN"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 35983,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_login"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 42,
|
||||
"total_compressed_size": 42,
|
||||
"data_page_offset": 35983,
|
||||
"statistics": {
|
||||
"null_count": 1000
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 36025,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_email_address"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 27763,
|
||||
"total_compressed_size": 27763,
|
||||
"data_page_offset": 36025,
|
||||
"statistics": {
|
||||
"null_count": 31,
|
||||
"max_value": "Zachary.Parsons@hHmnLrbKsfY.com",
|
||||
"min_value": "Aaron.Browder@iUpddkHI9z8.org"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 63788,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_last_review_date"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 1000,
|
||||
"total_uncompressed_size": 3511,
|
||||
"total_compressed_size": 3511,
|
||||
"data_page_offset": 63788,
|
||||
"statistics": {
|
||||
"null_count": 25,
|
||||
"max_value": "2452648",
|
||||
"min_value": "2452283"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"total_byte_size": 67295,
|
||||
"num_rows": 1000
|
||||
}
|
||||
],
|
||||
"created_by": "parquet-mr version 1.10.0 (build 031a6654009e3b82020012a18434c582bd74c73a)",
|
||||
"metadata_length": 1046
|
||||
}
|
||||
BIN
test/files/delta_byte_array.parquet
Normal file
BIN
test/files/delta_byte_array.parquet
Normal file
Binary file not shown.
1902
test/files/delta_encoding_optional_column.json
Normal file
1902
test/files/delta_encoding_optional_column.json
Normal file
File diff suppressed because it is too large
Load Diff
624
test/files/delta_encoding_optional_column.metadata.json
Normal file
624
test/files/delta_encoding_optional_column.metadata.json
Normal file
@ -0,0 +1,624 @@
|
||||
{
|
||||
"version": 1,
|
||||
"schema": [
|
||||
{
|
||||
"name": "hive_schema",
|
||||
"num_children": 17
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_customer_sk"
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_current_cdemo_sk"
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_current_hdemo_sk"
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_current_addr_sk"
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_first_shipto_date_sk"
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_first_sales_date_sk"
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_birth_day"
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_birth_month"
|
||||
},
|
||||
{
|
||||
"type": "INT64",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_birth_year"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_customer_id",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_salutation",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_first_name",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_last_name",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_preferred_cust_flag",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_birth_country",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_email_address",
|
||||
"converted_type": "UTF8"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "c_last_review_date",
|
||||
"converted_type": "UTF8"
|
||||
}
|
||||
],
|
||||
"num_rows": 100,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"file_offset": 4,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_customer_sk"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 81,
|
||||
"total_compressed_size": 81,
|
||||
"data_page_offset": 4,
|
||||
"statistics": {
|
||||
"max": 100,
|
||||
"min": 1,
|
||||
"null_count": 0,
|
||||
"max_value": 100,
|
||||
"min_value": 1
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 85,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_current_cdemo_sk"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 358,
|
||||
"total_compressed_size": 358,
|
||||
"data_page_offset": 85,
|
||||
"statistics": {
|
||||
"max": 1895444,
|
||||
"min": 8817,
|
||||
"null_count": 3,
|
||||
"max_value": 1895444,
|
||||
"min_value": 8817
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 443,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_current_hdemo_sk"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 311,
|
||||
"total_compressed_size": 311,
|
||||
"data_page_offset": 443,
|
||||
"statistics": {
|
||||
"max": 7135,
|
||||
"min": 37,
|
||||
"null_count": 2,
|
||||
"max_value": 7135,
|
||||
"min_value": 37
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 754,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_current_addr_sk"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 353,
|
||||
"total_compressed_size": 353,
|
||||
"data_page_offset": 754,
|
||||
"statistics": {
|
||||
"max": 49388,
|
||||
"min": 571,
|
||||
"null_count": 0,
|
||||
"max_value": 49388,
|
||||
"min_value": 571
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 1107,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_first_shipto_date_sk"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 297,
|
||||
"total_compressed_size": 297,
|
||||
"data_page_offset": 1107,
|
||||
"statistics": {
|
||||
"max": 2452641,
|
||||
"min": 2449130,
|
||||
"null_count": 1,
|
||||
"max_value": 2452641,
|
||||
"min_value": 2449130
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 1404,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_first_sales_date_sk"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 297,
|
||||
"total_compressed_size": 297,
|
||||
"data_page_offset": 1404,
|
||||
"statistics": {
|
||||
"max": 2452611,
|
||||
"min": 2449010,
|
||||
"null_count": 1,
|
||||
"max_value": 2452611,
|
||||
"min_value": 2449010
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 1701,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_birth_day"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 160,
|
||||
"total_compressed_size": 160,
|
||||
"data_page_offset": 1701,
|
||||
"statistics": {
|
||||
"max": 30,
|
||||
"min": 1,
|
||||
"null_count": 3,
|
||||
"max_value": 30,
|
||||
"min_value": 1
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 1861,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_birth_month"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 146,
|
||||
"total_compressed_size": 146,
|
||||
"data_page_offset": 1861,
|
||||
"statistics": {
|
||||
"max": 12,
|
||||
"min": 1,
|
||||
"null_count": 3,
|
||||
"max_value": 12,
|
||||
"min_value": 1
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 2007,
|
||||
"meta_data": {
|
||||
"type": "INT64",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_birth_year"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 172,
|
||||
"total_compressed_size": 172,
|
||||
"data_page_offset": 2007,
|
||||
"statistics": {
|
||||
"max": 1991,
|
||||
"min": 1925,
|
||||
"null_count": 3,
|
||||
"max_value": 1991,
|
||||
"min_value": 1925
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 2179,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_customer_id"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 976,
|
||||
"total_compressed_size": 976,
|
||||
"data_page_offset": 2179,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "AAAAAAAAPFAAAAAA",
|
||||
"min_value": "AAAAAAAAABAAAAAA"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 3155,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_salutation"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 376,
|
||||
"total_compressed_size": 376,
|
||||
"data_page_offset": 3155,
|
||||
"statistics": {
|
||||
"null_count": 3,
|
||||
"max_value": "Sir",
|
||||
"min_value": "Dr."
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 3531,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_first_name"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 694,
|
||||
"total_compressed_size": 694,
|
||||
"data_page_offset": 3531,
|
||||
"statistics": {
|
||||
"null_count": 3,
|
||||
"max_value": "William",
|
||||
"min_value": "Albert"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 4225,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_last_name"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 777,
|
||||
"total_compressed_size": 777,
|
||||
"data_page_offset": 4225,
|
||||
"statistics": {
|
||||
"null_count": 1,
|
||||
"max_value": "Young",
|
||||
"min_value": "Baker"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 5002,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_preferred_cust_flag"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 156,
|
||||
"total_compressed_size": 156,
|
||||
"data_page_offset": 5002,
|
||||
"statistics": {
|
||||
"null_count": 4,
|
||||
"max_value": "Y",
|
||||
"min_value": "N"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 5158,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_birth_country"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 1111,
|
||||
"total_compressed_size": 1111,
|
||||
"data_page_offset": 5158,
|
||||
"statistics": {
|
||||
"null_count": 4,
|
||||
"max_value": "WALLIS AND FUTUNA",
|
||||
"min_value": "AFGHANISTAN"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 6269,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_email_address"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 2813,
|
||||
"total_compressed_size": 2813,
|
||||
"data_page_offset": 6269,
|
||||
"statistics": {
|
||||
"null_count": 3,
|
||||
"max_value": "William.Warner@zegnrzurU.org",
|
||||
"min_value": "Albert.Brunson@62.com"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 9082,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_last_review_date"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 407,
|
||||
"total_compressed_size": 407,
|
||||
"data_page_offset": 9082,
|
||||
"statistics": {
|
||||
"null_count": 3,
|
||||
"max_value": "2452644",
|
||||
"min_value": "2452293"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"total_byte_size": 9485,
|
||||
"num_rows": 100
|
||||
}
|
||||
],
|
||||
"created_by": "parquet-mr version 1.10.0 (build 031a6654009e3b82020012a18434c582bd74c73a)",
|
||||
"metadata_length": 2070
|
||||
}
|
||||
BIN
test/files/delta_encoding_optional_column.parquet
Normal file
BIN
test/files/delta_encoding_optional_column.parquet
Normal file
Binary file not shown.
1902
test/files/delta_encoding_required_column.json
Normal file
1902
test/files/delta_encoding_required_column.json
Normal file
File diff suppressed because it is too large
Load Diff
746
test/files/delta_encoding_required_column.metadata.json
Normal file
746
test/files/delta_encoding_required_column.metadata.json
Normal file
@ -0,0 +1,746 @@
|
||||
{
|
||||
"version": 1,
|
||||
"schema": [
|
||||
{
|
||||
"name": "spark_schema",
|
||||
"num_children": 17
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_customer_sk:"
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_current_cdemo_sk:"
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_current_hdemo_sk:"
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_current_addr_sk:"
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_first_shipto_date_sk:"
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_first_sales_date_sk:"
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_birth_day:"
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_birth_month:"
|
||||
},
|
||||
{
|
||||
"type": "INT32",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_birth_year:"
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_customer_id:",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_salutation:",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_first_name:",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_last_name:",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_preferred_cust_flag:",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_birth_country:",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_email_address:",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "BYTE_ARRAY",
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "c_last_review_date:",
|
||||
"converted_type": "UTF8",
|
||||
"logical_type": {
|
||||
"type": "STRING"
|
||||
}
|
||||
}
|
||||
],
|
||||
"num_rows": 100,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"file_offset": 4,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_customer_sk:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 50,
|
||||
"total_compressed_size": 50,
|
||||
"data_page_offset": 4,
|
||||
"statistics": {
|
||||
"max": 105,
|
||||
"min": 1,
|
||||
"null_count": 0,
|
||||
"max_value": 105,
|
||||
"min_value": 1
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9714,
|
||||
"offset_index_length": 10,
|
||||
"column_index_offset": 9233,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
},
|
||||
{
|
||||
"file_offset": 54,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_current_cdemo_sk:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 388,
|
||||
"total_compressed_size": 388,
|
||||
"data_page_offset": 54,
|
||||
"statistics": {
|
||||
"max": 1895444,
|
||||
"min": 8817,
|
||||
"null_count": 0,
|
||||
"max_value": 1895444,
|
||||
"min_value": 8817
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9724,
|
||||
"offset_index_length": 11,
|
||||
"column_index_offset": 9256,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
},
|
||||
{
|
||||
"file_offset": 442,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_current_hdemo_sk:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 261,
|
||||
"total_compressed_size": 261,
|
||||
"data_page_offset": 442,
|
||||
"statistics": {
|
||||
"max": 7135,
|
||||
"min": 37,
|
||||
"null_count": 0,
|
||||
"max_value": 7135,
|
||||
"min_value": 37
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9735,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9279,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
},
|
||||
{
|
||||
"file_offset": 703,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_current_addr_sk:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 307,
|
||||
"total_compressed_size": 307,
|
||||
"data_page_offset": 703,
|
||||
"statistics": {
|
||||
"max": 49388,
|
||||
"min": 464,
|
||||
"null_count": 0,
|
||||
"max_value": 49388,
|
||||
"min_value": 464
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9747,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9302,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
},
|
||||
{
|
||||
"file_offset": 1010,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_first_shipto_date_sk:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 247,
|
||||
"total_compressed_size": 247,
|
||||
"data_page_offset": 1010,
|
||||
"statistics": {
|
||||
"max": 2452641,
|
||||
"min": 2449130,
|
||||
"null_count": 0,
|
||||
"max_value": 2452641,
|
||||
"min_value": 2449130
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9759,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9325,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
},
|
||||
{
|
||||
"file_offset": 1257,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_first_sales_date_sk:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 247,
|
||||
"total_compressed_size": 247,
|
||||
"data_page_offset": 1257,
|
||||
"statistics": {
|
||||
"max": 2452611,
|
||||
"min": 2449100,
|
||||
"null_count": 0,
|
||||
"max_value": 2452611,
|
||||
"min_value": 2449100
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9771,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9348,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
},
|
||||
{
|
||||
"file_offset": 1504,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_birth_day:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 131,
|
||||
"total_compressed_size": 131,
|
||||
"data_page_offset": 1504,
|
||||
"statistics": {
|
||||
"max": 30,
|
||||
"min": 1,
|
||||
"null_count": 0,
|
||||
"max_value": 30,
|
||||
"min_value": 1
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9783,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9371,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
},
|
||||
{
|
||||
"file_offset": 1635,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_birth_month:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 115,
|
||||
"total_compressed_size": 115,
|
||||
"data_page_offset": 1635,
|
||||
"statistics": {
|
||||
"max": 12,
|
||||
"min": 1,
|
||||
"null_count": 0,
|
||||
"max_value": 12,
|
||||
"min_value": 1
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9795,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9394,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
},
|
||||
{
|
||||
"file_offset": 1750,
|
||||
"meta_data": {
|
||||
"type": "INT32",
|
||||
"encodings": [
|
||||
"DELTA_BINARY_PACKED"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_birth_year:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 144,
|
||||
"total_compressed_size": 144,
|
||||
"data_page_offset": 1750,
|
||||
"statistics": {
|
||||
"max": 1991,
|
||||
"min": 1925,
|
||||
"null_count": 0,
|
||||
"max_value": 1991,
|
||||
"min_value": 1925
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BINARY_PACKED",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9807,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9417,
|
||||
"column_index_length": 23,
|
||||
"crypto_metadata": 23
|
||||
},
|
||||
{
|
||||
"file_offset": 1894,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_customer_id:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 933,
|
||||
"total_compressed_size": 933,
|
||||
"data_page_offset": 1894,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "AAAAAAAAPFAAAAAA",
|
||||
"min_value": "AAAAAAAAABAAAAAA"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9819,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9440,
|
||||
"column_index_length": 47,
|
||||
"crypto_metadata": 47
|
||||
},
|
||||
{
|
||||
"file_offset": 2827,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_salutation:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 378,
|
||||
"total_compressed_size": 378,
|
||||
"data_page_offset": 2827,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "Sir",
|
||||
"min_value": "Dr."
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9831,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9487,
|
||||
"column_index_length": 21,
|
||||
"crypto_metadata": 21
|
||||
},
|
||||
{
|
||||
"file_offset": 3205,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_first_name:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 707,
|
||||
"total_compressed_size": 707,
|
||||
"data_page_offset": 3205,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "William",
|
||||
"min_value": "Albert"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9843,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9508,
|
||||
"column_index_length": 28,
|
||||
"crypto_metadata": 28
|
||||
},
|
||||
{
|
||||
"file_offset": 3912,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_last_name:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 751,
|
||||
"total_compressed_size": 751,
|
||||
"data_page_offset": 3912,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "Young",
|
||||
"min_value": "Baker"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9855,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9536,
|
||||
"column_index_length": 25,
|
||||
"crypto_metadata": 25
|
||||
},
|
||||
{
|
||||
"file_offset": 4663,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_preferred_cust_flag:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 154,
|
||||
"total_compressed_size": 154,
|
||||
"data_page_offset": 4663,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "Y",
|
||||
"min_value": "N"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9867,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9561,
|
||||
"column_index_length": 17,
|
||||
"crypto_metadata": 17
|
||||
},
|
||||
{
|
||||
"file_offset": 4817,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_birth_country:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 1154,
|
||||
"total_compressed_size": 1154,
|
||||
"data_page_offset": 4817,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "WALLIS AND FUTUNA",
|
||||
"min_value": "AFGHANISTAN"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9879,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9578,
|
||||
"column_index_length": 43,
|
||||
"crypto_metadata": 43
|
||||
},
|
||||
{
|
||||
"file_offset": 5971,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_email_address:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 2857,
|
||||
"total_compressed_size": 2857,
|
||||
"data_page_offset": 5971,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "William.Warner@zegnrzurU.org",
|
||||
"min_value": "Albert.Brunson@62.com"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9891,
|
||||
"offset_index_length": 12,
|
||||
"column_index_offset": 9621,
|
||||
"column_index_length": 64,
|
||||
"crypto_metadata": 64
|
||||
},
|
||||
{
|
||||
"file_offset": 8828,
|
||||
"meta_data": {
|
||||
"type": "BYTE_ARRAY",
|
||||
"encodings": [
|
||||
"DELTA_BYTE_ARRAY"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"c_last_review_date:"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 100,
|
||||
"total_uncompressed_size": 405,
|
||||
"total_compressed_size": 405,
|
||||
"data_page_offset": 8828,
|
||||
"statistics": {
|
||||
"null_count": 0,
|
||||
"max_value": "2452644",
|
||||
"min_value": "2452293"
|
||||
},
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": 3,
|
||||
"encoding": "DELTA_BYTE_ARRAY",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"offset_index_offset": 9903,
|
||||
"offset_index_length": 13,
|
||||
"column_index_offset": 9685,
|
||||
"column_index_length": 29,
|
||||
"crypto_metadata": 29
|
||||
}
|
||||
],
|
||||
"total_byte_size": 9229,
|
||||
"num_rows": 100,
|
||||
"file_offset": 4,
|
||||
"total_compressed_size": 9229,
|
||||
"ordinal": 0
|
||||
}
|
||||
],
|
||||
"key_value_metadata": [
|
||||
{
|
||||
"key": "org.apache.spark.version",
|
||||
"value": "3.2.0"
|
||||
},
|
||||
{
|
||||
"key": "org.apache.spark.sql.parquet.row.metadata",
|
||||
"value": "{\"type\":\"struct\",\"fields\":[{\"name\":\"c_customer_sk:\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_current_cdemo_sk:\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_current_hdemo_sk:\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_current_addr_sk:\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_first_shipto_date_sk:\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_first_sales_date_sk:\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_birth_day:\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_birth_month:\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_birth_year:\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_customer_id:\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_salutation:\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_first_name:\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_last_name:\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_preferred_cust_flag:\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_birth_country:\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_email_address:\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}},{\"name\":\"c_last_review_date:\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}}]}"
|
||||
}
|
||||
],
|
||||
"created_by": "parquet-mr version 1.12.1 (build 2a5c06c58fa987f85aa22170be14d927d5ff6e7d)",
|
||||
"metadata_length": 3604
|
||||
}
|
||||
BIN
test/files/delta_encoding_required_column.parquet
Normal file
BIN
test/files/delta_encoding_required_column.parquet
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user