Add another column to page_indexed test

This commit is contained in:
Kenny Daniel 2025-04-26 16:59:06 -07:00
parent fe85ba1184
commit b7db4653e7
No known key found for this signature in database
GPG Key ID: FDF16101AF5AFD3A
6 changed files with 502 additions and 345 deletions

@ -2,107 +2,33 @@
[
{
"boundary_order": "ASCENDING",
"max_values": [
"good",
"good",
"good",
"good",
"good",
"good",
"good",
"good",
"good",
"good"
],
"min_values": [
"bad",
"bad",
"bad",
"bad",
"bad",
"bad",
"bad",
"bad",
"bad",
"bad"
],
"null_counts": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"null_pages": [
false,
false,
false,
false,
false,
false,
false,
false,
false,
false
]
"max_values": [9, 19, 29, 39, 49, 59, 69, 79, 89, 99],
"min_values": [0, 10, 20, 30, 40, 50, 60, 70, 80, 90],
"null_counts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"null_pages": [false, false, false, false, false, false, false, false, false, false]
},
{
"boundary_order": "ASCENDING",
"max_values": ["good", "good", "good", "good", "good", "good", "good", "good", "good", "good"],
"min_values": ["bad", "bad", "bad", "bad", "bad", "bad", "bad", "bad", "bad", "bad"],
"null_counts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"null_pages": [false, false, false, false, false, false, false, false, false, false]
}
],
[
{
"boundary_order": "ASCENDING",
"max_values": [109, 119, 129, 139, 149, 159, 169, 179, 189, 199],
"min_values": [100, 110, 120, 130, 140, 150, 160, 170, 180, 190],
"null_counts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"null_pages": [false, false, false, false, false, false, false, false, false, false]
},
{
"boundary_order": "UNORDERED",
"max_values": [
"good",
"bad",
"good",
"bad",
"good",
"bad",
"good",
"good",
"bad",
"good"
],
"min_values": [
"bad",
"bad",
"bad",
"bad",
"bad",
"bad",
"bad",
"bad",
"bad",
"bad"
],
"null_counts": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"null_pages": [
false,
false,
false,
false,
false,
false,
false,
false,
false,
false
]
"max_values": ["good", "bad", "good", "bad", "good", "bad", "good", "good", "bad", "good"],
"min_values": ["bad", "bad", "bad", "bad", "bad", "bad", "bad", "bad", "bad", "bad"],
"null_counts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"null_pages": [false, false, false, false, false, false, false, false, false, false]
}
]
]

@ -1,202 +1,202 @@
[
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["good"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"],
["bad"]
[0, "bad"],
[1, "bad"],
[2, "bad"],
[3, "bad"],
[4, "bad"],
[5, "bad"],
[6, "good"],
[7, "bad"],
[8, "bad"],
[9, "bad"],
[10, "good"],
[11, "bad"],
[12, "bad"],
[13, "bad"],
[14, "bad"],
[15, "bad"],
[16, "bad"],
[17, "bad"],
[18, "bad"],
[19, "bad"],
[20, "bad"],
[21, "bad"],
[22, "bad"],
[23, "bad"],
[24, "bad"],
[25, "bad"],
[26, "bad"],
[27, "bad"],
[28, "bad"],
[29, "good"],
[30, "bad"],
[31, "bad"],
[32, "good"],
[33, "bad"],
[34, "bad"],
[35, "bad"],
[36, "bad"],
[37, "good"],
[38, "bad"],
[39, "bad"],
[40, "bad"],
[41, "bad"],
[42, "good"],
[43, "bad"],
[44, "bad"],
[45, "bad"],
[46, "bad"],
[47, "bad"],
[48, "bad"],
[49, "bad"],
[50, "bad"],
[51, "bad"],
[52, "bad"],
[53, "bad"],
[54, "bad"],
[55, "bad"],
[56, "good"],
[57, "bad"],
[58, "good"],
[59, "bad"],
[60, "bad"],
[61, "bad"],
[62, "bad"],
[63, "bad"],
[64, "bad"],
[65, "bad"],
[66, "bad"],
[67, "bad"],
[68, "good"],
[69, "bad"],
[70, "bad"],
[71, "bad"],
[72, "good"],
[73, "bad"],
[74, "bad"],
[75, "bad"],
[76, "bad"],
[77, "good"],
[78, "bad"],
[79, "bad"],
[80, "bad"],
[81, "bad"],
[82, "bad"],
[83, "good"],
[84, "bad"],
[85, "bad"],
[86, "bad"],
[87, "bad"],
[88, "bad"],
[89, "bad"],
[90, "bad"],
[91, "bad"],
[92, "bad"],
[93, "bad"],
[94, "bad"],
[95, "bad"],
[96, "bad"],
[97, "bad"],
[98, "good"],
[99, "bad"],
[100, "good"],
[101, "bad"],
[102, "bad"],
[103, "bad"],
[104, "bad"],
[105, "bad"],
[106, "bad"],
[107, "bad"],
[108, "bad"],
[109, "good"],
[110, "bad"],
[111, "bad"],
[112, "bad"],
[113, "bad"],
[114, "bad"],
[115, "bad"],
[116, "bad"],
[117, "bad"],
[118, "bad"],
[119, "bad"],
[120, "bad"],
[121, "bad"],
[122, "bad"],
[123, "bad"],
[124, "bad"],
[125, "bad"],
[126, "bad"],
[127, "bad"],
[128, "good"],
[129, "bad"],
[130, "bad"],
[131, "bad"],
[132, "bad"],
[133, "bad"],
[134, "bad"],
[135, "bad"],
[136, "bad"],
[137, "bad"],
[138, "bad"],
[139, "bad"],
[140, "bad"],
[141, "bad"],
[142, "bad"],
[143, "bad"],
[144, "bad"],
[145, "good"],
[146, "bad"],
[147, "bad"],
[148, "good"],
[149, "bad"],
[150, "bad"],
[151, "bad"],
[152, "bad"],
[153, "bad"],
[154, "bad"],
[155, "bad"],
[156, "bad"],
[157, "bad"],
[158, "bad"],
[159, "bad"],
[160, "bad"],
[161, "bad"],
[162, "bad"],
[163, "bad"],
[164, "good"],
[165, "bad"],
[166, "bad"],
[167, "bad"],
[168, "good"],
[169, "bad"],
[170, "bad"],
[171, "good"],
[172, "bad"],
[173, "bad"],
[174, "bad"],
[175, "bad"],
[176, "bad"],
[177, "bad"],
[178, "bad"],
[179, "bad"],
[180, "bad"],
[181, "bad"],
[182, "bad"],
[183, "bad"],
[184, "bad"],
[185, "bad"],
[186, "bad"],
[187, "bad"],
[188, "bad"],
[189, "bad"],
[190, "good"],
[191, "bad"],
[192, "bad"],
[193, "bad"],
[194, "bad"],
[195, "bad"],
[196, "bad"],
[197, "bad"],
[198, "bad"],
[199, "bad"]
]

@ -4,12 +4,17 @@
{
"repetition_type": "REQUIRED",
"name": "schema",
"num_children": 1
"num_children": 2
},
{
"type": "INT64",
"repetition_type": "OPTIONAL",
"name": "row"
},
{
"type": "BYTE_ARRAY",
"repetition_type": "OPTIONAL",
"name": "col",
"name": "quality",
"converted_type": "UTF8",
"logical_type": {
"type": "STRING"
@ -21,7 +26,50 @@
{
"columns": [
{
"file_offset": 338,
"file_offset": 0,
"meta_data": {
"type": "INT64",
"encodings": [
"PLAIN",
"RLE",
"RLE_DICTIONARY"
],
"path_in_schema": [
"row"
],
"codec": "SNAPPY",
"num_values": 100,
"total_uncompressed_size": 1197,
"total_compressed_size": 828,
"data_page_offset": 432,
"dictionary_page_offset": 4,
"statistics": {
"max": 99,
"min": 0,
"null_count": 0,
"max_value": 99,
"min_value": 0
},
"encoding_stats": [
{
"page_type": "DICTIONARY_PAGE",
"encoding": "PLAIN",
"count": 1
},
{
"page_type": "DATA_PAGE",
"encoding": "RLE_DICTIONARY",
"count": 10
}
]
},
"offset_index_offset": 2986,
"offset_index_length": 86,
"column_index_offset": 2326,
"column_index_length": 211
},
{
"file_offset": 0,
"meta_data": {
"type": "BYTE_ARRAY",
"encodings": [
@ -30,14 +78,14 @@
"RLE_DICTIONARY"
],
"path_in_schema": [
"col"
"quality"
],
"codec": "SNAPPY",
"num_values": 100,
"total_uncompressed_size": 312,
"total_compressed_size": 334,
"data_page_offset": 35,
"dictionary_page_offset": 4,
"data_page_offset": 863,
"dictionary_page_offset": 832,
"statistics": {
"null_count": 0,
"max_value": "good",
@ -56,22 +104,65 @@
}
]
},
"offset_index_offset": 1036,
"offset_index_length": 85,
"column_index_offset": 798,
"offset_index_offset": 3072,
"offset_index_length": 86,
"column_index_offset": 2537,
"column_index_length": 121
}
],
"total_byte_size": 312,
"total_byte_size": 1509,
"num_rows": 100,
"file_offset": 4,
"total_compressed_size": 334,
"total_compressed_size": 1162,
"ordinal": 0
},
{
"columns": [
{
"file_offset": 731,
"file_offset": 0,
"meta_data": {
"type": "INT64",
"encodings": [
"PLAIN",
"RLE",
"RLE_DICTIONARY"
],
"path_in_schema": [
"row"
],
"codec": "SNAPPY",
"num_values": 100,
"total_uncompressed_size": 1197,
"total_compressed_size": 832,
"data_page_offset": 1598,
"dictionary_page_offset": 1166,
"statistics": {
"max": 199,
"min": 100,
"null_count": 0,
"max_value": 199,
"min_value": 100
},
"encoding_stats": [
{
"page_type": "DICTIONARY_PAGE",
"encoding": "PLAIN",
"count": 1
},
{
"page_type": "DATA_PAGE",
"encoding": "RLE_DICTIONARY",
"count": 10
}
]
},
"offset_index_offset": 3158,
"offset_index_length": 86,
"column_index_offset": 2658,
"column_index_length": 211
},
{
"file_offset": 0,
"meta_data": {
"type": "BYTE_ARRAY",
"encodings": [
@ -80,14 +171,14 @@
"RLE_DICTIONARY"
],
"path_in_schema": [
"col"
"quality"
],
"codec": "SNAPPY",
"num_values": 100,
"total_uncompressed_size": 306,
"total_compressed_size": 328,
"data_page_offset": 434,
"dictionary_page_offset": 403,
"data_page_offset": 2029,
"dictionary_page_offset": 1998,
"statistics": {
"null_count": 0,
"max_value": "good",
@ -106,25 +197,25 @@
}
]
},
"offset_index_offset": 1121,
"offset_index_offset": 3244,
"offset_index_length": 86,
"column_index_offset": 919,
"column_index_offset": 2869,
"column_index_length": 117
}
],
"total_byte_size": 306,
"total_byte_size": 1503,
"num_rows": 100,
"file_offset": 403,
"total_compressed_size": 328,
"file_offset": 1166,
"total_compressed_size": 1160,
"ordinal": 1
}
],
"key_value_metadata": [
{
"key": "ARROW:schema",
"value": "/////3AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAEAAAAUAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAEFEAAAABgAAAAEAAAAAAAAAAMAAABjb2wABAAEAAQAAAAAAAAA"
"value": "/////6gAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAIAAABEAAAABAAAANT///8AAAEFEAAAABwAAAAEAAAAAAAAAAcAAABxdWFsaXR5AAQABAAEAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAECEAAAABwAAAAEAAAAAAAAAAMAAAByb3cACAAMAAgABwAIAAAAAAAAAUAAAAA="
}
],
"created_by": "parquet-cpp-arrow version 15.0.0",
"metadata_length": 447
"created_by": "parquet-cpp-arrow version 19.0.1",
"metadata_length": 761
}

@ -1,112 +1,220 @@
[
[
{
"page_locations": [
{
"compressed_page_size": 36,
"first_row_index": 0,
"offset": 432
},
{
"compressed_page_size": 38,
"first_row_index": 10,
"offset": 468
},
{
"compressed_page_size": 38,
"first_row_index": 20,
"offset": 506
},
{
"compressed_page_size": 40,
"first_row_index": 30,
"offset": 544
},
{
"compressed_page_size": 40,
"first_row_index": 40,
"offset": 584
},
{
"compressed_page_size": 40,
"first_row_index": 50,
"offset": 624
},
{
"compressed_page_size": 42,
"first_row_index": 60,
"offset": 664
},
{
"compressed_page_size": 42,
"first_row_index": 70,
"offset": 706
},
{
"compressed_page_size": 42,
"first_row_index": 80,
"offset": 748
},
{
"compressed_page_size": 42,
"first_row_index": 90,
"offset": 790
}
]
},
{
"page_locations": [
{
"compressed_page_size": 30,
"first_row_index": 0,
"offset": 35
"offset": 863
},
{
"compressed_page_size": 30,
"first_row_index": 10,
"offset": 65
"offset": 893
},
{
"compressed_page_size": 31,
"first_row_index": 20,
"offset": 95
"offset": 923
},
{
"compressed_page_size": 30,
"first_row_index": 30,
"offset": 126
"offset": 954
},
{
"compressed_page_size": 30,
"first_row_index": 40,
"offset": 156
"offset": 984
},
{
"compressed_page_size": 30,
"first_row_index": 50,
"offset": 186
"offset": 1014
},
{
"compressed_page_size": 31,
"first_row_index": 60,
"offset": 216
"offset": 1044
},
{
"compressed_page_size": 30,
"first_row_index": 70,
"offset": 247
"offset": 1075
},
{
"compressed_page_size": 30,
"first_row_index": 80,
"offset": 277
"offset": 1105
},
{
"compressed_page_size": 31,
"first_row_index": 90,
"offset": 307
"offset": 1135
}
]
}
],
[
{
"page_locations": [
{
"compressed_page_size": 36,
"first_row_index": 0,
"offset": 1598
},
{
"compressed_page_size": 38,
"first_row_index": 10,
"offset": 1634
},
{
"compressed_page_size": 38,
"first_row_index": 20,
"offset": 1672
},
{
"compressed_page_size": 40,
"first_row_index": 30,
"offset": 1710
},
{
"compressed_page_size": 40,
"first_row_index": 40,
"offset": 1750
},
{
"compressed_page_size": 40,
"first_row_index": 50,
"offset": 1790
},
{
"compressed_page_size": 42,
"first_row_index": 60,
"offset": 1830
},
{
"compressed_page_size": 42,
"first_row_index": 70,
"offset": 1872
},
{
"compressed_page_size": 42,
"first_row_index": 80,
"offset": 1914
},
{
"compressed_page_size": 42,
"first_row_index": 90,
"offset": 1956
}
]
},
{
"page_locations": [
{
"compressed_page_size": 30,
"first_row_index": 0,
"offset": 434
"offset": 2029
},
{
"compressed_page_size": 29,
"first_row_index": 10,
"offset": 464
"offset": 2059
},
{
"compressed_page_size": 31,
"first_row_index": 20,
"offset": 493
"offset": 2088
},
{
"compressed_page_size": 29,
"first_row_index": 30,
"offset": 524
"offset": 2119
},
{
"compressed_page_size": 30,
"first_row_index": 40,
"offset": 553
"offset": 2148
},
{
"compressed_page_size": 29,
"first_row_index": 50,
"offset": 583
"offset": 2178
},
{
"compressed_page_size": 30,
"first_row_index": 60,
"offset": 612
"offset": 2207
},
{
"compressed_page_size": 30,
"first_row_index": 70,
"offset": 642
"offset": 2237
},
{
"compressed_page_size": 29,
"first_row_index": 80,
"offset": 672
"offset": 2267
},
{
"compressed_page_size": 30,
"first_row_index": 90,
"offset": 701
"offset": 2296
}
]
}

Binary file not shown.

@ -178,13 +178,13 @@ describe('parquetRead', () => {
rowStart: 90,
rowEnd: 91,
})
expect(rows).toEqual([{ col: 'bad' }])
expect(convertWithDictionary).toHaveBeenCalledTimes(2)
expect(rows).toEqual([{ row: 90n, quality: 'bad' }])
expect(convertWithDictionary).toHaveBeenCalledTimes(4)
})
it('reads individual pages', async () => {
const file = await asyncBufferFromFile('test/files/page_indexed.parquet')
/** @type {import('../src/types.js').ColumnData[]} */
const file = countingBuffer(await asyncBufferFromFile('test/files/page_indexed.parquet'))
/** @type {ColumnData[]} */
const pages = []
await parquetRead({
@ -196,7 +196,13 @@ describe('parquetRead', () => {
expect(pages).toEqual([
{
columnName: 'col',
columnName: 'row',
columnData: Array.from({ length: 100 }, (_, i) => BigInt(i)),
rowStart: 0,
rowEnd: 100,
},
{
columnName: 'quality',
columnData: [
'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'good', 'bad', 'bad', 'bad',
'good', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
@ -213,7 +219,13 @@ describe('parquetRead', () => {
rowEnd: 100,
},
{
columnName: 'col',
columnName: 'row',
columnData: Array.from({ length: 100 }, (_, i) => BigInt(i + 100)),
rowStart: 100,
rowEnd: 200,
},
{
columnName: 'quality',
columnData: [
'good', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'good',
'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad', 'bad',
@ -230,5 +242,25 @@ describe('parquetRead', () => {
rowEnd: 200,
},
])
expect(file.fetches).toBe(3) // 1 metadata, 2 rowgroups
})
})
/**
 * Wraps an AsyncBuffer to count the number of fetches made.
 *
 * Every call to `slice` on the returned wrapper increments `fetches` by one
 * and then forwards the same arguments to the wrapped buffer's `slice`,
 * returning whatever that call returns.
 *
 * @import {AsyncBuffer, ColumnData} from '../src/types.js'
 * @param {AsyncBuffer} asyncBuffer buffer whose `slice` calls should be counted
 * @returns {AsyncBuffer & {fetches: number}} wrapper exposing the running count
 */
function countingBuffer(asyncBuffer) {
  /** @type {AsyncBuffer & {fetches: number}} */
  const counted = {
    ...asyncBuffer,
    // Object spread only copies own enumerable properties, so a byteLength
    // exposed through a prototype getter (e.g. on an ArrayBuffer) would be
    // dropped without this explicit copy.
    byteLength: asyncBuffer.byteLength,
    fetches: 0,
    slice(start, end) {
      // Count through the closure instead of `this` so the tally stays on the
      // wrapper even when `slice` is destructured or passed as a callback.
      counted.fetches++
      return asyncBuffer.slice(start, end)
    },
  }
  return counted
}