mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Skip plan for files with no rows (#98)
This commit is contained in:
parent
3e3ddf343a
commit
ee192054b2
@ -26,7 +26,7 @@ export function parquetPlan({ metadata, rowStart = 0, rowEnd = Infinity, columns
|
||||
const groupRows = Number(rowGroup.num_rows)
|
||||
const groupEnd = groupStart + groupRows
|
||||
// if row group overlaps with row range, add it to the plan
|
||||
if (groupEnd >= rowStart && groupStart < rowEnd) {
|
||||
if (groupRows > 0 && groupEnd >= rowStart && groupStart < rowEnd) {
|
||||
/** @type {ByteRange[]} */
|
||||
const ranges = []
|
||||
// loop through each column chunk
|
||||
|
||||
1
test/files/issue97.json
Normal file
1
test/files/issue97.json
Normal file
@ -0,0 +1 @@
|
||||
[]
|
||||
87
test/files/issue97.metadata.json
Normal file
87
test/files/issue97.metadata.json
Normal file
@ -0,0 +1,87 @@
|
||||
{
|
||||
"version": 2,
|
||||
"schema": [
|
||||
{
|
||||
"repetition_type": "REQUIRED",
|
||||
"name": "schema",
|
||||
"num_children": 2
|
||||
},
|
||||
{
|
||||
"type": "DOUBLE",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "a"
|
||||
},
|
||||
{
|
||||
"type": "BOOLEAN",
|
||||
"repetition_type": "OPTIONAL",
|
||||
"name": "b"
|
||||
}
|
||||
],
|
||||
"num_rows": 0,
|
||||
"row_groups": [
|
||||
{
|
||||
"columns": [
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "DOUBLE",
|
||||
"encodings": [
|
||||
"PLAIN",
|
||||
"RLE"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"a"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 0,
|
||||
"total_uncompressed_size": 14,
|
||||
"total_compressed_size": 14,
|
||||
"data_page_offset": 0,
|
||||
"dictionary_page_offset": 4,
|
||||
"encoding_stats": [
|
||||
{
|
||||
"page_type": "DICTIONARY_PAGE",
|
||||
"encoding": "PLAIN",
|
||||
"count": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"file_offset": 0,
|
||||
"meta_data": {
|
||||
"type": "BOOLEAN",
|
||||
"encodings": [
|
||||
"RLE"
|
||||
],
|
||||
"path_in_schema": [
|
||||
"b"
|
||||
],
|
||||
"codec": "UNCOMPRESSED",
|
||||
"num_values": 0,
|
||||
"total_uncompressed_size": 0,
|
||||
"total_compressed_size": 0,
|
||||
"data_page_offset": 0,
|
||||
"encoding_stats": []
|
||||
}
|
||||
}
|
||||
],
|
||||
"total_byte_size": 14,
|
||||
"num_rows": 0,
|
||||
"file_offset": 4,
|
||||
"total_compressed_size": 14
|
||||
}
|
||||
],
|
||||
"key_value_metadata": [
|
||||
{
|
||||
"key": "pandas",
|
||||
"value": "{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"stop\": 0, \"step\": 1}], \"column_indexes\": [{\"name\": null, \"field_name\": null, \"pandas_type\": \"unicode\", \"numpy_type\": \"object\", \"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\": [{\"name\": \"a\", \"field_name\": \"a\", \"pandas_type\": \"float64\", \"numpy_type\": \"float64\", \"metadata\": null}, {\"name\": \"b\", \"field_name\": \"b\", \"pandas_type\": \"bool\", \"numpy_type\": \"bool\", \"metadata\": null}], \"creator\": {\"library\": \"pyarrow\", \"version\": \"20.0.0\"}, \"pandas_version\": \"2.3.0\"}"
|
||||
},
|
||||
{
|
||||
"key": "ARROW:schema",
|
||||
"value": "/////+ACAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABBAAQAAAAAAAKAAwAAAAEAAgACgAAAEACAAAEAAAAAQAAAAwAAAAIAAwABAAIAAgAAAAYAgAABAAAAAsCAAB7ImluZGV4X2NvbHVtbnMiOiBbeyJraW5kIjogInJhbmdlIiwgIm5hbWUiOiBudWxsLCAic3RhcnQiOiAwLCAic3RvcCI6IDAsICJzdGVwIjogMX1dLCAiY29sdW1uX2luZGV4ZXMiOiBbeyJuYW1lIjogbnVsbCwgImZpZWxkX25hbWUiOiBudWxsLCAicGFuZGFzX3R5cGUiOiAidW5pY29kZSIsICJudW1weV90eXBlIjogIm9iamVjdCIsICJtZXRhZGF0YSI6IHsiZW5jb2RpbmciOiAiVVRGLTgifX1dLCAiY29sdW1ucyI6IFt7Im5hbWUiOiAiYSIsICJmaWVsZF9uYW1lIjogImEiLCAicGFuZGFzX3R5cGUiOiAiZmxvYXQ2NCIsICJudW1weV90eXBlIjogImZsb2F0NjQiLCAibWV0YWRhdGEiOiBudWxsfSwgeyJuYW1lIjogImIiLCAiZmllbGRfbmFtZSI6ICJiIiwgInBhbmRhc190eXBlIjogImJvb2wiLCAibnVtcHlfdHlwZSI6ICJib29sIiwgIm1ldGFkYXRhIjogbnVsbH1dLCAiY3JlYXRvciI6IHsibGlicmFyeSI6ICJweWFycm93IiwgInZlcnNpb24iOiAiMjAuMC4wIn0sICJwYW5kYXNfdmVyc2lvbiI6ICIyLjMuMCJ9AAYAAABwYW5kYXMAAAIAAABAAAAABAAAANj///8AAAEGEAAAABgAAAAEAAAAAAAAAAEAAABiAAAABAAEAAQAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAGAAAAAQAAAAAAAAAAQAAAGEABgAIAAYABgAAAAAAAgAAAAAA"
|
||||
}
|
||||
],
|
||||
"created_by": "parquet-cpp-arrow version 20.0.0",
|
||||
"metadata_length": 1700
|
||||
}
|
||||
BIN
test/files/issue97.parquet
Normal file
BIN
test/files/issue97.parquet
Normal file
Binary file not shown.
@ -37,8 +37,14 @@ describe('parquetRead test files', () => {
|
||||
rowEnd: numRows,
|
||||
onComplete(rows) {
|
||||
const base = filename.replace('.parquet', '')
|
||||
const expected = [fileToJson(`test/files/${base}.json`).at(-1)]
|
||||
expect(toJson(rows)).toEqual(expected)
|
||||
if (filename === 'issue97.parquet') {
|
||||
// issue97 has no rows
|
||||
const expected = fileToJson(`test/files/${base}.json`)
|
||||
expect(toJson(rows)).toEqual(expected)
|
||||
} else {
|
||||
const expected = [fileToJson(`test/files/${base}.json`).at(-1)]
|
||||
expect(toJson(rows)).toEqual(expected)
|
||||
}
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user