From ee192054b208a375be7a928e86c7ff11e5a36678 Mon Sep 17 00:00:00 2001 From: kroche98 Date: Wed, 2 Jul 2025 18:46:32 -0400 Subject: [PATCH] Skip plan for files with no rows (#98) --- src/plan.js | 2 +- test/files/issue97.json | 1 + test/files/issue97.metadata.json | 87 +++++++++++++++++++++++++++++++ test/files/issue97.parquet | Bin 0 -> 1726 bytes test/readFiles.test.js | 10 +++- 5 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 test/files/issue97.json create mode 100644 test/files/issue97.metadata.json create mode 100644 test/files/issue97.parquet diff --git a/src/plan.js b/src/plan.js index a7ca7c8..c8d9d1b 100644 --- a/src/plan.js +++ b/src/plan.js @@ -26,7 +26,7 @@ export function parquetPlan({ metadata, rowStart = 0, rowEnd = Infinity, columns const groupRows = Number(rowGroup.num_rows) const groupEnd = groupStart + groupRows // if row group overlaps with row range, add it to the plan - if (groupEnd >= rowStart && groupStart < rowEnd) { + if (groupRows > 0 && groupEnd >= rowStart && groupStart < rowEnd) { /** @type {ByteRange[]} */ const ranges = [] // loop through each column chunk diff --git a/test/files/issue97.json b/test/files/issue97.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/files/issue97.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/files/issue97.metadata.json b/test/files/issue97.metadata.json new file mode 100644 index 0000000..0835543 --- /dev/null +++ b/test/files/issue97.metadata.json @@ -0,0 +1,87 @@ +{ + "version": 2, + "schema": [ + { + "repetition_type": "REQUIRED", + "name": "schema", + "num_children": 2 + }, + { + "type": "DOUBLE", + "repetition_type": "OPTIONAL", + "name": "a" + }, + { + "type": "BOOLEAN", + "repetition_type": "OPTIONAL", + "name": "b" + } + ], + "num_rows": 0, + "row_groups": [ + { + "columns": [ + { + "file_offset": 0, + "meta_data": { + "type": "DOUBLE", + "encodings": [ + "PLAIN", + "RLE" + ], + "path_in_schema": [ + "a" + ], + "codec": "UNCOMPRESSED", + "num_values": 0, + "total_uncompressed_size": 14, + "total_compressed_size": 14, + "data_page_offset": 0, + "dictionary_page_offset": 4, + "encoding_stats": [ + { + "page_type": "DICTIONARY_PAGE", + "encoding": "PLAIN", + "count": 1 + } + ] + } + }, + { + "file_offset": 0, + "meta_data": { + "type": "BOOLEAN", + "encodings": [ + "RLE" + ], + "path_in_schema": [ + "b" + ], + "codec": "UNCOMPRESSED", + "num_values": 0, + "total_uncompressed_size": 0, + "total_compressed_size": 0, + "data_page_offset": 0, + "encoding_stats": [] + } + } + ], + "total_byte_size": 14, + "num_rows": 0, + "file_offset": 4, + "total_compressed_size": 14 + } + ], + "key_value_metadata": [ + { + "key": "pandas", + "value": "{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"stop\": 0, \"step\": 1}], \"column_indexes\": [{\"name\": null, \"field_name\": null, \"pandas_type\": \"unicode\", \"numpy_type\": \"object\", \"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\": [{\"name\": \"a\", \"field_name\": \"a\", \"pandas_type\": \"float64\", \"numpy_type\": \"float64\", \"metadata\": null}, {\"name\": \"b\", \"field_name\": \"b\", \"pandas_type\": \"bool\", \"numpy_type\": \"bool\", \"metadata\": null}], \"creator\": {\"library\": \"pyarrow\", \"version\": \"20.0.0\"}, \"pandas_version\": \"2.3.0\"}" + }, + { + "key": "ARROW:schema", + "value": "/////+ACAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABBAAQAAAAAAAKAAwAAAAEAAgACgAAAEACAAAEAAAAAQAAAAwAAAAIAAwABAAIAAgAAAAYAgAABAAAAAsCAAB7ImluZGV4X2NvbHVtbnMiOiBbeyJraW5kIjogInJhbmdlIiwgIm5hbWUiOiBudWxsLCAic3RhcnQiOiAwLCAic3RvcCI6IDAsICJzdGVwIjogMX1dLCAiY29sdW1uX2luZGV4ZXMiOiBbeyJuYW1lIjogbnVsbCwgImZpZWxkX25hbWUiOiBudWxsLCAicGFuZGFzX3R5cGUiOiAidW5pY29kZSIsICJudW1weV90eXBlIjogIm9iamVjdCIsICJtZXRhZGF0YSI6IHsiZW5jb2RpbmciOiAiVVRGLTgifX1dLCAiY29sdW1ucyI6IFt7Im5hbWUiOiAiYSIsICJmaWVsZF9uYW1lIjogImEiLCAicGFuZGFzX3R5cGUiOiAiZmxvYXQ2NCIsICJudW1weV90eXBlIjogImZsb2F0NjQiLCAibWV0YWRhdGEiOiBudWxsfSwgeyJuYW1lIjogImIiLCAiZmllbGRfbmFtZSI6ICJiIiwgInBhbmRhc190eXBlIjogImJvb2wiLCAibnVtcHlfdHlwZSI6ICJib29sIiwgIm1ldGFkYXRhIjogbnVsbH1dLCAiY3JlYXRvciI6IHsibGlicmFyeSI6ICJweWFycm93IiwgInZlcnNpb24iOiAiMjAuMC4wIn0sICJwYW5kYXNfdmVyc2lvbiI6ICIyLjMuMCJ9AAYAAABwYW5kYXMAAAIAAABAAAAABAAAANj///8AAAEGEAAAABgAAAAEAAAAAAAAAAEAAABiAAAABAAEAAQAAAAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAGAAAAAQAAAAAAAAAAQAAAGEABgAIAAYABgAAAAAAAgAAAAAA" + } + ], + "created_by": "parquet-cpp-arrow version 20.0.0", + "metadata_length": 1700 +} \ No newline at end of file diff --git a/test/files/issue97.parquet b/test/files/issue97.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a1ba6ab1c82d68c74203daf91672b49693646bc2 GIT binary patch literal 1726 zcmZuy>yFw+6rM(_Z58ELB~~Eu16j&SQP~vCvJFxHj0rZT3klxDx2jrsYy$@0;tO79 zwNKF3=!5l9dS=X0*z5qFbDQ&hbIy$Ugq3dXq=stfnm&F;sCLr$y^rdrsl9Rot9F8F zFJHf?|6qZa&IFyIMzhgAN6p&H#%pxi0ACG^XU((bIXeI8O_Sw&fe@lGs8ORKgoQxZ zIg(!?8VsRlP1I=DPh%@|thE03$vx+Vj(c}&M}8iJDfhnh=RLOp8%SJYg-e%fx46&> zTrh;W?}L_RR+53*VR97jMVE@*hrhsH<-29c-Cf$_EDO)|o!etuMe8;zV+zORp=U=< z#UKyja_@-9+O;#vEO0Z+u`&z7?>RSwa4%fawO>Aq7w@=-12V@fIEyYnB2k&1a9H?} zm0ex3Djab*a2$}JC3aVb(eAM1o$FTnIkH6CK3m#&O=kTNB z4~#Fdu*4$9OFUdMn;;zd3u+wg{Uf#>bQmlP^UD|)7}mkek{EOX9YzXx!f$fm=cc4y z>ild=#%e~wsrS(nh+9q)OY3jsb+nYj$%+JyFMGvO4*DyieW5YAqur&~L+sf-Wo3tR zu;F5-Z|$LcC4a!FJe+)UB(AaY^<&7tF>hpw4II0Ls}4G@F0j0EFz~EET{}bO&rDre zL5_}b1AND+XKMX5;gy&KHp{1~igf*X=`Egcw@Zi@GpOGqFW6Uw8CaT{n&M!;7daSt ztS-;+HG{jYq0jkQ#pfBmW=eRmGh5GD4x*_YLsM3cG}`xIaZ@azUk7+Hi!}q^Cz7%t zftW!(uAm+s>tQHBKcQ#c!@5kigfA+LP|fVIzi`HW@wkpa&#L}+eMb^E2Gnier}3_@ zy@?O@t?gB{B$Dshfmpf~cHwGbX$ON|#liILa26B(lGS;-#`$!3S;%3B_N_4B>>2uO z;RI@F^Zu65oI|<1UQZ!zGQik?Gc4@lrXa%+-<@oBzGrK=RPVqxlGvqy%S09Ws0I}U zkIE1a3cECOEX?Uek}$UMhyR^af{-Z-0w-12b1wo4KmZAY&ehn>sNRaL { rowEnd: numRows, onComplete(rows) { const base = filename.replace('.parquet', '') - const expected = [fileToJson(`test/files/${base}.json`).at(-1)] - expect(toJson(rows)).toEqual(expected) + if (filename === 'issue97.parquet') { + // issue97 has no rows + const expected = fileToJson(`test/files/${base}.json`) + expect(toJson(rows)).toEqual(expected) + } else { + const expected = [fileToJson(`test/files/${base}.json`).at(-1)] + expect(toJson(rows)).toEqual(expected) + } }, }) })