From 3583aeb549a09f65a8f770fdee538f648b4cedcf Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Sun, 28 Apr 2024 19:22:27 -0700 Subject: [PATCH] nullable.impala.parquet --- test/files/nullable.impala.json | 58 +++ test/files/nullable.impala.metadata.json | 591 +++++++++++++++++++++++ test/files/nullable.impala.parquet | Bin 0 -> 3896 bytes 3 files changed, 649 insertions(+) create mode 100644 test/files/nullable.impala.json create mode 100644 test/files/nullable.impala.metadata.json create mode 100644 test/files/nullable.impala.parquet diff --git a/test/files/nullable.impala.json b/test/files/nullable.impala.json new file mode 100644 index 0000000..0ff1a46 --- /dev/null +++ b/test/files/nullable.impala.json @@ -0,0 +1,58 @@ +[ + [ + 1, + [1, 2, 3], + [[1, 2], [3, 4]], + {"k1": 1, "k2": 100}, + [{"k1": 1}], + {"A":1,"b":[1],"C":{"d":[[{"E":10,"F":"aaa"},{"E":-10,"F":"bbb"}],[{"E":11,"F":"c"}]]},"g":{"foo":{"H":{"i":[1.1]}}}} + ], + [ + 2, + [null, 1, 2, null, 3, null], + [[null, 1, 2, null], [3, null, 4], [], null], + {"k1": 2, "k2": null}, + [{"k1": 1, "k3": null}, null, {}], + {"A":null,"b":[null],"C":{"d":[[{"E":null,"F":null},{"E":10,"F":"aaa"},{"E":null,"F":null},{"E":-10,"F":"bbb"},{"E":null,"F":null}],[{"E":11,"F":"c"},{}],[],null]},"g":{"g1":{"H":{"i":[2.2,null]}},"g2":{"H":{"i":[]}},"g3":{},"g4":{"H":{}},"g5":{"H":{}}}} + ], + [ + 3, + [], + [null], + {}, + [null, null], + {"A":null,"C":{"d":[]},"g":{}} + ], + [ + 4, + null, + [], + {}, + [], + {"A":null,"C":{}} + ], + [ + 5, + null, + null, + {}, + null, + {"A":null,"C":{},"g":{"foo":{"H":{"i":[2.2,3.3]}}}} + ], + [ + 6, + null, + null, + null, + null, + {} + ], + [ + 7, + null, + [null, [5, 6]], + {"k1": null, "k3": null}, + null, + {"A":7,"b":[2,3,null],"C":{"d":[[],[{}],null]}} + ] +] diff --git a/test/files/nullable.impala.metadata.json b/test/files/nullable.impala.metadata.json new file mode 100644 index 0000000..79229af --- /dev/null +++ b/test/files/nullable.impala.metadata.json @@ -0,0 +1,591 @@ +{ + "version": 1, + "schema": [ + { + "name": "org.apache.impala.ComplexTypesTbl", + "num_children": 6 + }, + { + "type": "INT64", + "repetition_type": "OPTIONAL", + "name": "id" + }, + { + "repetition_type": "OPTIONAL", + "name": "int_array", + "num_children": 1, + "converted_type": "LIST" + }, + { + "repetition_type": "REPEATED", + "name": "list", + "num_children": 1 + }, + { + "type": "INT32", + "repetition_type": "OPTIONAL", + "name": "element" + }, + { + "repetition_type": "OPTIONAL", + "name": "int_array_Array", + "num_children": 1, + "converted_type": "LIST" + }, + { + "repetition_type": "REPEATED", + "name": "list", + "num_children": 1 + }, + { + "repetition_type": "OPTIONAL", + "name": "element", + "num_children": 1, + "converted_type": "LIST" + }, + { + "repetition_type": "REPEATED", + "name": "list", + "num_children": 1 + }, + { + "type": "INT32", + "repetition_type": "OPTIONAL", + "name": "element" + }, + { + "repetition_type": "OPTIONAL", + "name": "int_map", + "num_children": 1, + "converted_type": "MAP" + }, + { + "repetition_type": "REPEATED", + "name": "map", + "num_children": 2, + "converted_type": "MAP_KEY_VALUE" + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "REQUIRED", + "name": "key", + "converted_type": "UTF8" + }, + { + "type": "INT32", + "repetition_type": "OPTIONAL", + "name": "value" + }, + { + "repetition_type": "OPTIONAL", + "name": "int_Map_Array", + "num_children": 1, + "converted_type": "LIST" + }, + { + "repetition_type": "REPEATED", + "name": "list", + "num_children": 1 + }, + { + "repetition_type": "OPTIONAL", + "name": "element", + "num_children": 1, + "converted_type": "MAP" + }, + { + "repetition_type": "REPEATED", + "name": "map", + "num_children": 2, + "converted_type": "MAP_KEY_VALUE" + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "REQUIRED", + "name": "key", + "converted_type": "UTF8" + }, + { + "type": "INT32", + "repetition_type": "OPTIONAL", + "name": "value" + }, + { + "repetition_type": "OPTIONAL", + "name": "nested_struct", + "num_children": 4 + }, + { + "type": "INT32", + "repetition_type": "OPTIONAL", + "name": "A" + }, + { + "repetition_type": "OPTIONAL", + "name": "b", + "num_children": 1, + "converted_type": "LIST" + }, + { + "repetition_type": "REPEATED", + "name": "list", + "num_children": 1 + }, + { + "type": "INT32", + "repetition_type": "OPTIONAL", + "name": "element" + }, + { + "repetition_type": "OPTIONAL", + "name": "C", + "num_children": 1 + }, + { + "repetition_type": "OPTIONAL", + "name": "d", + "num_children": 1, + "converted_type": "LIST" + }, + { + "repetition_type": "REPEATED", + "name": "list", + "num_children": 1 + }, + { + "repetition_type": "OPTIONAL", + "name": "element", + "num_children": 1, + "converted_type": "LIST" + }, + { + "repetition_type": "REPEATED", + "name": "list", + "num_children": 1 + }, + { + "repetition_type": "OPTIONAL", + "name": "element", + "num_children": 2 + }, + { + "type": "INT32", + "repetition_type": "OPTIONAL", + "name": "E" + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "OPTIONAL", + "name": "F", + "converted_type": "UTF8" + }, + { + "repetition_type": "OPTIONAL", + "name": "g", + "num_children": 1, + "converted_type": "MAP" + }, + { + "repetition_type": "REPEATED", + "name": "map", + "num_children": 2, + "converted_type": "MAP_KEY_VALUE" + }, + { + "type": "BYTE_ARRAY", + "repetition_type": "REQUIRED", + "name": "key", + "converted_type": "UTF8" + }, + { + "repetition_type": "OPTIONAL", + "name": "value", + "num_children": 1 + }, + { + "repetition_type": "OPTIONAL", + "name": "H", + "num_children": 1 + }, + { + "repetition_type": "OPTIONAL", + "name": "i", + "num_children": 1, + "converted_type": "LIST" + }, + { + "repetition_type": "REPEATED", + "name": "list", + "num_children": 1 + }, + { + "type": "DOUBLE", + "repetition_type": "OPTIONAL", + "name": "element" + } + ], + "num_rows": 7, + "row_groups": [ + { + "columns": [ + { + "file_offset": 4, + "meta_data": { + "type": "INT64", + "encodings": [ + "BIT_PACKED", + "PLAIN", + "RLE" + ], + "path_in_schema": [ + "id" + ], + "codec": "UNCOMPRESSED", + "num_values": 7, + "total_uncompressed_size": 103, + "total_compressed_size": 103, + "data_page_offset": 4, + "statistics": { + "max": 7, + "min": 1, + "null_count": 0 + } + } + }, + { + "file_offset": 107, + "meta_data": { + "type": "INT32", + "encodings": [ + "RLE", + "PLAIN_DICTIONARY" + ], + "path_in_schema": [ + "int_array", + "list", + "element" + ], + "codec": "UNCOMPRESSED", + "num_values": 14, + "total_uncompressed_size": 78, + "total_compressed_size": 78, + "data_page_offset": 107, + "statistics": { + "max": 3, + "min": 1, + "null_count": 8 + } + } + }, + { + "file_offset": 185, + "meta_data": { + "type": "INT32", + "encodings": [ + "RLE", + "PLAIN_DICTIONARY" + ], + "path_in_schema": [ + "int_array_Array", + "list", + "element", + "list", + "element" + ], + "codec": "UNCOMPRESSED", + "num_values": 20, + "total_uncompressed_size": 103, + "total_compressed_size": 103, + "data_page_offset": 185, + "statistics": { + "max": 6, + "min": 1, + "null_count": 10 + } + } + }, + { + "file_offset": 288, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "RLE", + "PLAIN_DICTIONARY" + ], + "path_in_schema": [ + "int_map", + "map", + "key" + ], + "codec": "UNCOMPRESSED", + "num_values": 10, + "total_uncompressed_size": 80, + "total_compressed_size": 80, + "data_page_offset": 288, + "statistics": { + "max": "k3", + "min": "k1", + "null_count": 4 + } + } + }, + { + "file_offset": 368, + "meta_data": { + "type": "INT32", + "encodings": [ + "PLAIN", + "RLE" + ], + "path_in_schema": [ + "int_map", + "map", + "value" + ], + "codec": "UNCOMPRESSED", + "num_values": 10, + "total_uncompressed_size": 61, + "total_compressed_size": 61, + "data_page_offset": 368, + "statistics": { + "max": 100, + "min": 1, + "null_count": 7 + } + } + }, + { + "file_offset": 429, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "RLE", + "PLAIN_DICTIONARY" + ], + "path_in_schema": [ + "int_Map_Array", + "list", + "element", + "map", + "key" + ], + "codec": "UNCOMPRESSED", + "num_values": 11, + "total_uncompressed_size": 77, + "total_compressed_size": 77, + "data_page_offset": 429, + "statistics": { + "max": "k3", + "min": "k1", + "null_count": 8 + } + } + }, + { + "file_offset": 506, + "meta_data": { + "type": "INT32", + "encodings": [ + "RLE", + "PLAIN_DICTIONARY" + ], + "path_in_schema": [ + "int_Map_Array", + "list", + "element", + "map", + "value" + ], + "codec": "UNCOMPRESSED", + "num_values": 11, + "total_uncompressed_size": 72, + "total_compressed_size": 72, + "data_page_offset": 506, + "statistics": { + "max": 1, + "min": 1, + "null_count": 9 + } + } + }, + { + "file_offset": 578, + "meta_data": { + "type": "INT32", + "encodings": [ + "BIT_PACKED", + "PLAIN", + "RLE" + ], + "path_in_schema": [ + "nested_struct", + "A" + ], + "codec": "UNCOMPRESSED", + "num_values": 7, + "total_uncompressed_size": 48, + "total_compressed_size": 48, + "data_page_offset": 578, + "statistics": { + "max": 7, + "min": 1, + "null_count": 5 + } + } + }, + { + "file_offset": 626, + "meta_data": { + "type": "INT32", + "encodings": [ + "PLAIN", + "RLE" + ], + "path_in_schema": [ + "nested_struct", + "b", + "list", + "element" + ], + "codec": "UNCOMPRESSED", + "num_values": 9, + "total_uncompressed_size": 63, + "total_compressed_size": 63, + "data_page_offset": 626, + "statistics": { + "max": 3, + "min": 1, + "null_count": 6 + } + } + }, + { + "file_offset": 689, + "meta_data": { + "type": "INT32", + "encodings": [ + "RLE", + "PLAIN_DICTIONARY" + ], + "path_in_schema": [ + "nested_struct", + "C", + "d", + "list", + "element", + "list", + "element", + "E" + ], + "codec": "UNCOMPRESSED", + "num_values": 19, + "total_uncompressed_size": 90, + "total_compressed_size": 90, + "data_page_offset": 689, + "statistics": { + "max": 11, + "min": -10, + "null_count": 13 + } + } + }, + { + "file_offset": 779, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "RLE", + "PLAIN_DICTIONARY" + ], + "path_in_schema": [ + "nested_struct", + "C", + "d", + "list", + "element", + "list", + "element", + "F" + ], + "codec": "UNCOMPRESSED", + "num_values": 19, + "total_uncompressed_size": 93, + "total_compressed_size": 93, + "data_page_offset": 779, + "statistics": { + "max": "c", + "min": "aaa", + "null_count": 13 + } + } + }, + { + "file_offset": 872, + "meta_data": { + "type": "BYTE_ARRAY", + "encodings": [ + "RLE", + "PLAIN_DICTIONARY" + ], + "path_in_schema": [ + "nested_struct", + "g", + "map", + "key" + ], + "codec": "UNCOMPRESSED", + "num_values": 11, + "total_uncompressed_size": 101, + "total_compressed_size": 101, + "data_page_offset": 872, + "statistics": { + "max": "g5", + "min": "foo", + "null_count": 4 + } + } + }, + { + "file_offset": 973, + "meta_data": { + "type": "DOUBLE", + "encodings": [ + "RLE", + "PLAIN_DICTIONARY" + ], + "path_in_schema": [ + "nested_struct", + "g", + "map", + "value", + "H", + "i", + "list", + "element" + ], + "codec": "UNCOMPRESSED", + "num_values": 13, + "total_uncompressed_size": 104, + "total_compressed_size": 104, + "data_page_offset": 973, + "statistics": { + "max": 3.3, + "min": 1.1, + "null_count": 9 + } + } + } + ], + "total_byte_size": 1073, + "num_rows": 7 + } + ], + "key_value_metadata": [ + { + "key": "parquet.avro.schema", + "value": "{\"type\":\"record\",\"name\":\"ComplexTypesTbl\",\"namespace\":\"org.apache.impala\",\"fields\":[{\"name\":\"id\",\"type\":[\"null\",\"long\"]},{\"name\":\"int_array\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",\"int\"]}]},{\"name\":\"int_array_Array\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",\"int\"]}]}]},{\"name\":\"int_map\",\"type\":[\"null\",{\"type\":\"map\",\"values\":[\"null\",\"int\"]}]},{\"name\":\"int_Map_Array\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",{\"type\":\"map\",\"values\":[\"null\",\"int\"]}]}]},{\"name\":\"nested_struct\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"r1\",\"fields\":[{\"name\":\"A\",\"type\":[\"null\",\"int\"]},{\"name\":\"b\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",\"int\"]}]},{\"name\":\"C\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"r2\",\"fields\":[{\"name\":\"d\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",{\"type\":\"record\",\"name\":\"r3\",\"fields\":[{\"name\":\"E\",\"type\":[\"null\",\"int\"]},{\"name\":\"F\",\"type\":[\"null\",\"string\"]}]}]}]}]}]}]},{\"name\":\"g\",\"type\":[\"null\",{\"type\":\"map\",\"values\":[\"null\",{\"type\":\"record\",\"name\":\"r4\",\"fields\":[{\"name\":\"H\",\"type\":[\"null\",{\"type\":\"record\",\"name\":\"r5\",\"fields\":[{\"name\":\"i\",\"type\":[\"null\",{\"type\":\"array\",\"items\":[\"null\",\"double\"]}]}]}]}]}]}]}]}]}]}" + } + ], + "created_by": "parquet-mr version 1.8.0 (build 0fda28af84b9746396014ad6a415b90592a98b3b)", + "metadata_length": 2811 +} diff --git a/test/files/nullable.impala.parquet b/test/files/nullable.impala.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2c72f52f30be891a52e080fb3d1c4e5446b09770 GIT binary patch literal 3896 zcmb7HO>7%Q6yDigI~zAiOPU$2BdbbHvZU2w#fcr9$`VOKs!$cFf(jKC)b7R`cNPC( zJ5kb>0un+w=71`QIB-NgbLyo%^-v_#69>crRooF3>IJ0m-t4ZuKX#Kk8_(>{_rCYd zdvE6@mzUnpQi0x~cXT=elQ>mUsxlxrA5|gljNvH4uk^Y12=DMCIfEd(W6T}n?&y(G znW|L11TFxHB%BULa63(>b(#W1oW@hCjFhU37AaU&BUvWW{CI+tT=spM0NL*-U{Ih1osPl8*=q@quA-5U z(L*r8ARq+&ffF7<8TafHgEBHSqf-`^fzjjsZ@lJgHa3|*1J8&g0#u>OB?=QK9YCT} z-ABR`;7S{f5Lui^kihH|J>{z&8H4JYVNfv)(ub#UBe(+iA}#846ef{u54rf*7X-1f zk4N{wJWwqDDnj}f2L}ho9NtB`s8h|0fs9X88Q0l3OnI)=`yUBo!Z6X z7)+<4G^)9&M$v|0;9#023RnZZK?h__l>l>8@{9DMTdv5uUaw;Y%eWFzeP<(aG&W{iGmCyN@xY{GH%$*#39;`TK2WB#Z0(lUM}a9L?!; z+>4Xqt*DA;!@B9F6T%?qjO~^`nEVos15x@$pkngBGpncT&2q+Q7;87Jj9qOQ6(h4; zuQn>y_LZH6)w*I<=n$0bl&Fe!Nyv-ppj~TUH=0dj2Z}B(i7X zwKhVJcc9modKm!3gCEK@fLv9h0YDL*N;t+o^=%h8P)wW94TkYoNTAM14Q?i5(iFxQg$TI2&XekVRNi_URuw&dMS;3vdcmei} zeC3x$1`tkMT<@#yXopqMgg=RFB$-P7t|@S+C-aIBPhu5NfsL?V2wa*{?AvCvUr7o| zY$`4%3*DOGwbDttz*6jc0vCXbk%pl?7d?in;Zh3l1a3gxw5#s92&AE2p*@mQbjXqA z)fCneRuGtuu^$LrfGR=<=yn0^j~LeiAt?CG+@PZ2Ep`h~;1LgcUoX&pi+dig1}ie4 z+b4pteF7I4c12(h(Eb_d1mqB=jxNUb2wY$a%J2mAwF2#{LFD6!ogNOL$hPC+LI@ocIA@4+wA+Rey8Po(_AY;XRiF$$LWB69Q;Rp8v4d_K&A% zf>&Nx6t{O$MplU(*eoa-XHN-SKme~>pL3o)JHbYTWJ*2RFq)rkTJ4On)vRY)aIRMk z_0iz%bQ{j|^tp7?TB|opX+2#ts(2qd?cIeIbOck)mD8VmrguSJ--uv4+IU-mWxH)v(SgSW=QvRHBy-O8SEf9&%dk^8D*8Kf z7J2_UTz;o9_2=sokB<05{Q0#9-tPV$!`0Eu0ydiPr3gdX$x%1mW(3la-Dg`ag-JBTpkdotMl8Tu7KslI7eEdHu{TlPjleb+tM-mEq!wyb8$uGc2AnL=h}V%pra zE2W8<^^!4LFxCq>bK$Mr{M^F)Og3kf=8ar7Z!XN_7iNuxf;nfN5&ngLIWEKh%>M&R C&}Di6 literal 0 HcmV?d00001