diff --git a/README.md b/README.md index 4e397e9..3749c8b 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ You can extend support for other compression codecs using the `compressors` opti import { parquetRead } from 'hyparquet' import { gunzipSync } from 'zlib' -parquetRead({ file, compressors: { +await parquetRead({ file, compressors: { GZIP: (input, output) => output.set(gunzipSync(input)), // add gzip support }}) ``` @@ -199,7 +199,7 @@ To use hysnappy for faster parsing of large parquet files, override the `SNAPPY` import { parquetRead } from 'hyparquet' import { snappyUncompressor } from 'hysnappy' -parquetRead({ file, compressors: { +await parquetRead({ file, compressors: { SNAPPY: snappyUncompressor(), }}) ``` diff --git a/src/byteStreamSplit.js b/src/byteStreamSplit.js new file mode 100644 index 0000000..c308fce --- /dev/null +++ b/src/byteStreamSplit.js @@ -0,0 +1,14 @@ +/** + * @param {import('./types.d.ts').DataReader} reader + * @param {number} nValues + * @param {Float32Array | Float64Array} output + */ +export function byteStreamSplit(reader, nValues, output) { + const byteWidth = output instanceof Float32Array ? 4 : 8 + const bytes = new Uint8Array(output.buffer) + for (let b = 0; b < byteWidth; b++) { + for (let i = 0; i < nValues; i++) { + bytes[i * byteWidth + b] = reader.view.getUint8(reader.offset++) + } + } +} diff --git a/src/column.js b/src/column.js index 408a17b..c79542f 100644 --- a/src/column.js +++ b/src/column.js @@ -143,8 +143,7 @@ function dereferenceDictionary(dictionary, dataPage) { * @param {ColumnMetaData} columnMetadata * @returns {number} byte offset */ -export function getColumnOffset(columnMetadata) { - const { dictionary_page_offset, data_page_offset } = columnMetadata +export function getColumnOffset({ dictionary_page_offset, data_page_offset }) { let columnOffset = dictionary_page_offset if (dictionary_page_offset === undefined || data_page_offset < dictionary_page_offset) { columnOffset = data_page_offset @@ -160,7 +159,7 @@ export function getColumnOffset(columnMetadata) { * @returns {Uint8Array} */ export function decompressPage(compressedBytes, uncompressed_page_size, codec, compressors) { - /** @type {Uint8Array | undefined} */ + /** @type {Uint8Array} */ let page const customDecompressor = compressors?.[codec] if (codec === 'UNCOMPRESSED') { diff --git a/src/datapage.js b/src/datapage.js index a3b99f9..eaa197c 100644 --- a/src/datapage.js +++ b/src/datapage.js @@ -1,3 +1,4 @@ +import { byteStreamSplit } from './byteStreamSplit.js' import { readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js' import { readPlain } from './plain.js' import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired } from './schema.js' @@ -16,11 +17,11 @@ import { getMaxDefinitionLevel, getMaxRepetitionLevel, isRequired } from './sche * @param {ColumnMetaData} columnMetadata * @returns {DataPage} definition levels, repetition levels, and array of values */ -export function readDataPage(bytes, daph, schemaPath, columnMetadata) { +export function readDataPage(bytes, daph, schemaPath, { type }) { const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength) const reader = { view, offset: 0 } /** @type {DecodedArray} */ - let dataPage = [] + let dataPage // repetition and definition levels const repetitionLevels = readRepetitionLevels(reader, daph, schemaPath) @@ -30,26 +31,25 @@ export function readDataPage(bytes, daph, schemaPath, columnMetadata) { const nValues = daph.num_values - numNulls if (daph.encoding === 'PLAIN') { const { type_length } = schemaPath[schemaPath.length - 1].element - dataPage = readPlain(reader, columnMetadata.type, nValues, type_length) + dataPage = readPlain(reader, type, nValues, type_length) } else if ( daph.encoding === 'PLAIN_DICTIONARY' || daph.encoding === 'RLE_DICTIONARY' || daph.encoding === 'RLE' ) { - // bit width is stored as single byte - let bitWidth = 1 // TODO: RLE encoding uses bitWidth = schemaElement.type_length - if (columnMetadata.type !== 'BOOLEAN') { - bitWidth = view.getUint8(reader.offset) - reader.offset++ - } + const bitWidth = type === 'BOOLEAN' ? 1 : view.getUint8(reader.offset++) if (bitWidth) { dataPage = new Array(nValues) readRleBitPackedHybrid(reader, bitWidth, view.byteLength - reader.offset, dataPage) } else { - // nval zeros - dataPage = new Array(nValues).fill(0) + dataPage = new Uint8Array(nValues) // nValue zeroes } + } else if (daph.encoding === 'BYTE_STREAM_SPLIT') { + if (type === 'FLOAT') dataPage = new Float32Array(nValues) + else if (type === 'DOUBLE') dataPage = new Float64Array(nValues) + else throw new Error(`parquet byte_stream_split unsupported type: ${type}`) + byteStreamSplit(reader, nValues, dataPage) } else { throw new Error(`parquet unsupported encoding: ${daph.encoding}`) } diff --git a/src/datapageV2.js b/src/datapageV2.js index c7fcaa6..33bcea9 100644 --- a/src/datapageV2.js +++ b/src/datapageV2.js @@ -1,3 +1,4 @@ +import { byteStreamSplit } from './byteStreamSplit.js' import { decompressPage } from './column.js' import { deltaBinaryUnpack, deltaByteArray, deltaLengthByteArray } from './delta.js' import { readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js' @@ -22,7 +23,7 @@ import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js' export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata, compressors) { const view = new DataView(compressedBytes.buffer, compressedBytes.byteOffset, compressedBytes.byteLength) const reader = { view, offset: 0 } - + const { codec, type } = columnMetadata const daph2 = ph.data_page_header_v2 if (!daph2) throw new Error('parquet data page header v2 is undefined') @@ -38,9 +39,7 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata, const uncompressedPageSize = ph.uncompressed_page_size - daph2.definition_levels_byte_length - daph2.repetition_levels_byte_length let page = compressedBytes.subarray(reader.offset) - if (daph2.is_compressed && columnMetadata.codec !== 'UNCOMPRESSED') { - page = decompressPage(page, uncompressedPageSize, columnMetadata.codec, compressors) - } + page = decompressPage(page, uncompressedPageSize, codec, compressors) const pageView = new DataView(page.buffer, page.byteOffset, page.byteLength) const pageReader = { view: pageView, offset: 0 } @@ -50,7 +49,7 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata, const nValues = daph2.num_values - daph2.num_nulls if (daph2.encoding === 'PLAIN') { const { type_length } = schemaPath[schemaPath.length - 1].element - dataPage = readPlain(pageReader, columnMetadata.type, nValues, type_length) + dataPage = readPlain(pageReader, type, nValues, type_length) } else if (daph2.encoding === 'RLE') { pageReader.offset = 4 dataPage = new Array(nValues) @@ -64,7 +63,7 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata, dataPage = new Array(nValues) readRleBitPackedHybrid(pageReader, bitWidth, uncompressedPageSize, dataPage) } else if (daph2.encoding === 'DELTA_BINARY_PACKED') { - const int32 = columnMetadata.type === 'INT32' + const int32 = type === 'INT32' dataPage = int32 ? new Int32Array(nValues) : new BigInt64Array(nValues) deltaBinaryUnpack(pageReader, nValues, dataPage) } else if (daph2.encoding === 'DELTA_LENGTH_BYTE_ARRAY') { @@ -73,6 +72,11 @@ export function readDataPageV2(compressedBytes, ph, schemaPath, columnMetadata, } else if (daph2.encoding === 'DELTA_BYTE_ARRAY') { dataPage = new Array(nValues) deltaByteArray(pageReader, nValues, dataPage) + } else if (daph2.encoding === 'BYTE_STREAM_SPLIT') { + if (type === 'FLOAT') dataPage = new Float32Array(nValues) + else if (type === 'DOUBLE') dataPage = new Float64Array(nValues) + else throw new Error(`parquet byte_stream_split unsupported type: ${type}`) + byteStreamSplit(pageReader, nValues, dataPage) } else { throw new Error(`parquet unsupported encoding: ${daph2.encoding}`) } diff --git a/src/encoding.js b/src/encoding.js index 67c1ee8..4fdad0f 100644 --- a/src/encoding.js +++ b/src/encoding.js @@ -24,22 +24,20 @@ export function widthFromMaxInt(value) { */ export function readRleBitPackedHybrid(reader, width, length, values) { if (!length) { - length = reader.view.getInt32(reader.offset, true) + length = reader.view.getUint32(reader.offset, true) reader.offset += 4 - if (length < 0) throw new Error(`parquet invalid rle/bitpack length ${length}`) } let seen = 0 - const startOffset = reader.offset - while (reader.offset - startOffset < length && seen < values.length) { + while (seen < values.length) { const header = readVarInt(reader) - if ((header & 1) === 0) { + if (header & 1) { + // bit-packed + seen = readBitPacked(reader, header, width, values, seen) + } else { // rle const count = header >>> 1 readRle(reader, count, width, values, seen) seen += count - } else { - // bit-packed - seen = readBitPacked(reader, header, width, values, seen) } } } diff --git a/test/encoding.test.js b/test/encoding.test.js index 01e62f0..d05b3db 100644 --- a/test/encoding.test.js +++ b/test/encoding.test.js @@ -98,17 +98,6 @@ describe('readRleBitPackedHybrid', () => { expect(() => readRleBitPackedHybrid(reader, 1, 3, values)) .toThrow('parquet bitpack offset 1 out of range') }) - - it('throws for negative implicit length', () => { - const buffer = new ArrayBuffer(4) - const view = new DataView(buffer) - view.setInt32(0, -1, true) // negative length - const reader = { view, offset: 0 } - - const values = new Array(3) - expect(() => readRleBitPackedHybrid(reader, 1, 0, values)) - .toThrow('parquet invalid rle/bitpack length -1') - }) }) describe('widthFromMaxInt', () => { diff --git a/test/files/byte_stream_split.zstd.json b/test/files/byte_stream_split.zstd.json new file mode 100644 index 0000000..fb7155d --- /dev/null +++ b/test/files/byte_stream_split.zstd.json @@ -0,0 +1,302 @@ +[ + [1.764052391052246, -1.3065268517353166], + [0.40015721321105957, 1.658130679618188], + [0.978738009929657, -0.11816404512856976], + [2.2408931255340576, -0.6801782039968504], + [1.8675580024719238, 0.6663830820319143], + [-0.9772778749465942, -0.4607197873885533], + [0.9500884413719177, -1.3342584714027534], + [-0.15135720372200012, -1.3467175057975553], + [-0.10321885347366333, 0.6937731526901325], + [0.4105985164642334, -0.1595734381462669], + [0.14404356479644775, -0.13370155966843916], + [1.4542734622955322, 1.0777438059762627], + [0.7610377073287964, -1.1268258087567435], + [0.12167501449584961, -0.7306777528648248], + [0.44386324286460876, -0.38487980918127546], + [0.3336743414402008, 0.094351589317074], + [1.4940791130065918, -0.042171451290578935], + [-0.2051582634449005, -0.2868871923899076], + [0.3130677044391632, -0.0616264020956474], + [-0.8540957570075989, -0.10730527629117469], + [-2.5529897212982178, -0.7196043885517929], + [0.653618574142456, -0.8129929885540773], + [0.8644362092018127, 0.2745163577239395], + [-0.7421650290489197, -0.8909150829955279], + [2.269754648208618, -1.1573552591908536], + [-1.4543657302856445, -0.3122922511256933], + [0.04575851559638977, -0.1576670161638159], + [-0.18718385696411133, 2.2567234972982093], + [1.5327792167663574, -0.7047002758562337], + [1.4693588018417358, 0.9432607249694948], + [0.154947429895401, 0.7471883342046318], + [0.37816253304481506, -1.188944955203736], + [-0.8877857327461243, 0.7732529774025997], + [-1.980796456336975, -1.1838806401933177], + [-0.34791216254234314, -2.659172237996741], + [0.15634897351264954, 0.6063195243593807], + [1.2302906513214111, -1.7558905834377194], + [1.202379822731018, 0.45093446180591484], + [-0.38732680678367615, -0.6840108977372166], + [-0.302302747964859, 1.6595507961898721], + [-1.0485529899597168, 1.068509399316009], + [-1.420017957687378, -0.45338580385138766], + [-1.7062702178955078, -0.6878376110286823], + [1.950775384902954, -1.2140774030941206], + [-0.5096521973609924, -0.4409226322925914], + [-0.4380742907524109, -0.2803554951845091], + [-1.2527953386306763, -0.3646935443916854], + [0.7774903774261475, 0.15670385527236397], + [-1.6138978004455566, 0.5785214977288784], + [-0.21274028718471527, 0.349654456993174], + [-0.8954665660858154, -0.764143923906443], + [0.38690251111984253, -1.4377914738015785], + [-0.5108051300048828, 1.3645318481024713], + [-1.18063223361969, -0.6894491845499376], + [-0.02818222902715206, -0.6522935999350191], + [0.4283318817615509, -0.5211893123011109], + [0.06651721894741058, -1.8430695501566485], + [0.30247190594673157, -0.4779740040404867], + [-0.6343221068382263, -0.47965581400794766], + [-0.3627411723136902, 0.6203582983435125], + [-0.6724604368209839, 0.698457149107336], + [-0.35955315828323364, 0.00377088908626934], + [-0.8131462931632996, 0.9318483741143037], + [-1.7262825965881348, 0.339964983801262], + [0.17742614448070526, -0.01568211160255477], + [-0.4017809331417084, 0.16092816829822298], + [-1.630198359489441, -0.19065349358139935], + [0.46278226375579834, -0.3948495140334503], + [-0.9072983860969543, -0.26773353689396645], + [0.05194539576768875, -1.1280113314700069], + [0.7290905714035034, 0.280441705316296], + [0.12898291647434235, -0.9931236109295807], + [1.1394007205963135, 0.8416312640736364], + [-1.234825849533081, -0.24945858016094885], + [0.4023416340351105, 0.04949498165009074], + [-0.6848101019859314, 0.49383677628095635], + [-0.8707971572875977, 0.6433144650629279], + [-0.5788496732711792, -1.5706234086334527], + [-0.3115525245666504, -0.20690367616397173], + [0.056165341287851334, 0.8801789120807822], + [-1.1651498079299927, -1.6981058194322545], + [0.9008265137672424, 0.3872804753950634], + [0.4656624495983124, -2.2555642294021894], + [-1.5362436771392822, -1.0225068436356035], + [1.4882521629333496, 0.0386305518401881], + [1.895889163017273, -1.6567151023219537], + [1.1787796020507812, -0.9855107376841507], + [-0.1799248307943344, -1.4718350074635869], + [-1.0707526206970215, 1.6481349322075596], + [1.0544517040252686, 0.16422775548733395], + [-0.4031769335269928, 0.5672902778526694], + [1.222445011138916, -0.2226751005151545], + [0.2082749754190445, -0.35343174875719907], + [0.9766390323638916, -1.6164741886510325], + [0.3563663959503174, -0.2918373627478628], + [0.7065731883049011, -0.7614922118116233], + [0.01050002034753561, 0.8579239242923363], + [1.7858705520629883, 1.1411018666575734], + [0.12691208720207214, 1.4665787155741776], + [0.4019893705844879, 0.852551939461232], + [1.8831506967544556, -0.5986539369229861], + [-1.3477590084075928, -1.1158969859603944], + [-1.2704850435256958, 0.7666631816450861], + [0.969396710395813, 0.3562928174722889], + [-1.1731233596801758, -1.7685384506770307], + [1.9436211585998535, 0.35548179274376907], + [-0.4136189818382263, 0.8145198224878663], + [-0.747454822063446, 0.058925589181629955], + [1.922942042350769, -0.18505367100934153], + [1.4805147647857666, -0.8076484876163557], + [1.8675589561462402, -1.4465346995633879], + [0.9060446619987488, 0.8002979493400275], + [-0.8612256646156311, -0.3091144447717088], + [1.910064935684204, -0.23346666154369272], + [-0.26800337433815, 1.7327211869191332], + [0.8024563789367676, 0.6845011068591904], + [0.9472519755363464, 0.3708250012811021], + [-0.15501008927822113, 0.14206180518723566], + [0.6140793561935425, 1.5199948607657727], + [0.922206699848175, 1.7195893074161945], + [0.37642553448677063, 0.9295051114795281], + [-1.0994007587432861, 0.5822245913979243], + [0.29823818802833557, -2.0946030712061448], + [1.3263858556747437, 0.12372191423350658], + [-0.694567859172821, -0.130106954193704], + [-0.14963454008102417, 0.09395322938556872], + [-0.4351535439491272, 0.9430460873225178], + [1.8492637872695923, -2.7396771671895563], + [0.6722947359085083, -0.5693120534701851], + [0.40746182203292847, 0.26990435494076137], + [-0.7699160575866699, -0.4668455460527625], + [0.5392491817474365, -1.4169061131262595], + [-0.6743326783180237, 0.8689634868967954], + [0.0318305566906929, 0.27687190584612803], + [-0.6358460783958435, -0.9711045704444846], + [0.676433265209198, 0.3148172045158238], + [0.5765908360481262, 0.8215857120497958], + [-0.20829875767230988, 0.005292646299360854], + [0.39600670337677, 0.8005648034309968], + [-1.0930615663528442, 0.07826017516166135], + [-1.4912575483322144, -0.39522898265435435], + [0.43939170241355896, -1.159420516399913], + [0.16667349636554718, -0.08593076697161273], + [0.6350314617156982, 0.19429293804577166], + [2.3831448554992676, 0.8758327615873309], + [0.9444794654846191, -0.11510746848722672], + [-0.9128222465515137, 0.4574156062209908], + [1.117016315460205, -0.9646120137337284], + [-1.31590735912323, -0.7826291558275251], + [-0.46158459782600403, -0.11038929902688775], + [-0.06824160367250443, -1.0546284639850139], + [1.7133426666259766, 0.8202478373246812], + [-0.7447548508644104, 0.4631303293186071], + [-0.8264385461807251, 0.2790957643924534], + [-0.09845252335071564, 0.33890412521594454], + [-0.6634783148765564, 2.0210435614847975], + [1.1266359090805054, -0.46886418796679563], + [-1.0799314975738525, -2.201441285500558], + [-1.1474686861038208, 0.1993001968964652], + [-0.43782004714012146, -0.050603540961665895], + [-0.49803245067596436, -0.5175190425104033], + [1.9295320510864258, -0.9788298593587699], + [0.9494208097457886, -0.43918952180214793], + [0.08755124360322952, 0.18133842921782128], + [-1.225435495376587, -0.5028167006425383], + [0.8443629741668701, 2.4124536795437486], + [-1.0002152919769287, -0.960504381633148], + [-1.5447710752487183, -0.7931173627076716], + [1.1880297660827637, -2.2886200400145285], + [0.31694260239601135, 0.251484415021537], + [0.9208588004112244, -2.01640662779976], + [0.31872764229774475, -0.5394546333745014], + [0.8568305969238281, -0.27567053456055696], + [-0.6510255932807922, -0.7097279658468882], + [-1.034242868423462, 1.738872677454511], + [0.6815944910049438, 0.9943943913154989], + [-0.8034096360206604, 1.3191368763015756], + [-0.6895498037338257, -0.8824188185499185], + [-0.4555324912071228, 1.1285940645145685], + [0.01747915893793106, 0.4960009463439622], + [-0.3539939224720001, 0.7714059486768455], + [-1.3749512434005737, 1.0294388287827672], + [-0.6436184048652649, -0.9087632459590531], + [-2.223403215408325, -0.4243176209779015], + [0.6252314448356628, 0.8625960113284511], + [-1.602057695388794, -2.655619092974933], + [-1.1043833494186401, 1.5133280825732052], + [0.05216507986187935, 0.553132064207584], + [-0.73956298828125, -0.045703960660234855], + [1.543014645576477, 0.2205076557571733], + [-1.2928569316864014, -1.0299352833089765], + [0.2670508623123169, -0.34994336458910474], + [-0.039282817393541336, 1.1002843382203737], + [-1.1680934429168701, 1.2980219723262212], + [0.523276686668396, 2.6962240525635797], + [-0.1715463250875473, -0.07392466628041514], + [0.7717905640602112, -0.6585529668050037], + [0.8235041499137878, -0.5142339659399888], + [2.163235902786255, -1.0180418752873648], + [1.336527943611145, -0.07785475594085076], + [-0.3691818416118622, 0.38273243001226814], + [-0.2393791824579239, -0.03424228053195387], + [1.0996595621109009, 1.0963468456657985], + [0.6552637219429016, -0.23421580134453654], + [0.6401315331459045, -0.3474506524985633], + [-1.6169559955596924, -0.5812684768603252], + [-0.024326125159859657, -1.6326345262344952], + [-0.7380309104919434, -1.567767724308454], + [0.279924601316452, -1.1791579306376878], + [-0.09815038740634918, 1.3014280716647608], + [0.9101788997650146, 0.8952602728899299], + [0.31721821427345276, 1.3749640663929898], + [0.7863279581069946, -1.3322116545945017], + [-0.4664191007614136, -1.9686246897860202], + [-0.9444462656974792, -0.6600563201340829], + [-0.410049706697464, 0.175818953296028], + [-0.017020413652062416, 0.4986902749098275], + [0.37915173172950745, 1.0479721559680528], + [2.2593090534210205, 0.2842796708072146], + [-0.0422571524977684, 1.7426687806556311], + [-0.9559450149536133, -0.22260568094832048], + [-0.34598177671432495, -0.9130792180417964], + [-0.463595986366272, -1.6812182154944335], + [0.4814814627170563, -0.8889713580954499], + [-1.5407969951629639, 0.242117960985123], + [0.06326199322938919, -0.8887202573536308], + [0.15650653839111328, 0.9367424635352571], + [0.23218104243278503, 1.412327706037443], + [-0.5973160862922668, -2.369586905226603], + [-0.23792172968387604, 0.8640523004976479], + [-1.4240609407424927, -2.2396040586617367], + [-0.49331986904144287, 0.4014990550902875], + [-0.5428614616394043, 1.2248705641936597], + [0.4160500466823578, 0.06485610634357618], + [-1.1561824083328247, -1.2796891732042395], + [0.7811980843544006, -0.5854312042777726], + [1.494484543800354, -0.2616454457109007], + [-2.0699849128723145, -0.18224478378994294], + [0.42625874280929565, -0.20289684076666706], + [0.676908016204834, -0.1098827793093138], + [-0.6374370455741882, 0.2134800489101689], + [-0.3972718119621277, -1.2085736537332212], + [-0.1328805834054947, -0.2420198298702195], + [-0.29779088497161865, 1.5182611703557054], + [-0.3090129792690277, -0.38464542314251776], + [-1.6760038137435913, -0.4438360931551978], + [1.1523315906524658, 1.0781973037142378], + [1.0796185731887817, -2.5591846663440965], + [-0.8133642673492432, 1.1813786012882859], + [-1.4664243459701538, -0.6319037580051673], + [0.5210648775100708, 0.16392857245258663], + [-0.5757879614830017, 0.09632135592119682], + [0.14195317029953003, 0.9424681192203938], + [-0.3193284273147583, -0.2675947462353477], + [0.6915387511253357, -0.6780257815644504], + [0.694749116897583, 1.2978457906510987], + [-0.7255973815917969, -2.36417381714118], + [-1.383363962173462, 0.02033418170524325], + [-1.5829384326934814, -1.3479254226291204], + [0.6103793978691101, -0.761573388256559], + [-1.188859224319458, 2.011256681463137], + [-0.5068163275718689, -0.044595426455857026], + [-0.596314013004303, 0.19506969715138117], + [-0.05256729573011398, -1.7815628557055914], + [-1.9362797737121582, -0.7290446587946957], + [0.1887785941362381, 0.19655740072878491], + [0.523891031742096, 0.3547576931132181], + [0.08842208981513977, 0.6168865543932788], + [-0.3108861744403839, 0.008627898917576322], + [0.09740016609430313, 0.5270042084546597], + [0.3990463316440582, 0.453781912635684], + [-2.772592782974243, -1.8297404110045314], + [1.9559123516082764, 0.03700572191014953], + [0.3900933265686035, 0.7679024077327037], + [-0.6524085998535156, 0.5898798207345195], + [-0.3909533619880676, -0.3638588099707899], + [0.4937417805194855, -0.8056265075393678], + [-0.11610393971204758, -1.1183119243216322], + [-2.030684471130371, -0.13105401154141233], + [2.06449294090271, 1.133079879559722], + [-0.11054065823554993, -1.951804101481602], + [1.0201727151870728, -0.659891729729498], + [-0.6920498609542847, -1.139802455426774], + [1.5363770723342896, 0.7849575212405001], + [0.28634369373321533, -0.5543096265713009], + [0.6088438630104065, -0.4706376581547914], + [-1.0452533960342407, -0.216949569936649], + [1.211145281791687, 0.4453932508947973], + [0.6898181438446045, -0.39238899814963674], + [1.3018462657928467, -3.0461430547999266], + [-0.62808758020401, 0.5433118913875197], + [-0.48102712631225586, 0.43904295767204254], + [2.3039166927337646, -0.21954102833121325], + [-1.0600157976150513, -1.0840366206719345], + [-0.13594970107078552, 0.3517801106813583], + [1.1368913650512695, 0.37923553353558676], + [0.09772496670484543, -0.4700328827008748], + [0.582953691482544, -0.21673147057553863], + [-0.39944902062416077, -0.9301565025243212], + [0.3700558841228485, -0.17858909208732915] +] diff --git a/test/files/byte_stream_split.zstd.metadata.json b/test/files/byte_stream_split.zstd.metadata.json new file mode 100644 index 0000000..48df755 --- /dev/null +++ b/test/files/byte_stream_split.zstd.metadata.json @@ -0,0 +1,104 @@ +{ + "version": 2, + "schema": [ + { + "repetition_type": "REQUIRED", + "name": "schema", + "num_children": 2 + }, + { + "type": "FLOAT", + "repetition_type": "OPTIONAL", + "name": "f32" + }, + { + "type": "DOUBLE", + "repetition_type": "OPTIONAL", + "name": "f64" + } + ], + "num_rows": 300, + "row_groups": [ + { + "columns": [ + { + "file_offset": 1162, + "meta_data": { + "type": "FLOAT", + "encodings": [ + "RLE", + "BYTE_STREAM_SPLIT" + ], + "path_in_schema": [ + "f32" + ], + "codec": "ZSTD", + "num_values": 300, + "total_uncompressed_size": 1255, + "total_compressed_size": 1158, + "data_page_offset": 4, + "statistics": { + "max": 2.3831448554992676, + "min": -2.772592782974243, + "null_count": 0, + "max_value": 2.3831448554992676, + "min_value": -2.772592782974243 + }, + "encoding_stats": [ + { + "page_type": 0, + "encoding": "BYTE_STREAM_SPLIT", + "count": 1 + } + ] + } + }, + { + "file_offset": 3513, + "meta_data": { + "type": "DOUBLE", + "encodings": [ + "RLE", + "BYTE_STREAM_SPLIT" + ], + "path_in_schema": [ + "f64" + ], + "codec": "ZSTD", + "num_values": 300, + "total_uncompressed_size": 2471, + "total_compressed_size": 2283, + "data_page_offset": 1230, + "statistics": { + "max": 2.6962240525635797, + "min": -3.0461430547999266, + "null_count": 0, + "max_value": 2.6962240525635797, + "min_value": -3.0461430547999266 + }, + "encoding_stats": [ + { + "page_type": 0, + "encoding": "BYTE_STREAM_SPLIT", + "count": 1 + } + ] + } + } + ], + "total_byte_size": 3726, + "num_rows": 300, + "file_offset": 4, + "total_compressed_size": 3441, + "ordinal": 0 + } + ], + "key_value_metadata": [ + { + "key": "ARROW:schema", + "value": "/////6AAAAAQAAAAAAAKAAwABgAFAAgACgAAAAABBAAMAAAACAAIAAAABAAIAAAABAAAAAIAAABAAAAABAAAANj///8AAAEDEAAAABQAAAAEAAAAAAAAAAMAAABmNjQAxv///wAAAgAQABQACAAGAAcADAAAABAAEAAAAAAAAQMQAAAAHAAAAAQAAAAAAAAAAwAAAGYzMgAAAAYACAAGAAYAAAAAAAEA" + } + ], + "created_by": "parquet-cpp-arrow version 14.0.2", + "metadata_length": 498 +} diff --git a/test/files/byte_stream_split.zstd.parquet b/test/files/byte_stream_split.zstd.parquet new file mode 100644 index 0000000..631d492 Binary files /dev/null and b/test/files/byte_stream_split.zstd.parquet differ