From 36f5b4f043ac211986172b8d32895bfedd43f750 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Sun, 26 May 2024 06:00:20 -0700 Subject: [PATCH] Move decompressPage to avoid circular dependency chain --- src/column.js | 30 +----------------------------- src/datapage.js | 28 ++++++++++++++++++++++++++++ src/datapageV2.js | 2 +- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/column.js b/src/column.js index dfafc4b..5e856e1 100644 --- a/src/column.js +++ b/src/column.js @@ -1,10 +1,9 @@ import { assembleLists } from './assemble.js' import { convertWithDictionary } from './convert.js' -import { readDataPage, readDictionaryPage } from './datapage.js' +import { decompressPage, readDataPage, readDictionaryPage } from './datapage.js' import { readDataPageV2 } from './datapageV2.js' import { parquetHeader } from './header.js' import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js' -import { snappyUncompress } from './snappy.js' import { concat } from './utils.js' /** @@ -120,30 +119,3 @@ export function getColumnOffset({ dictionary_page_offset, data_page_offset }) { } return Number(columnOffset) } - -/** - * @param {Uint8Array} compressedBytes - * @param {number} uncompressed_page_size - * @param {import('./types.js').CompressionCodec} codec - * @param {import('./types.js').Compressors | undefined} compressors - * @returns {Uint8Array} - */ -export function decompressPage(compressedBytes, uncompressed_page_size, codec, compressors) { - /** @type {Uint8Array} */ - let page - const customDecompressor = compressors?.[codec] - if (codec === 'UNCOMPRESSED') { - page = compressedBytes - } else if (customDecompressor) { - page = customDecompressor(compressedBytes, uncompressed_page_size) - } else if (codec === 'SNAPPY') { - page = new Uint8Array(uncompressed_page_size) - snappyUncompress(compressedBytes, page) - } else { - throw new Error(`parquet unsupported compression codec: ${codec}`) - } - if (page?.length !== uncompressed_page_size) { - throw new Error(`parquet decompressed page length ${page?.length} does not match header ${uncompressed_page_size}`) - } - return page -} diff --git a/src/datapage.js b/src/datapage.js index 4e05b4d..1597f0f 100644 --- a/src/datapage.js +++ b/src/datapage.js @@ -1,6 +1,7 @@ import { bitWidth, byteStreamSplit, readRleBitPackedHybrid } from './encoding.js' import { readPlain } from './plain.js' import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js' +import { snappyUncompress } from './snappy.js' /** * Read a data page from uncompressed reader. @@ -107,3 +108,30 @@ function readDefinitionLevels(reader, daph, schemaPath) { return { definitionLevels, numNulls } } + +/** + * @param {Uint8Array} compressedBytes + * @param {number} uncompressed_page_size + * @param {import('./types.js').CompressionCodec} codec + * @param {import('./types.js').Compressors | undefined} compressors + * @returns {Uint8Array} + */ +export function decompressPage(compressedBytes, uncompressed_page_size, codec, compressors) { + /** @type {Uint8Array} */ + let page + const customDecompressor = compressors?.[codec] + if (codec === 'UNCOMPRESSED') { + page = compressedBytes + } else if (customDecompressor) { + page = customDecompressor(compressedBytes, uncompressed_page_size) + } else if (codec === 'SNAPPY') { + page = new Uint8Array(uncompressed_page_size) + snappyUncompress(compressedBytes, page) + } else { + throw new Error(`parquet unsupported compression codec: ${codec}`) + } + if (page?.length !== uncompressed_page_size) { + throw new Error(`parquet decompressed page length ${page?.length} does not match header ${uncompressed_page_size}`) + } + return page +} diff --git a/src/datapageV2.js b/src/datapageV2.js index 38a45f3..6bb924a 100644 --- a/src/datapageV2.js +++ b/src/datapageV2.js @@ -1,4 +1,4 @@ -import { decompressPage } from './column.js' +import { decompressPage } from './datapage.js' import { deltaBinaryUnpack, deltaByteArray, deltaLengthByteArray } from './delta.js' import { bitWidth, byteStreamSplit, readRleBitPackedHybrid } from './encoding.js' import { readPlain } from './plain.js'