From 034e9cda161967e28cf9fea92bfa7b814606a10f Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Tue, 14 May 2024 02:19:37 -0700 Subject: [PATCH] Faster row transpose --- src/column.js | 7 +++---- src/read.js | 28 ++++++++-------------------- src/types.d.ts | 2 +- 3 files changed, 12 insertions(+), 25 deletions(-) diff --git a/src/column.js b/src/column.js index f4b2817..0d8c86b 100644 --- a/src/column.js +++ b/src/column.js @@ -23,7 +23,7 @@ import { concat } from './utils.js' * @param {ColumnMetaData} columnMetadata column metadata * @param {SchemaTree[]} schemaPath schema path for the column * @param {Compressors} [compressors] custom decompressors - * @returns {ArrayLike} array of values + * @returns {any[]} array of values */ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schemaPath, compressors) { /** @type {ArrayLike | undefined} */ @@ -43,9 +43,8 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, } // read compressed_page_size bytes starting at offset - const compressedBytes = new Uint8Array(arrayBuffer).subarray( - columnOffset + reader.offset, - columnOffset + reader.offset + header.compressed_page_size + const compressedBytes = new Uint8Array( + arrayBuffer, columnOffset + reader.offset, header.compressed_page_size ) // parse page data by type diff --git a/src/read.js b/src/read.js index f4d2e7b..e6c989e 100644 --- a/src/read.js +++ b/src/read.js @@ -109,10 +109,9 @@ async function readRowGroup(options, rowGroup, groupStart) { } /** @type {any[][]} */ - const groupData = [] + const groupColumnData = [] const promises = [] const maps = new Map() - let outputColumnIndex = 0 // read column data for (let columnIndex = 0; columnIndex < rowGroup.columns.length; columnIndex++) { const columnMetadata = rowGroup.columns[columnIndex].meta_data @@ -149,7 +148,7 @@ async function readRowGroup(options, rowGroup, groupStart) { // read column data async promises.push(buffer.then(arrayBuffer => { const schemaPath = getSchemaPath(metadata.schema, columnMetadata.path_in_schema) - /** @type {ArrayLike | undefined} */ + /** @type {any[] | undefined} */ let columnData = readColumn( arrayBuffer, bufferOffset, rowGroup, columnMetadata, schemaPath, compressors ) @@ -207,25 +206,14 @@ async function readRowGroup(options, rowGroup, groupStart) { rowStart: groupStart, rowEnd: groupStart + columnData.length, }) - // add colum data to group data only if onComplete is defined - if (options.onComplete) addColumn(groupData, outputColumnIndex, columnData) - outputColumnIndex++ + // save column data only if onComplete is defined + if (options.onComplete) groupColumnData.push(columnData) })) } await Promise.all(promises) - return groupData -} - -/** - * Add a column to rows. - * - * @param {any[][]} rows rows to add column data to - * @param {number} columnIndex column index to add - * @param {ArrayLike} columnData column data to add - */ -function addColumn(rows, columnIndex, columnData) { - for (let i = 0; i < columnData.length; i++) { - if (!rows[i]) rows[i] = [] - rows[i][columnIndex] = columnData[i] + if (options.onComplete) { + // transpose columns into rows + return groupColumnData[0].map((_, row) => groupColumnData.map(col => col[row])) } + return [] } diff --git a/src/types.d.ts b/src/types.d.ts index ccfe9a0..3b130b1 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -1,4 +1,4 @@ -type Awaitable = T | Promise +export type Awaitable = T | Promise /** * File-like object that can read slices of a file asynchronously.