mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-28 16:06:37 +00:00
Faster row transpose
This commit is contained in:
parent
9f95eff222
commit
034e9cda16
@ -23,7 +23,7 @@ import { concat } from './utils.js'
|
||||
* @param {ColumnMetaData} columnMetadata column metadata
|
||||
* @param {SchemaTree[]} schemaPath schema path for the column
|
||||
* @param {Compressors} [compressors] custom decompressors
|
||||
* @returns {ArrayLike<any>} array of values
|
||||
* @returns {any[]} array of values
|
||||
*/
|
||||
export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata, schemaPath, compressors) {
|
||||
/** @type {ArrayLike<any> | undefined} */
|
||||
@ -43,9 +43,8 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
}
|
||||
|
||||
// read compressed_page_size bytes starting at offset
|
||||
const compressedBytes = new Uint8Array(arrayBuffer).subarray(
|
||||
columnOffset + reader.offset,
|
||||
columnOffset + reader.offset + header.compressed_page_size
|
||||
const compressedBytes = new Uint8Array(
|
||||
arrayBuffer, columnOffset + reader.offset, header.compressed_page_size
|
||||
)
|
||||
|
||||
// parse page data by type
|
||||
|
||||
28
src/read.js
28
src/read.js
@ -109,10 +109,9 @@ async function readRowGroup(options, rowGroup, groupStart) {
|
||||
}
|
||||
|
||||
/** @type {any[][]} */
|
||||
const groupData = []
|
||||
const groupColumnData = []
|
||||
const promises = []
|
||||
const maps = new Map()
|
||||
let outputColumnIndex = 0
|
||||
// read column data
|
||||
for (let columnIndex = 0; columnIndex < rowGroup.columns.length; columnIndex++) {
|
||||
const columnMetadata = rowGroup.columns[columnIndex].meta_data
|
||||
@ -149,7 +148,7 @@ async function readRowGroup(options, rowGroup, groupStart) {
|
||||
// read column data async
|
||||
promises.push(buffer.then(arrayBuffer => {
|
||||
const schemaPath = getSchemaPath(metadata.schema, columnMetadata.path_in_schema)
|
||||
/** @type {ArrayLike<any> | undefined} */
|
||||
/** @type {any[] | undefined} */
|
||||
let columnData = readColumn(
|
||||
arrayBuffer, bufferOffset, rowGroup, columnMetadata, schemaPath, compressors
|
||||
)
|
||||
@ -207,25 +206,14 @@ async function readRowGroup(options, rowGroup, groupStart) {
|
||||
rowStart: groupStart,
|
||||
rowEnd: groupStart + columnData.length,
|
||||
})
|
||||
// add column data to group data only if onComplete is defined
|
||||
if (options.onComplete) addColumn(groupData, outputColumnIndex, columnData)
|
||||
outputColumnIndex++
|
||||
// save column data only if onComplete is defined
|
||||
if (options.onComplete) groupColumnData.push(columnData)
|
||||
}))
|
||||
}
|
||||
await Promise.all(promises)
|
||||
return groupData
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a column to rows.
|
||||
*
|
||||
* @param {any[][]} rows rows to add column data to
|
||||
* @param {number} columnIndex column index to add
|
||||
* @param {ArrayLike<any>} columnData column data to add
|
||||
*/
|
||||
function addColumn(rows, columnIndex, columnData) {
|
||||
for (let i = 0; i < columnData.length; i++) {
|
||||
if (!rows[i]) rows[i] = []
|
||||
rows[i][columnIndex] = columnData[i]
|
||||
if (options.onComplete) {
|
||||
// transpose columns into rows
|
||||
return groupColumnData[0].map((_, row) => groupColumnData.map(col => col[row]))
|
||||
}
|
||||
return []
|
||||
}
|
||||
|
||||
2
src/types.d.ts
vendored
2
src/types.d.ts
vendored
@ -1,4 +1,4 @@
|
||||
type Awaitable<T> = T | Promise<T>
|
||||
export type Awaitable<T> = T | Promise<T>
|
||||
|
||||
/**
|
||||
* File-like object that can read slices of a file asynchronously.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user