Mirror of https://github.com/asadbek064/hyparquet.git
Throw exception for unsupported file_path
commit d1d08d02bd (parent ad1dd12990)
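In the Parquet format, each ColumnChunk carries an optional file_path field that may point at column data stored in a separate file from the one holding the footer metadata. hyparquet reads single-file parquet only; previously the field was silently ignored, so byte ranges for such chunks would be resolved against the wrong file. This commit makes both the query planner (parquetPlan) and the row group reader (readRowGroup) throw an explicit error as soon as a chunk sets file_path, alongside a routine eslint bump in package.json.

A minimal sketch of how the new behavior surfaces to callers, assuming a file whose chunks use file_path (the file name is hypothetical; parquetReadObjects and asyncBufferFromFile are hyparquet's public API):

import { asyncBufferFromFile, parquetReadObjects } from 'hyparquet'

// 'split.parquet' stands in for a file whose column chunks reference
// external data files via ColumnChunk.file_path
const file = await asyncBufferFromFile('split.parquet')
try {
  const rows = await parquetReadObjects({ file })
} catch (err) {
  console.error(err.message) // 'parquet file_path not supported'
}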
package.json

@@ -49,7 +49,7 @@
   "devDependencies": {
     "@types/node": "22.15.3",
     "@vitest/coverage-v8": "3.1.2",
-    "eslint": "9.25.1",
+    "eslint": "9.26.0",
     "eslint-plugin-jsdoc": "50.6.11",
     "hyparquet-compressors": "1.1.1",
     "typescript": "5.8.3",
@@ -29,7 +29,8 @@ export function parquetPlan({ metadata, rowStart = 0, rowEnd = Infinity, columns
   /** @type {ByteRange[]} */
   const plan = []
   // loop through each column chunk
-  for (const { meta_data } of rowGroup.columns) {
+  for (const { file_path, meta_data } of rowGroup.columns) {
+    if (file_path) throw new Error('parquet file_path not supported')
     if (!meta_data) throw new Error('parquet column metadata is undefined')
     // add included columns to the plan
     if (!columns || columns.includes(meta_data.path_in_schema[0])) {
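Guarding in the planner matters because parquetPlan runs before any column bytes are fetched: an unsupported file is rejected up front rather than after a partial read. Callers who want to detect such files themselves can scan the footer metadata. A hedged sketch using hyparquet's parquetMetadataAsync (the check itself is illustrative, not part of this commit):

import { asyncBufferFromFile, parquetMetadataAsync } from 'hyparquet'

const file = await asyncBufferFromFile('split.parquet') // hypothetical file
const metadata = await parquetMetadataAsync(file)
// ColumnChunk.file_path is optional in the parquet thrift definition; when
// set, the chunk's data lives outside this file and reads will now throw
const hasExternalChunks = metadata.row_groups.some(rowGroup =>
  rowGroup.columns.some(column => column.file_path))
console.log(hasExternalChunks)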
src/read.js (22 changed lines)

@@ -84,21 +84,21 @@ export async function readRowGroup(options, rowGroup, groupStart) {
   /** @type {Map<string, DecodedArray[]>} */
   const subcolumnData = new Map() // columns to assemble as maps
   // read column data
-  for (let i = 0; i < rowGroup.columns.length; i++) {
-    const columnMetadata = rowGroup.columns[i].meta_data
-    if (!columnMetadata) throw new Error('parquet column metadata is undefined')
+  for (const { file_path, meta_data } of rowGroup.columns) {
+    if (file_path) throw new Error('parquet file_path not supported')
+    if (!meta_data) throw new Error('parquet column metadata is undefined')

     // skip columns that are not requested
-    const columnName = columnMetadata.path_in_schema[0]
+    const columnName = meta_data.path_in_schema[0]
     if (columns && !columns.includes(columnName)) continue

-    const { startByte, endByte } = getColumnRange(columnMetadata)
+    const { startByte, endByte } = getColumnRange(meta_data)
     const columnBytes = endByte - startByte

     // skip columns larger than 1gb
     // TODO: stream process the data, returning only the requested rows
     if (columnBytes > 1 << 30) {
-      console.warn(`parquet skipping huge column "${columnMetadata.path_in_schema}" ${columnBytes} bytes`)
+      console.warn(`parquet skipping huge column "${meta_data.path_in_schema}" ${columnBytes} bytes`)
       // TODO: set column to new Error('parquet column too large')
       continue
     }
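For context on why the guard must come before getColumnRange: the planner and reader treat every chunk's offsets as positions within the current file's buffer, which is exactly the assumption file_path breaks. A hedged sketch of such a range computation (field names are from the parquet ColumnMetaData thrift struct; this is illustrative, not hyparquet's implementation):

// A chunk's bytes start at its dictionary page when present, otherwise at
// its first data page, and span total_compressed_size bytes. These offsets
// are only meaningful inside the same file the footer was read from, so
// file_path chunks must be rejected before this computation.
function columnRange({ dictionary_page_offset, data_page_offset, total_compressed_size }) {
  const startByte = Number(dictionary_page_offset ?? data_page_offset)
  return { startByte, endByte: startByte + Number(total_compressed_size) }
}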
@@ -109,14 +109,15 @@ export async function readRowGroup(options, rowGroup, groupStart) {

     // read column data async
     promises.push(buffer.then(arrayBuffer => {
-      const schemaPath = getSchemaPath(metadata.schema, columnMetadata.path_in_schema)
+      const schemaPath = getSchemaPath(metadata.schema, meta_data.path_in_schema)
       const reader = { view: new DataView(arrayBuffer), offset: 0 }
+      const subcolumn = meta_data.path_in_schema.join('.')
       const columnDecoder = {
-        columnName: columnMetadata.path_in_schema.join('.'),
-        type: columnMetadata.type,
+        columnName: subcolumn,
+        type: meta_data.type,
         element: schemaPath[schemaPath.length - 1].element,
         schemaPath,
-        codec: columnMetadata.codec,
+        codec: meta_data.codec,
         compressors: options.compressors,
         utf8: options.utf8,
       }
@@ -128,7 +129,6 @@ export async function readRowGroup(options, rowGroup, groupStart) {

       // TODO: fast path for non-nested columns
       // save column data for assembly
-      const subcolumn = columnMetadata.path_in_schema.join('.')
       subcolumnData.set(subcolumn, chunks)
       chunks = undefined

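The last two hunks also include a small cleanup: the dotted subcolumn key is now computed once, before columnDecoder is built, so the duplicate join('.') at the assembly step above could be dropped. For nested columns the key is simply the dotted schema path, for example:

// Illustrative metadata for a nested column chunk
const meta_data = { path_in_schema: ['user', 'address', 'zip'] }
const subcolumn = meta_data.path_in_schema.join('.') // 'user.address.zip'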