mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-25 14:56:37 +00:00
Adjust read coalesce size
This commit is contained in:
parent
d6a1981bcc
commit
a42cc558d0
@ -28,7 +28,7 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.12.7",
|
||||
"@typescript-eslint/eslint-plugin": "7.7.1",
|
||||
"@typescript-eslint/eslint-plugin": "7.8.0",
|
||||
"@vitest/coverage-v8": "1.5.2",
|
||||
"eslint": "8.57.0",
|
||||
"eslint-plugin-import": "2.29.1",
|
||||
|
||||
@ -66,11 +66,13 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
|
||||
if (repetitionLevels.length) {
|
||||
dereferenceDictionary(dictionary, dataPage)
|
||||
// Use repetition levels to construct lists
|
||||
const isNull = columnMetadata && !isRequired(schema, [columnMetadata.path_in_schema[0]])
|
||||
const isNullable = columnMetadata && !isRequired(schema, [columnMetadata.path_in_schema[0]])
|
||||
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)
|
||||
const maxRepetitionLevel = getMaxRepetitionLevel(schema, columnMetadata.path_in_schema)
|
||||
// convert primitive types to rich types
|
||||
values = convert(dataPage, schemaElement)
|
||||
values = assembleObjects(
|
||||
definitionLevels, repetitionLevels, dataPage, isNull, maxDefinitionLevel, maxRepetitionLevel
|
||||
definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel
|
||||
)
|
||||
} else if (definitionLevels?.length) {
|
||||
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)
|
||||
|
||||
@ -101,9 +101,9 @@ async function readRowGroup(options, rowGroup, groupStart) {
|
||||
// TODO: should throw if any column is missing
|
||||
throw new Error(`parquet columns not found: ${columns.join(', ')}`)
|
||||
}
|
||||
// if row group size is less than 128mb, pre-load in one read
|
||||
// if row group size is less than 32mb, pre-load in one read
|
||||
let groupBuffer
|
||||
if (groupEndByte - groupStartByte <= 1 << 27) {
|
||||
if (groupEndByte - groupStartByte <= 1 << 25) {
|
||||
// pre-load row group byte data in one big read,
|
||||
// otherwise read column data individually
|
||||
groupBuffer = await file.slice(groupStartByte, groupEndByte)
|
||||
@ -186,10 +186,6 @@ async function readRowGroup(options, rowGroup, groupStart) {
|
||||
keys[i][j] = keys[i][j][0]
|
||||
values[i][j] = values[i][j][0]
|
||||
}
|
||||
if (keys[i][j] instanceof Uint8Array) {
|
||||
// decode utf-8 keys
|
||||
keys[i][j] = new TextDecoder().decode(keys[i][j])
|
||||
}
|
||||
if (!keys[i][j]) continue
|
||||
obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j]
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user