Adjust read coalesce size

This commit is contained in:
Kenny Daniel 2024-04-29 14:22:07 -07:00
parent d6a1981bcc
commit a42cc558d0
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
3 changed files with 7 additions and 9 deletions

@ -28,7 +28,7 @@
},
"devDependencies": {
"@types/node": "20.12.7",
"@typescript-eslint/eslint-plugin": "7.7.1",
"@typescript-eslint/eslint-plugin": "7.8.0",
"@vitest/coverage-v8": "1.5.2",
"eslint": "8.57.0",
"eslint-plugin-import": "2.29.1",

@ -66,11 +66,13 @@ export function readColumn(arrayBuffer, columnOffset, rowGroup, columnMetadata,
if (repetitionLevels.length) {
dereferenceDictionary(dictionary, dataPage)
// Use repetition levels to construct lists
const isNull = columnMetadata && !isRequired(schema, [columnMetadata.path_in_schema[0]])
const isNullable = columnMetadata && !isRequired(schema, [columnMetadata.path_in_schema[0]])
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)
const maxRepetitionLevel = getMaxRepetitionLevel(schema, columnMetadata.path_in_schema)
// convert primitive types to rich types
values = convert(dataPage, schemaElement)
values = assembleObjects(
definitionLevels, repetitionLevels, dataPage, isNull, maxDefinitionLevel, maxRepetitionLevel
definitionLevels, repetitionLevels, values, isNullable, maxDefinitionLevel, maxRepetitionLevel
)
} else if (definitionLevels?.length) {
const maxDefinitionLevel = getMaxDefinitionLevel(schema, columnMetadata.path_in_schema)

@ -101,9 +101,9 @@ async function readRowGroup(options, rowGroup, groupStart) {
// TODO: should throw if any column is missing
throw new Error(`parquet columns not found: ${columns.join(', ')}`)
}
// if row group size is less than 128mb, pre-load in one read
// if row group size is at most 32 MiB (1 << 25 bytes), pre-load in one read
let groupBuffer
if (groupEndByte - groupStartByte <= 1 << 27) {
if (groupEndByte - groupStartByte <= 1 << 25) {
// pre-load row group byte data in one big read,
// otherwise read column data individually
groupBuffer = await file.slice(groupStartByte, groupEndByte)
@ -186,10 +186,6 @@ async function readRowGroup(options, rowGroup, groupStart) {
keys[i][j] = keys[i][j][0]
values[i][j] = values[i][j][0]
}
if (keys[i][j] instanceof Uint8Array) {
// decode utf-8 keys
keys[i][j] = new TextDecoder().decode(keys[i][j])
}
if (!keys[i][j]) continue
obj[keys[i][j]] = values[i][j] === undefined ? null : values[i][j]
}