Rename ListValues to PageData

This commit is contained in:
Kenny Daniel 2025-10-27 23:08:33 -07:00
parent 7f3a04d5df
commit b2ea21b366
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
4 changed files with 13 additions and 13 deletions

@ -19,14 +19,14 @@ export function writeColumn(writer, column, values, stats) {
if (!type) throw new Error(`column ${columnName} cannot determine type`)
const offsetStart = writer.offset
/** @type {ListValues | undefined} */
let listValues
/** @type {PageData | undefined} */
let pageData
if (isListLike(schemaPath)) {
if (!Array.isArray(values)) {
throw new Error(`parquet column ${columnName} expects array values for list encoding`)
}
listValues = encodeListValues(schemaPath, values)
values = listValues.values
pageData = encodeListValues(schemaPath, values)
values = pageData.values
}
const num_values = values.length
@ -61,7 +61,7 @@ export function writeColumn(writer, column, values, stats) {
// write data page with dictionary indexes
data_page_offset = BigInt(writer.offset)
writeDataPageV2(writer, indexes, column, 'RLE_DICTIONARY', listValues)
writeDataPageV2(writer, indexes, column, 'RLE_DICTIONARY', pageData)
encodings.push('RLE_DICTIONARY')
} else {
// unconvert values from rich types to simple
@ -69,7 +69,7 @@ export function writeColumn(writer, column, values, stats) {
// write data page
const encoding = type === 'BOOLEAN' && values.length > 16 ? 'RLE' : 'PLAIN'
writeDataPageV2(writer, values, column, encoding, listValues)
writeDataPageV2(writer, values, column, encoding, pageData)
encodings.push(encoding)
}
@ -138,7 +138,7 @@ function writeDictionaryPage(writer, column, dictionary) {
/**
* @import {ColumnMetaData, DecodedArray, Encoding, ParquetType, SchemaElement, Statistics} from 'hyparquet'
* @import {ColumnEncoder, ListValues, Writer} from '../src/types.js'
* @import {ColumnEncoder, PageData, Writer} from '../src/types.js'
* @param {DecodedArray} values
* @returns {Statistics}
*/

@ -11,7 +11,7 @@ import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
* @param {DecodedArray} values
* @param {ColumnEncoder} column
* @param {import('hyparquet').Encoding} encoding
* @param {ListValues} [listValues]
* @param {PageData} [listValues]
*/
export function writeDataPageV2(writer, values, column, encoding, listValues) {
const { columnName, element, compressed } = column
@ -115,11 +115,11 @@ export function writePageHeader(writer, header) {
/**
* @import {DecodedArray, PageHeader, SchemaElement} from 'hyparquet'
* @import {ColumnEncoder, ListValues, Writer} from '../src/types.js'
* @import {ColumnEncoder, PageData, Writer} from '../src/types.js'
* @param {Writer} writer
* @param {ColumnEncoder} column
* @param {DecodedArray} values
* @param {ListValues} [listValues]
* @param {PageData} [listValues]
* @returns {{
* definition_levels_byte_length: number
* repetition_levels_byte_length: number

@ -4,10 +4,10 @@ import { getMaxDefinitionLevel } from './schema.js'
* Encode nested list values into repetition and definition levels.
*
* @import {SchemaElement} from 'hyparquet'
* @import {ListValues} from '../src/types.js'
* @import {PageData} from '../src/types.js'
* @param {SchemaElement[]} schemaPath schema elements from root to leaf
* @param {any[]} rows column data for the current row group
* @returns {ListValues}
* @returns {PageData} encoded list values
*/
export function encodeListValues(schemaPath, rows) {
if (schemaPath.length < 2) throw new Error('parquet list schema path must include column')

2
src/types.d.ts vendored

@ -33,7 +33,7 @@ export interface ColumnSource {
nullable?: boolean
}
export interface ListValues {
export interface PageData {
values: any[]
definitionLevels: number[]
repetitionLevels: number[]