diff --git a/src/column.js b/src/column.js index 41dcecf..5390a3a 100644 --- a/src/column.js +++ b/src/column.js @@ -19,14 +19,14 @@ export function writeColumn(writer, column, values, stats) { if (!type) throw new Error(`column ${columnName} cannot determine type`) const offsetStart = writer.offset - /** @type {ListValues | undefined} */ - let listValues + /** @type {PageData | undefined} */ + let pageData if (isListLike(schemaPath)) { if (!Array.isArray(values)) { throw new Error(`parquet column ${columnName} expects array values for list encoding`) } - listValues = encodeListValues(schemaPath, values) - values = listValues.values + pageData = encodeListValues(schemaPath, values) + values = pageData.values } const num_values = values.length @@ -61,7 +61,7 @@ export function writeColumn(writer, column, values, stats) { // write data page with dictionary indexes data_page_offset = BigInt(writer.offset) - writeDataPageV2(writer, indexes, column, 'RLE_DICTIONARY', listValues) + writeDataPageV2(writer, indexes, column, 'RLE_DICTIONARY', pageData) encodings.push('RLE_DICTIONARY') } else { // unconvert values from rich types to simple @@ -69,7 +69,7 @@ export function writeColumn(writer, column, values, stats) { // write data page const encoding = type === 'BOOLEAN' && values.length > 16 ? 'RLE' : 'PLAIN' - writeDataPageV2(writer, values, column, encoding, listValues) + writeDataPageV2(writer, values, column, encoding, pageData) encodings.push(encoding) } @@ -138,7 +138,7 @@ function writeDictionaryPage(writer, column, dictionary) { /** * @import {ColumnMetaData, DecodedArray, Encoding, ParquetType, SchemaElement, Statistics} from 'hyparquet' - * @import {ColumnEncoder, ListValues, Writer} from '../src/types.js' + * @import {ColumnEncoder, PageData, Writer} from '../src/types.js' * @param {DecodedArray} values * @returns {Statistics} */ diff --git a/src/datapage.js b/src/datapage.js index 92ed892..73d1887 100644 --- a/src/datapage.js +++ b/src/datapage.js @@ -11,7 +11,7 @@ import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js' * @param {DecodedArray} values * @param {ColumnEncoder} column * @param {import('hyparquet').Encoding} encoding - * @param {ListValues} [listValues] + * @param {PageData} [listValues] */ export function writeDataPageV2(writer, values, column, encoding, listValues) { const { columnName, element, compressed } = column @@ -115,11 +115,11 @@ export function writePageHeader(writer, header) { /** * @import {DecodedArray, PageHeader, SchemaElement} from 'hyparquet' - * @import {ColumnEncoder, ListValues, Writer} from '../src/types.js' + * @import {ColumnEncoder, PageData, Writer} from '../src/types.js' * @param {Writer} writer * @param {ColumnEncoder} column * @param {DecodedArray} values - * @param {ListValues} [listValues] + * @param {PageData} [listValues] * @returns {{ * definition_levels_byte_length: number * repetition_levels_byte_length: number diff --git a/src/dremel.js b/src/dremel.js index 3f776fb..21fe12f 100644 --- a/src/dremel.js +++ b/src/dremel.js @@ -4,10 +4,10 @@ import { getMaxDefinitionLevel } from './schema.js' * Encode nested list values into repetition and definition levels. * * @import {SchemaElement} from 'hyparquet' - * @import {ListValues} from '../src/types.js' + * @import {PageData} from '../src/types.js' * @param {SchemaElement[]} schemaPath schema elements from root to leaf * @param {any[]} rows column data for the current row group - * @returns {ListValues} + * @returns {PageData} encoded list values */ export function encodeListValues(schemaPath, rows) { if (schemaPath.length < 2) throw new Error('parquet list schema path must include column') diff --git a/src/types.d.ts b/src/types.d.ts index 5a1afa0..829aef1 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -33,7 +33,7 @@ export interface ColumnSource { nullable?: boolean } -export interface ListValues { +export interface PageData { values: any[] definitionLevels: number[] repetitionLevels: number[]