mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Repeated types not yet supported
This commit is contained in:
parent
b11b92ffb9
commit
842ff4c15e
@ -13,9 +13,9 @@ import { writeDataPageV2, writePageHeader } from './datapage.js'
|
||||
* @returns {ColumnMetaData}
|
||||
*/
|
||||
export function writeColumn(writer, schemaPath, values, compressed, stats) {
|
||||
const schemaElement = schemaPath[schemaPath.length - 1]
|
||||
const { type, type_length } = schemaElement
|
||||
if (!type) throw new Error(`column ${schemaElement.name} cannot determine type`)
|
||||
const element = schemaPath[schemaPath.length - 1]
|
||||
const { type, type_length } = element
|
||||
if (!type) throw new Error(`column ${element.name} cannot determine type`)
|
||||
const offsetStart = writer.offset
|
||||
const num_values = values.length
|
||||
/** @type {Encoding[]} */
|
||||
@ -41,20 +41,20 @@ export function writeColumn(writer, schemaPath, values, compressed, stats) {
|
||||
}
|
||||
|
||||
// write unconverted dictionary page
|
||||
const unconverted = unconvert(schemaElement, dictionary)
|
||||
const unconverted = unconvert(element, dictionary)
|
||||
writeDictionaryPage(writer, unconverted, type, type_length, compressed)
|
||||
|
||||
// write data page with dictionary indexes
|
||||
data_page_offset = BigInt(writer.offset)
|
||||
writeDataPageV2(writer, indexes, type, schemaPath, 'RLE_DICTIONARY', compressed)
|
||||
writeDataPageV2(writer, indexes, schemaPath, 'RLE_DICTIONARY', compressed)
|
||||
encodings.push('RLE_DICTIONARY')
|
||||
} else {
|
||||
// unconvert values from rich types to simple
|
||||
values = unconvert(schemaElement, values)
|
||||
values = unconvert(element, values)
|
||||
|
||||
// write data page
|
||||
const encoding = type === 'BOOLEAN' && values.length > 16 ? 'RLE' : 'PLAIN'
|
||||
writeDataPageV2(writer, values, type, schemaPath, encoding, compressed)
|
||||
writeDataPageV2(writer, values, schemaPath, encoding, compressed)
|
||||
encodings.push(encoding)
|
||||
}
|
||||
|
||||
|
||||
@ -10,13 +10,15 @@ import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
|
||||
* @import {Writer} from '../src/types.js'
|
||||
* @param {Writer} writer
|
||||
* @param {DecodedArray} values
|
||||
* @param {ParquetType} type
|
||||
* @param {SchemaElement[]} schemaPath
|
||||
* @param {import('hyparquet').Encoding} encoding
|
||||
* @param {boolean} compressed
|
||||
*/
|
||||
export function writeDataPageV2(writer, values, type, schemaPath, encoding, compressed) {
|
||||
const fixedLength = schemaPath.at(-1)?.type_length
|
||||
export function writeDataPageV2(writer, values, schemaPath, encoding, compressed) {
|
||||
const { name, type, type_length, repetition_type } = schemaPath[schemaPath.length - 1]
|
||||
|
||||
if (!type) throw new Error(`column ${name} cannot determine type`)
|
||||
if (repetition_type === 'REPEATED') throw new Error(`column ${name} repeated types not supported`)
|
||||
|
||||
// write levels to temp buffer
|
||||
const levels = new ByteWriter()
|
||||
@ -39,7 +41,7 @@ export function writeDataPageV2(writer, values, type, schemaPath, encoding, comp
|
||||
page.appendUint8(bitWidth) // prepend bitWidth
|
||||
writeRleBitPackedHybrid(page, nonnull, bitWidth)
|
||||
} else {
|
||||
writePlain(page, nonnull, type, fixedLength)
|
||||
writePlain(page, nonnull, type, type_length)
|
||||
}
|
||||
|
||||
// compress page data
|
||||
@ -108,7 +110,7 @@ export function writePageHeader(writer, header) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @import {DecodedArray, PageHeader, ParquetType, SchemaElement} from 'hyparquet'
|
||||
* @import {DecodedArray, PageHeader, SchemaElement} from 'hyparquet'
|
||||
* @param {Writer} writer
|
||||
* @param {SchemaElement[]} schemaPath
|
||||
* @param {DecodedArray} values
|
||||
|
||||
@ -35,7 +35,7 @@ export function unconvert(element, values) {
|
||||
if (ctype === 'UTF8') {
|
||||
if (!Array.isArray(values)) throw new Error('strings must be an array')
|
||||
const encoder = new TextEncoder()
|
||||
return values.map(v => encoder.encode(v))
|
||||
return values.map(v => typeof v === 'string' ? encoder.encode(v) : v)
|
||||
}
|
||||
if (ltype?.type === 'FLOAT16') {
|
||||
if (type !== 'FIXED_LEN_BYTE_ARRAY') throw new Error('FLOAT16 must be FIXED_LEN_BYTE_ARRAY type')
|
||||
|
||||
Loading…
Reference in New Issue
Block a user