Consistent parquet error messages

This commit is contained in:
Kenny Daniel 2024-01-12 16:28:37 -08:00
parent 8820ade761
commit 6ca1d2a85c
No known key found for this signature in database
GPG Key ID: 6A3C5E318BE71391
5 changed files with 14 additions and 14 deletions

@@ -26,7 +26,7 @@ export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) {
if (dictionary_page_offset === undefined || data_page_offset < dictionary_page_offset) {
columnOffset = data_page_offset
}
columnOffset = Number(columnOffset) // cast bigint to number
columnOffset = Number(columnOffset)
// parse column data
let valuesSeen = 0
@@ -37,7 +37,7 @@ export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) {
// parse column header
const { value: header, byteLength: headerLength } = parquetHeader(arrayBuffer, columnOffset + byteOffset)
byteOffset += headerLength
if (!header || header.compressed_page_size === undefined) throw new Error('header is undefined')
if (!header || header.compressed_page_size === undefined) throw new Error('parquet header is undefined')
// read compressed_page_size bytes starting at offset
const compressedBytes = new Uint8Array(
@@ -45,23 +45,23 @@ export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) {
)
// decompress bytes
let page
const uncompressed_page_size = Number(header.uncompressed_page_size) // TODO: unsafe cast
const uncompressed_page_size = Number(header.uncompressed_page_size)
if (codec === CompressionCodec.GZIP) {
throw new Error('GZIP compression not supported')
throw new Error('parquet gzip compression not supported')
} else if (codec === CompressionCodec.SNAPPY) {
page = new Uint8Array(uncompressed_page_size)
snappyUncompress(compressedBytes, page)
} else if (codec === CompressionCodec.LZO) {
throw new Error('LZO compression not supported')
throw new Error('parquet lzo compression not supported')
}
if (!page || page.length !== uncompressed_page_size) {
throw new Error('decompressed page size does not match header')
throw new Error('parquet decompressed page size does not match header')
}
// parse page data by type
if (header.type === PageType.DATA_PAGE) {
const daph = header.data_page_header
if (!daph) throw new Error('data page header is undefined')
if (!daph) throw new Error('parquet data page header is undefined')
const { definitionLevels, repetitionLevels, value } = readDataPage(page, daph, schema, columnMetadata)
valuesSeen += daph.num_values
@@ -112,7 +112,7 @@ export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) {
return values
} else if (header.type === PageType.DICTIONARY_PAGE) {
const diph = header.dictionary_page_header
if (!diph) throw new Error('dictionary page header is undefined')
if (!diph) throw new Error('parquet dictionary page header is undefined')
dictionary = readDictionaryPage(page, diph, schema, columnMetadata)
} else {

@@ -174,7 +174,7 @@ export function readPlain(dataView, type, count, offset = 0) {
} else if (type === ParquetType.FIXED_LEN_BYTE_ARRAY) {
return readPlainByteArrayFixed(dataView, offset, count)
} else {
throw new Error(`Unhandled type: ${type}`)
throw new Error(`parquet unhandled type: ${type}`)
}
}
@@ -234,7 +234,7 @@ export function readRleBitPackedHybrid(dataView, offset, width, length, numValue
let byteLength = 0
if (!length) {
length = dataView.getInt32(offset, true)
if (length < 0) throw new Error(`invalid rle/bitpack length ${length}`)
if (length < 0) throw new Error(`parquet invalid rle/bitpack length ${length}`)
byteLength += 4
}
const value = []
@@ -285,7 +285,7 @@ function readRle(dataView, offset, header, bitWidth) {
readValue = dataView.getUint32(offset, true)
byteLength += 4
} else {
throw new Error(`invalid rle width ${width}`)
throw new Error(`parquet invalid rle width ${width}`)
}
// repeat value count times

@@ -42,7 +42,7 @@ export function schemaElement(schema, name) {
for (const part of name) {
const child = tree.children.find(child => child.element.name === part)
if (!child) {
throw new Error(`schema element not found: ${name}`)
throw new Error(`parquet schema element not found: ${name}`)
}
tree = child
}

@@ -118,7 +118,7 @@ function readElement(view, type, index) {
return [uuid, index]
}
default:
throw new Error(`Unhandled type: ${type}`)
throw new Error(`thrift unhandled type: ${type}`)
}
}

@@ -86,7 +86,7 @@ describe('readPlain', () => {
it('throws an error for unhandled types', () => {
const dataView = new DataView(new ArrayBuffer(0))
const invalidType = 999
expect(() => readPlain(dataView, invalidType, 1, 0)).toThrow(`Unhandled type: ${invalidType}`)
expect(() => readPlain(dataView, invalidType, 1, 0)).toThrow(`parquet unhandled type: ${invalidType}`)
})
})