mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-06 06:51:54 +00:00
Consistent parquet error messages
This commit is contained in:
parent
8820ade761
commit
6ca1d2a85c
@@ -26,7 +26,7 @@ export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) {
|
||||
if (dictionary_page_offset === undefined || data_page_offset < dictionary_page_offset) {
|
||||
columnOffset = data_page_offset
|
||||
}
|
||||
columnOffset = Number(columnOffset) // cast bigint to number
|
||||
columnOffset = Number(columnOffset)
|
||||
|
||||
// parse column data
|
||||
let valuesSeen = 0
|
||||
@@ -37,7 +37,7 @@ export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) {
|
||||
// parse column header
|
||||
const { value: header, byteLength: headerLength } = parquetHeader(arrayBuffer, columnOffset + byteOffset)
|
||||
byteOffset += headerLength
|
||||
if (!header || header.compressed_page_size === undefined) throw new Error('header is undefined')
|
||||
if (!header || header.compressed_page_size === undefined) throw new Error('parquet header is undefined')
|
||||
|
||||
// read compressed_page_size bytes starting at offset
|
||||
const compressedBytes = new Uint8Array(
|
||||
@@ -45,23 +45,23 @@ export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) {
|
||||
)
|
||||
// decompress bytes
|
||||
let page
|
||||
const uncompressed_page_size = Number(header.uncompressed_page_size) // TODO: unsafe cast
|
||||
const uncompressed_page_size = Number(header.uncompressed_page_size)
|
||||
if (codec === CompressionCodec.GZIP) {
|
||||
throw new Error('GZIP compression not supported')
|
||||
throw new Error('parquet gzip compression not supported')
|
||||
} else if (codec === CompressionCodec.SNAPPY) {
|
||||
page = new Uint8Array(uncompressed_page_size)
|
||||
snappyUncompress(compressedBytes, page)
|
||||
} else if (codec === CompressionCodec.LZO) {
|
||||
throw new Error('LZO compression not supported')
|
||||
throw new Error('parquet lzo compression not supported')
|
||||
}
|
||||
if (!page || page.length !== uncompressed_page_size) {
|
||||
throw new Error('decompressed page size does not match header')
|
||||
throw new Error('parquet decompressed page size does not match header')
|
||||
}
|
||||
|
||||
// parse page data by type
|
||||
if (header.type === PageType.DATA_PAGE) {
|
||||
const daph = header.data_page_header
|
||||
if (!daph) throw new Error('data page header is undefined')
|
||||
if (!daph) throw new Error('parquet data page header is undefined')
|
||||
|
||||
const { definitionLevels, repetitionLevels, value } = readDataPage(page, daph, schema, columnMetadata)
|
||||
valuesSeen += daph.num_values
|
||||
@@ -112,7 +112,7 @@ export function readColumn(arrayBuffer, rowGroup, columnMetadata, schema) {
|
||||
return values
|
||||
} else if (header.type === PageType.DICTIONARY_PAGE) {
|
||||
const diph = header.dictionary_page_header
|
||||
if (!diph) throw new Error('dictionary page header is undefined')
|
||||
if (!diph) throw new Error('parquet dictionary page header is undefined')
|
||||
|
||||
dictionary = readDictionaryPage(page, diph, schema, columnMetadata)
|
||||
} else {
|
||||
|
||||
@@ -174,7 +174,7 @@ export function readPlain(dataView, type, count, offset = 0) {
|
||||
} else if (type === ParquetType.FIXED_LEN_BYTE_ARRAY) {
|
||||
return readPlainByteArrayFixed(dataView, offset, count)
|
||||
} else {
|
||||
throw new Error(`Unhandled type: ${type}`)
|
||||
throw new Error(`parquet unhandled type: ${type}`)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -234,7 +234,7 @@ export function readRleBitPackedHybrid(dataView, offset, width, length, numValue
|
||||
let byteLength = 0
|
||||
if (!length) {
|
||||
length = dataView.getInt32(offset, true)
|
||||
if (length < 0) throw new Error(`invalid rle/bitpack length ${length}`)
|
||||
if (length < 0) throw new Error(`parquet invalid rle/bitpack length ${length}`)
|
||||
byteLength += 4
|
||||
}
|
||||
const value = []
|
||||
@@ -285,7 +285,7 @@ function readRle(dataView, offset, header, bitWidth) {
|
||||
readValue = dataView.getUint32(offset, true)
|
||||
byteLength += 4
|
||||
} else {
|
||||
throw new Error(`invalid rle width ${width}`)
|
||||
throw new Error(`parquet invalid rle width ${width}`)
|
||||
}
|
||||
|
||||
// repeat value count times
|
||||
|
||||
@@ -42,7 +42,7 @@ export function schemaElement(schema, name) {
|
||||
for (const part of name) {
|
||||
const child = tree.children.find(child => child.element.name === part)
|
||||
if (!child) {
|
||||
throw new Error(`schema element not found: ${name}`)
|
||||
throw new Error(`parquet schema element not found: ${name}`)
|
||||
}
|
||||
tree = child
|
||||
}
|
||||
|
||||
@@ -118,7 +118,7 @@ function readElement(view, type, index) {
|
||||
return [uuid, index]
|
||||
}
|
||||
default:
|
||||
throw new Error(`Unhandled type: ${type}`)
|
||||
throw new Error(`thrift unhandled type: ${type}`)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -86,7 +86,7 @@ describe('readPlain', () => {
|
||||
it('throws an error for unhandled types', () => {
|
||||
const dataView = new DataView(new ArrayBuffer(0))
|
||||
const invalidType = 999
|
||||
expect(() => readPlain(dataView, invalidType, 1, 0)).toThrow(`Unhandled type: ${invalidType}`)
|
||||
expect(() => readPlain(dataView, invalidType, 1, 0)).toThrow(`parquet unhandled type: ${invalidType}`)
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user