mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Custom string parser option (#129)
This commit is contained in:
parent
3c1fce4b3f
commit
8611663334
@ -54,10 +54,10 @@
|
||||
"test": "vitest run"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "24.5.1",
|
||||
"@types/node": "24.5.2",
|
||||
"@vitest/coverage-v8": "3.2.4",
|
||||
"eslint": "9.35.0",
|
||||
"eslint-plugin-jsdoc": "58.1.0",
|
||||
"eslint": "9.36.0",
|
||||
"eslint-plugin-jsdoc": "60.4.0",
|
||||
"hyparquet-compressors": "1.1.1",
|
||||
"typescript": "5.9.2",
|
||||
"vitest": "3.2.4"
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
/**
|
||||
* @import {ColumnDecoder, DecodedArray, Encoding, ParquetParsers, SchemaElement} from '../src/types.d.ts'
|
||||
* @import {ColumnDecoder, DecodedArray, Encoding, ParquetParsers} from '../src/types.d.ts'
|
||||
*/
|
||||
|
||||
const decoder = new TextDecoder()
|
||||
|
||||
/**
|
||||
* Default type parsers when no custom ones are given
|
||||
* @type ParquetParsers
|
||||
@ -20,6 +22,9 @@ export const DEFAULT_PARSERS = {
|
||||
const dayInMillis = 86400000
|
||||
return new Date(days * dayInMillis)
|
||||
},
|
||||
stringFromBytes(bytes) {
|
||||
return bytes && decoder.decode(bytes)
|
||||
},
|
||||
}
|
||||
|
||||
/**
|
||||
@ -109,10 +114,10 @@ export function convert(data, columnDecoder) {
|
||||
throw new Error('parquet interval not supported')
|
||||
}
|
||||
if (ctype === 'UTF8' || ltype?.type === 'STRING' || utf8 && type === 'BYTE_ARRAY') {
|
||||
const decoder = new TextDecoder()
|
||||
const arr = new Array(data.length)
|
||||
for (let i = 0; i < arr.length; i++) {
|
||||
arr[i] = data[i] && decoder.decode(data[i])
|
||||
const value = data[i]
|
||||
arr[i] = value instanceof Uint8Array ? parsers.stringFromBytes(value) : value
|
||||
}
|
||||
return arr
|
||||
}
|
||||
|
||||
@ -279,7 +279,7 @@ export function convertMetadata(value, schema, parsers) {
|
||||
const { type, converted_type, logical_type } = schema
|
||||
if (value === undefined) return value
|
||||
if (type === 'BOOLEAN') return value[0] === 1
|
||||
if (type === 'BYTE_ARRAY') return new TextDecoder().decode(value)
|
||||
if (type === 'BYTE_ARRAY') return parsers.stringFromBytes(value)
|
||||
const view = new DataView(value.buffer, value.byteOffset, value.byteLength)
|
||||
if (type === 'FLOAT' && view.byteLength === 4) return view.getFloat32(0, true)
|
||||
if (type === 'DOUBLE' && view.byteLength === 8) return view.getFloat64(0, true)
|
||||
|
||||
9
src/types.d.ts
vendored
9
src/types.d.ts
vendored
@ -2,10 +2,11 @@
|
||||
* Custom parsers for columns
|
||||
*/
|
||||
export interface ParquetParsers {
|
||||
timestampFromMilliseconds(millis: bigint): any;
|
||||
timestampFromMicroseconds(micros: bigint): any;
|
||||
timestampFromNanoseconds(nanos: bigint): any;
|
||||
dateFromDays(days: number): any;
|
||||
timestampFromMilliseconds(millis: bigint): any
|
||||
timestampFromMicroseconds(micros: bigint): any
|
||||
timestampFromNanoseconds(nanos: bigint): any
|
||||
dateFromDays(days: number): any
|
||||
stringFromBytes(bytes: Uint8Array): any
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -225,6 +225,24 @@ describe('convert function', () => {
|
||||
|
||||
expect(convert(data, columnParser)).toEqual([ 12358656, 12358656 ])
|
||||
})
|
||||
|
||||
it('respects custom parsers - stringFromBytes', () => {
|
||||
const encoder = new TextEncoder()
|
||||
const data = [encoder.encode('foo'), undefined]
|
||||
/** @type {SchemaElement} */
|
||||
const element = { name, converted_type: 'UTF8' }
|
||||
const columnParser = {
|
||||
element,
|
||||
parsers: {
|
||||
...parsers,
|
||||
stringFromBytes(/** @type {Uint8Array} */ bytes) {
|
||||
return `custom-${new TextDecoder().decode(bytes)}`
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
expect(convert(data, columnParser)).toEqual(['custom-foo', undefined])
|
||||
})
|
||||
})
|
||||
|
||||
describe('parseFloat16', () => {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user