const dayMillis = 86400000 // 1 day in milliseconds

/**
 * Convert from rich to primitive types.
 *
 * Dispatches on the element's converted/logical type:
 * - DECIMAL: numbers are scaled by 10^scale and passed to unconvertDecimal
 * - DATE: Date objects become whole days since the unix epoch
 * - TIMESTAMP_MILLIS / TIMESTAMP_MICROS: Date objects become bigint counts
 * - JSON: values are JSON-stringified and utf-8 encoded
 * - FLOAT16: numbers become 2-byte little-endian half floats
 * - UTF8: strings are utf-8 encoded
 * Anything else is returned unchanged.
 *
 * @import {DecodedArray, SchemaElement, Statistics} from 'hyparquet'
 * @param {SchemaElement} element
 * @param {DecodedArray} values
 * @returns {DecodedArray}
 */
export function unconvert(element, values) {
  const { converted_type: ctype, logical_type: ltype } = element
  if (ctype === 'DECIMAL') {
    const factor = 10 ** (element.scale || 0)
    // Copy to a plain array first: mapping a typed array in place would
    // coerce the results back to its element type (bigint results throw,
    // byte-array results turn into NaN).
    return Array.from(values).map(v => {
      if (v === null || v === undefined) return v
      if (typeof v !== 'number') throw new Error('DECIMAL must be a number')
      return unconvertDecimal(element, BigInt(Math.round(v * factor)))
    })
  }
  if (ctype === 'DATE') {
    // floor to whole days so the data agrees with unconvertMinMax
    return Array.from(values).map(v => v && Math.floor(v.getTime() / dayMillis))
  }
  if (ctype === 'TIMESTAMP_MILLIS') {
    return Array.from(values).map(v => v && BigInt(v.getTime()))
  }
  if (ctype === 'TIMESTAMP_MICROS') {
    // multiply as bigint: millis * 1000 can exceed 2^53 as a double
    return Array.from(values).map(v => v && BigInt(v.getTime()) * 1000n)
  }
  if (ctype === 'JSON') {
    if (!Array.isArray(values)) throw new Error('JSON must be an array')
    const encoder = new TextEncoder()
    return values.map(v => encoder.encode(JSON.stringify(v)))
  }
  if (ltype?.type === 'FLOAT16') {
    return Array.from(values).map(v => unconvertFloat16(v))
  }
  if (ctype === 'UTF8') {
    if (!Array.isArray(values)) throw new Error('strings must be an array')
    const encoder = new TextEncoder()
    return values.map(v => encoder.encode(v))
  }
  return values
}

/**
 * Little-endian bytes of a 32-bit signed integer.
 *
 * @param {number} value
 * @returns {Uint8Array}
 */
function int32Bytes(value) {
  const buffer = new ArrayBuffer(4)
  new DataView(buffer).setInt32(0, value, true)
  return new Uint8Array(buffer)
}

/**
 * Little-endian bytes of a 64-bit signed integer.
 *
 * @param {bigint} value
 * @returns {Uint8Array}
 */
function int64Bytes(value) {
  const buffer = new ArrayBuffer(8)
  new DataView(buffer).setBigInt64(0, value, true)
  return new Uint8Array(buffer)
}

/**
 * Little-endian bytes of a 32-bit float.
 *
 * @param {number} value
 * @returns {Uint8Array}
 */
function float32Bytes(value) {
  const buffer = new ArrayBuffer(4)
  new DataView(buffer).setFloat32(0, value, true)
  return new Uint8Array(buffer)
}

/**
 * Little-endian bytes of a 64-bit float.
 *
 * @param {number} value
 * @returns {Uint8Array}
 */
function float64Bytes(value) {
  const buffer = new ArrayBuffer(8)
  new DataView(buffer).setFloat64(0, value, true)
  return new Uint8Array(buffer)
}

/**
 * Unconvert from rich type to byte array for metadata statistics.
 *
 * Returns undefined for a missing value. Throws when the combination of
 * physical type and value type is not supported.
 *
 * @param {import('hyparquet/src/types.js').MinMaxType | undefined} value
 * @param {SchemaElement} element
 * @returns {Uint8Array | undefined}
 */
export function unconvertMinMax(value, element) {
  if (value === undefined || value === null) return undefined
  const { type, converted_type, logical_type } = element
  if (type === 'BOOLEAN') return new Uint8Array([value ? 1 : 0])
  if (converted_type === 'DECIMAL') {
    if (typeof value !== 'number') throw new Error('DECIMAL must be a number')
    const factor = 10 ** (element.scale || 0)
    const out = unconvertDecimal(element, BigInt(Math.round(value * factor)))
    if (out instanceof Uint8Array) return out
    // INT32-backed decimals hold the unscaled integer, so encode it as int32
    if (typeof out === 'number') return int32Bytes(out)
    return int64Bytes(out)
  }
  if (logical_type?.type === 'FLOAT16' && typeof value === 'number') {
    // same 2-byte encoding that unconvert() uses for the column data
    return unconvertFloat16(value)
  }
  if (type === 'BYTE_ARRAY' || type === 'FIXED_LEN_BYTE_ARRAY') {
    // truncate byte arrays to 16 bytes for statistics
    // NOTE(review): a truncated max may sort below the true max, and strings
    // are cut at 16 UTF-16 code units rather than 16 bytes -- confirm readers
    // tolerate this.
    if (value instanceof Uint8Array) return value.slice(0, 16)
    return new TextEncoder().encode(value.toString().slice(0, 16))
  }
  if (type === 'FLOAT' && typeof value === 'number') return float32Bytes(value)
  if (type === 'DOUBLE' && typeof value === 'number') return float64Bytes(value)
  if (type === 'INT32' && typeof value === 'number') return int32Bytes(value)
  if (type === 'INT64' && typeof value === 'bigint') return int64Bytes(value)
  if (type === 'INT32' && converted_type === 'DATE' && value instanceof Date) {
    return int32Bytes(Math.floor(value.getTime() / dayMillis))
  }
  if (type === 'INT64' && value instanceof Date) {
    if (converted_type === 'TIMESTAMP_MILLIS') {
      return int64Bytes(BigInt(value.getTime()))
    }
    if (converted_type === 'TIMESTAMP_MICROS') {
      return int64Bytes(BigInt(value.getTime()) * 1000n)
    }
  }
  throw new Error(`unsupported type for statistics: ${type} with value ${value}`)
}

/**
 * Build the thrift object for column statistics, keyed by thrift field id.
 *
 * @param {Statistics} stats
 * @param {SchemaElement} element
 * @returns {import('../src/types.js').ThriftObject}
 */
export function unconvertStatistics(stats, element) {
  return {
    field_1: unconvertMinMax(stats.max, element),
    field_2: unconvertMinMax(stats.min, element),
    field_3: stats.null_count,
    field_4: stats.distinct_count,
    field_5: unconvertMinMax(stats.max_value, element),
    field_6: unconvertMinMax(stats.min_value, element),
    field_7: stats.is_max_value_exact,
    field_8: stats.is_min_value_exact,
  }
}

/**
 * Encode an unscaled decimal integer for the element's physical type.
 *
 * INT32 returns a number, INT64 returns the bigint unchanged, and byte array
 * types return big-endian two's complement bytes: exactly type_length bytes
 * when type_length is set, otherwise the minimal length (empty for zero).
 *
 * @param {SchemaElement} element
 * @param {bigint} value
 * @returns {number | bigint | Uint8Array}
 */
export function unconvertDecimal({ type, type_length }, value) {
  if (type === 'INT32') return Number(value)
  if (type === 'INT64') return value
  if (type === 'FIXED_LEN_BYTE_ARRAY' && !type_length) {
    throw new Error('fixed length byte array type_length is required')
  }
  if (!type_length && !value) return new Uint8Array()

  const bytes = []
  let rest = value
  while (true) {
    // extract the lowest 8 bits
    const byte = Number(rest & 0xffn)
    bytes.unshift(byte)
    rest >>= 8n

    if (type_length) {
      if (bytes.length >= type_length) break // fixed length
    } else {
      // for nonnegative: stop when top byte has signBit = 0 AND shifted value == 0n
      // for negative: stop when top byte has signBit = 1 AND shifted value == -1n
      const sign = byte & 0x80
      if (!sign && rest === 0n || sign && rest === -1n) break
    }
  }
  return new Uint8Array(bytes)
}

/**
 * Round a nonnegative finite number to the nearest integer, ties to even.
 *
 * @param {number} x
 * @returns {number}
 */
function roundHalfEven(x) {
  const floor = Math.floor(x)
  const diff = x - floor
  if (diff > 0.5 || diff === 0.5 && floor % 2 === 1) return floor + 1
  return floor
}

/**
 * Encode a number as an IEEE 754 half-precision float, little-endian.
 *
 * Rounds to nearest, ties to even. Magnitudes too large for a half float
 * become infinity; magnitudes too small become subnormals or signed zero.
 * NaN is encoded as 0x7e00.
 *
 * @param {number | undefined} value
 * @returns {Uint8Array | undefined}
 */
export function unconvertFloat16(value) {
  if (value === undefined || value === null) return
  if (Number.isNaN(value)) return new Uint8Array([0x00, 0x7e])

  const sign = value < 0 || Object.is(value, -0) ? 1 : 0
  const abs = Math.abs(value)

  let magnitude // low 15 bits: 5-bit exponent and 10-bit mantissa
  if (abs >= 65520) {
    // 65520 is halfway between the largest half float (65504) and 2^16,
    // so it and everything above (including Infinity) rounds to infinity
    magnitude = 0x7c00
  } else if (abs < 2 ** -14) {
    // zero and subnormals: value = mantissa * 2^-24. Scaling by a power of
    // two is exact. A result of 1024 is the bit pattern of the smallest
    // normal number, which is the correct round-up.
    magnitude = roundHalfEven(abs * 2 ** 24)
  } else {
    // normal numbers: abs = (mantissa / 1024) * 2^exp, mantissa in [1024, 2048)
    let exp = Math.floor(Math.log2(abs))
    // log2 is approximate near powers of two; correct an off-by-one
    if (2 ** exp > abs) exp--
    else if (2 ** (exp + 1) <= abs) exp++
    const mantissa = roundHalfEven(abs / 2 ** exp * 1024)
    // use addition so a mantissa rounded up to 2048 carries into the exponent
    magnitude = (exp + 15 << 10) + (mantissa - 1024)
  }

  const bits16 = sign << 15 | magnitude
  return new Uint8Array([bits16 & 0xff, bits16 >> 8])
}