mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Float16
This commit is contained in:
parent
93ff818508
commit
edabe74bd6
@ -56,10 +56,10 @@
|
||||
"devDependencies": {
|
||||
"@babel/eslint-parser": "7.27.0",
|
||||
"@types/node": "22.14.1",
|
||||
"@vitest/coverage-v8": "3.1.1",
|
||||
"eslint": "9.25.0",
|
||||
"@vitest/coverage-v8": "3.1.2",
|
||||
"eslint": "9.25.1",
|
||||
"eslint-plugin-jsdoc": "50.6.9",
|
||||
"typescript": "5.8.3",
|
||||
"vitest": "3.1.1"
|
||||
"vitest": "3.1.2"
|
||||
}
|
||||
}
|
||||
|
||||
@ -9,7 +9,7 @@ const dayMillis = 86400000 // 1 day in milliseconds
|
||||
* @returns {DecodedArray}
|
||||
*/
|
||||
export function unconvert(element, values) {
|
||||
const ctype = element.converted_type
|
||||
const { converted_type: ctype, logical_type: ltype } = element
|
||||
if (ctype === 'DECIMAL') {
|
||||
const factor = 10 ** (element.scale || 0)
|
||||
return values.map(v => {
|
||||
@ -32,6 +32,9 @@ export function unconvert(element, values) {
|
||||
const encoder = new TextEncoder()
|
||||
return values.map(v => encoder.encode(JSON.stringify(v)))
|
||||
}
|
||||
if (ltype?.type === 'FLOAT16') {
|
||||
return Array.from(values).map(unconvertFloat16)
|
||||
}
|
||||
if (ctype === 'UTF8') {
|
||||
if (!Array.isArray(values)) throw new Error('strings must be an array')
|
||||
const encoder = new TextEncoder()
|
||||
@ -148,8 +151,8 @@ export function unconvertDecimal({ type, type_length }, value) {
|
||||
} else {
|
||||
// for nonnegative: stop when top byte has signBit = 0 AND shifted value == 0n
|
||||
// for negative: stop when top byte has signBit = 1 AND shifted value == -1n
|
||||
const signBit = byte & 0x80
|
||||
if (!signBit && value === 0n || signBit && value === -1n) {
|
||||
const sign = byte & 0x80
|
||||
if (!sign && value === 0n || sign && value === -1n) {
|
||||
break
|
||||
}
|
||||
}
|
||||
@ -157,3 +160,62 @@ export function unconvertDecimal({ type, type_length }, value) {
|
||||
|
||||
return new Uint8Array(bytes)
|
||||
}
|
||||
|
||||
/**
 * Encode a JavaScript number as an IEEE 754 half-precision (binary16) value.
 *
 * The result is two bytes in little-endian order (low byte first).
 * Rounding is round-to-nearest, ties-to-even, performed directly on the
 * float64 input so there is no intermediate float32 rounding step.
 * Magnitudes too large for binary16 become ±Infinity, magnitudes too small
 * become subnormals or ±0, and NaN is encoded as the quiet NaN 0x7e00.
 *
 * @param {number | undefined} value number to encode
 * @returns {Uint8Array | undefined} two little-endian bytes, or undefined
 *   when value is undefined or null
 */
export function unconvertFloat16(value) {
  if (value === undefined || value === null) return
  if (Number.isNaN(value)) return new Uint8Array([0x00, 0x7e])

  const sign = value < 0 || Object.is(value, -0) ? 0x8000 : 0
  const abs = Math.abs(value)

  let magnitude
  if (!Number.isFinite(abs)) {
    // infinities
    magnitude = 0x7c00
  } else if (abs === 0) {
    // ±0
    magnitude = 0
  } else {
    // unbiased binary exponent, read from the float64 bit pattern
    // (DataView defaults to big-endian, so the first 16 bits hold sign + exponent)
    const view = new DataView(new ArrayBuffer(8))
    view.setFloat64(0, abs)
    const exp = (view.getUint16(0) >> 4 & 0x7ff) - 1023

    if (exp > 15) {
      // overflow: beyond the largest binary16 exponent
      magnitude = 0x7c00
    } else {
      // binary16 has a 10-bit fraction; below 2^-14 the spacing is fixed at
      // 2^-24 (subnormals), so the exponent is clamped at -14.
      const e = Math.max(exp, -14)
      // scale so one unit equals one binary16 ulp; power-of-two scaling is exact
      const scaled = abs * 2 ** (10 - e)

      // round-to-nearest-even
      const floor = Math.floor(scaled)
      const frac = scaled - floor
      let mant = floor
      if (frac > 0.5 || frac === 0.5 && floor % 2 === 1) mant += 1

      // Normals have mant in [1024, 2048]: the implicit leading 1 (1024) adds
      // one to the exponent field, hence the bias of 14 rather than 15.
      // Subnormals have mant in [0, 1024] with exponent field 0.
      // Adding (not OR-ing) lets a mantissa carry bump the exponent, which
      // naturally yields the smallest normal or infinity (0x7c00).
      magnitude = (e + 14 << 10) + mant
    }
  }

  const bits16 = sign | magnitude
  return new Uint8Array([bits16 & 0xff, bits16 >> 8])
}
|
||||
|
||||
BIN
test/files/float16_nonzeros_and_nans.parquet
Normal file
BIN
test/files/float16_nonzeros_and_nans.parquet
Normal file
Binary file not shown.
@ -1,6 +1,7 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { unconvert, unconvertDecimal, unconvertMinMax } from '../src/unconvert.js'
|
||||
import { unconvert, unconvertDecimal, unconvertFloat16, unconvertMinMax } from '../src/unconvert.js'
|
||||
import { convertMetadata } from 'hyparquet/src/metadata.js'
|
||||
import { parseFloat16 } from 'hyparquet/src/convert.js'
|
||||
|
||||
/**
|
||||
* @import {SchemaElement} from 'hyparquet'
|
||||
@ -200,6 +201,43 @@ describe('unconvertDecimal', () => {
|
||||
})
|
||||
})
|
||||
|
||||
// Tests for unconvertFloat16: expected bytes are little-endian IEEE 754
// binary16 encodings (low byte first).
describe('unconvertFloat16', () => {
  it('should convert number to Float16 array', () => {
    expect(unconvertFloat16(undefined)).toBeUndefined()
    expect(unconvertFloat16(0)).toEqual(new Uint8Array([0x00, 0x00]))
    expect(unconvertFloat16(-0)).toEqual(new Uint8Array([0x00, 0x80]))
    expect(unconvertFloat16(NaN)).toEqual(new Uint8Array([0x00, 0x7e]))
    expect(unconvertFloat16(Infinity)).toEqual(new Uint8Array([0x00, 0x7c]))
    expect(unconvertFloat16(-Infinity)).toEqual(new Uint8Array([0x00, 0xfc]))
    expect(unconvertFloat16(0.5)).toEqual(new Uint8Array([0x00, 0x38]))
    expect(unconvertFloat16(-0.5)).toEqual(new Uint8Array([0x00, 0xb8]))
    expect(unconvertFloat16(1)).toEqual(new Uint8Array([0x00, 0x3c]))
    expect(unconvertFloat16(-1)).toEqual(new Uint8Array([0x00, 0xbc]))
    expect(unconvertFloat16(0.000244140625)).toEqual(new Uint8Array([0x00, 0x0c]))
    // largest normal
    expect(unconvertFloat16(65504)).toEqual(new Uint8Array([0xff, 0x7b]))
    expect(unconvertFloat16(65505)).toEqual(new Uint8Array([0xff, 0x7b]))
    // subnormal: 2^-24 is the smallest positive binary16 value, bit pattern
    // 0x0001 (0x0002 would be 2^-23)
    expect(unconvertFloat16(Math.pow(2, -24))).toEqual(new Uint8Array([0x01, 0x00]))
    // mantissa overflow
    expect(unconvertFloat16(2047.9999)).toEqual(new Uint8Array([0x00, 0x68]))
  })

  it('should round-trip Float16', () => {
    expect(parseFloat16(unconvertFloat16(0))).toEqual(0)
    expect(parseFloat16(unconvertFloat16(-0))).toEqual(-0)
    expect(parseFloat16(unconvertFloat16(NaN))).toEqual(NaN)
    expect(parseFloat16(unconvertFloat16(Infinity))).toEqual(Infinity)
    expect(parseFloat16(unconvertFloat16(-Infinity))).toEqual(-Infinity)
    expect(parseFloat16(unconvertFloat16(0.5))).toEqual(0.5)
    expect(parseFloat16(unconvertFloat16(-0.5))).toEqual(-0.5)
    expect(parseFloat16(unconvertFloat16(1))).toEqual(1)
    expect(parseFloat16(unconvertFloat16(-1))).toEqual(-1)
    expect(parseFloat16(unconvertFloat16(65504))).toEqual(65504)
    expect(parseFloat16(unconvertFloat16(0.000244140625))).toEqual(0.000244140625)
    // exactly representable subnormal must survive the round trip
    expect(parseFloat16(unconvertFloat16(Math.pow(2, -24)))).toEqual(Math.pow(2, -24))
  })
})
|
||||
|
||||
/**
|
||||
* BigInt parseDecimal
|
||||
* @param {Uint8Array} bytes
|
||||
|
||||
Loading…
Reference in New Issue
Block a user