2024-02-26 20:20:48 +00:00
|
|
|
import { describe, expect, it } from 'vitest'
|
2025-06-10 01:02:31 +00:00
|
|
|
import { DEFAULT_PARSERS, convert, parseDecimal, parseFloat16 } from '../src/convert.js'
|
2024-02-26 20:20:48 +00:00
|
|
|
|
|
|
|
|
/**
 * @import {ColumnDecoder, SchemaElement} from '../src/types.js'
 */
|
|
|
|
|
|
|
|
|
|
describe('convert function', () => {
  // Shared fixtures: every schema element uses the same column name,
  // and every test starts from the library's default parsers.
  const name = 'name'
  const parsers = DEFAULT_PARSERS

  it('returns the same data if converted_type is undefined', () => {
    const data = [1, 2, 3]
    const element = { name }
    expect(convert(data, { element, parsers })).toEqual(data)
  })

  it('converts byte arrays to utf8', () => {
    const data = [new TextEncoder().encode('foo'), new TextEncoder().encode('bar')]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'UTF8' }
    expect(convert(data, { element, parsers })).toEqual(['foo', 'bar'])
  })

  it('converts byte arrays to utf8 default true', () => {
    // No converted_type and no utf8 option: BYTE_ARRAY is still decoded to strings
    const data = [new TextEncoder().encode('foo'), new TextEncoder().encode('bar')]
    /** @type {SchemaElement} */
    const element = { name, type: 'BYTE_ARRAY' }
    expect(convert(data, { element, parsers })).toEqual(['foo', 'bar'])
  })

  it('preserves byte arrays utf8=false', () => {
    const data = [new TextEncoder().encode('foo'), new TextEncoder().encode('bar')]
    /** @type {SchemaElement} */
    const element = { name, type: 'BYTE_ARRAY' }
    // Expected values are the raw utf-8 bytes of 'foo' and 'bar'
    expect(convert(data, { element, parsers, utf8: false })).toEqual([
      new Uint8Array([102, 111, 111]), new Uint8Array([98, 97, 114]),
    ])
  })

  it('converts numbers to DECIMAL', () => {
    const data = [100, 200]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'DECIMAL' }
    expect(convert(data, { element, parsers })).toEqual([100, 200])
  })

  it('converts numbers to DECIMAL with scale', () => {
    // scale: 2 shifts the decimal point two places: 100 -> 1
    const data = [100, 200]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'DECIMAL', scale: 2 }
    expect(convert(data, { element, parsers })).toEqual([1, 2])
  })

  it('converts bigint to DECIMAL', () => {
    // bigint inputs come back as plain numbers
    const data = [1000n, 2000n]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'DECIMAL' }
    expect(convert(data, { element, parsers })).toEqual([1000, 2000])
  })

  it('converts bigint to DECIMAL with scale', () => {
    const data = [10n, 20n]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'DECIMAL', scale: 2 }
    expect(convert(data, { element, parsers })).toEqual([0.1, 0.2])
  })

  it('converts byte arrays to DECIMAL', () => {
    const data = [new Uint8Array([0, 0, 0, 100]), new Uint8Array([0, 0, 0, 200])]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'DECIMAL', scale: 0 }
    expect(convert(data, { element, parsers })).toEqual([100, 200])
  })

  it('converts byte array from issue #59 to DECIMAL', () => {
    // Bytes read big-endian are 0x125389979c00 = 20150000000000; with scale 10 that is 2015
    const data = [new Uint8Array([18, 83, 137, 151, 156, 0])]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'DECIMAL', scale: 10, precision: 14 }
    expect(convert(data, { element, parsers })).toEqual([2015])
  })

  it('converts epoch time to DATE', () => {
    const data = [1, 2] // days since epoch
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'DATE' }
    // 86400000 is the number of milliseconds in one day
    expect(convert(data, { element, parsers })).toEqual([new Date(86400000), new Date(86400000 * 2)])
  })

  it('converts INT96 to DATE', () => {
    // from alltypes_plain.parquet
    const data = [45284764452596988585705472n, 45284764452597048585705472n]
    /** @type {SchemaElement} */
    const element = { name, type: 'INT96' }
    expect(convert(data, { element, parsers })).toEqual([new Date('2009-03-01T00:00:00.000Z'), new Date('2009-03-01T00:01:00.000Z')])
  })

  it('converts epoch time to TIMESTAMP_MILLIS', () => {
    const data = [1716506900000n, 1716507000000n]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'TIMESTAMP_MILLIS' }
    expect(convert(data, { element, parsers })).toEqual([
      new Date('2024-05-23T23:28:20.000Z'), new Date('2024-05-23T23:30:00.000Z'),
    ])
  })

  it('converts epoch time to TIMESTAMP_MICROS', () => {
    // Same instants as the TIMESTAMP_MILLIS test, expressed in microseconds
    const data = [1716506900000000n, 1716507000000000n]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'TIMESTAMP_MICROS' }
    expect(convert(data, { element, parsers })).toEqual([
      new Date('2024-05-23T23:28:20.000Z'), new Date('2024-05-23T23:30:00.000Z'),
    ])
  })

  it('parses strings to JSON', () => {
    // JSON input is supplied as utf-8 encoded bytes, not as strings
    const encoder = new TextEncoder()
    const data = ['{"key": true}', '{"quay": 314}']
      .map(str => encoder.encode(str))
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'JSON' }
    expect(convert(data, { element, parsers })).toEqual([{ key: true }, { quay: 314 }])
  })

  it('converts uint64', () => {
    const data = [100n, -100n]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'UINT_64' }
    // -100n reinterpreted as unsigned 64-bit is 2^64 - 100
    expect(convert(data, { element, parsers })).toEqual(new BigUint64Array([100n, 18446744073709551516n]))
  })

  it('converts to float16', () => {
    // Byte pairs are low byte first: 0x3c00 is 1.0 and 0x4000 is 2.0 in half precision
    const data = [new Uint8Array([0x00, 0x3c]), new Uint8Array([0x00, 0x40])]
    /** @type {SchemaElement} */
    const element = { name, logical_type: { type: 'FLOAT16' } }
    expect(convert(data, { element, parsers })).toEqual([1, 2])
  })

  it('converts timestamp with units', () => {
    // Unit comes from logical_type rather than converted_type
    const data = [1716506900000000n, 1716507000000000n]
    /** @type {SchemaElement} */
    const element = { name, logical_type: { type: 'TIMESTAMP', isAdjustedToUTC: true, unit: 'MICROS' } }
    expect(convert(data, { element, parsers })).toEqual([
      new Date('2024-05-23T23:28:20.000Z'), new Date('2024-05-23T23:30:00.000Z'),
    ])
  })

  it('throws error for BSON conversion', () => {
    const data = [{}]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'BSON' }
    expect(() => convert(data, { element, parsers }))
      .toThrow('parquet bson not supported')
  })

  it('throws error for INTERVAL conversion', () => {
    const data = [{}]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'INTERVAL' }
    expect(() => convert(data, { element, parsers }))
      .toThrow('parquet interval not supported')
  })

  it('respects custom parsers - dateFromDays', () => {
    const data = [1, 2] // days since epoch
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'DATE' }
    /** @type {Pick<ColumnDecoder, "element" | "utf8" | "parsers">} */
    const columnParser = {
      element,
      parsers: {
        ...parsers,
        // Identity parser: the raw day counts should pass straight through
        dateFromDays: days => days,
      },
    }

    expect(convert(data, columnParser)).toEqual([ 1, 2 ])
  })

  it('respects custom parsers - timestampFromMilliseconds', () => {
    const data = [1716506900000n, 1716507000000n]
    /** @type {SchemaElement} */
    const element = { name, converted_type: 'TIMESTAMP_MILLIS' }
    /** @type {Pick<ColumnDecoder, "element" | "utf8" | "parsers">} */
    const columnParser = {
      element,
      parsers: {
        ...parsers,
        // Returns a number instead of a Date, so the custom parser is observable
        timestampFromMilliseconds: millis => Number(millis / 100000n),
      },
    }

    expect(convert(data, columnParser)).toEqual([ 17165069, 17165070 ])
  })

  it('respects custom parsers - timestampFromMicroseconds', () => {
    const data = [1716506900000000n, 1716507000000000n]
    /** @type {SchemaElement} */
    const element = { name, logical_type: { type: 'TIMESTAMP', isAdjustedToUTC: true, unit: 'MICROS' } }
    /** @type {Pick<ColumnDecoder, "element" | "utf8" | "parsers">} */
    const columnParser = {
      element,
      parsers: {
        ...parsers,
        timestampFromMicroseconds: micros => Number(micros / 100000000n),
      },
    }

    expect(convert(data, columnParser)).toEqual([ 17165069, 17165070 ])
  })

  it('respects custom parsers - timestampFromNanoseconds', () => {
    // from alltypes_plain.parquet
    const data = [45284764452596988585705472n, 45284764452597048585705472n]
    /** @type {SchemaElement} */
    const element = { name, type: 'INT96' }
    /** @type {Pick<ColumnDecoder, "element" | "utf8" | "parsers">} */
    const columnParser = {
      element,
      parsers: {
        ...parsers,
        // Reduce to whole seconds: the two inputs are 60 s apart, so a coarser
        // divisor would collapse them to the same value and hide per-element bugs
        timestampFromNanoseconds: nanos => Number(nanos / 1000000000n),
      },
    }

    // Epoch seconds for 2009-03-01T00:00:00Z and 2009-03-01T00:01:00Z
    expect(convert(data, columnParser)).toEqual([ 1235865600, 1235865660 ])
  })
})
|
2024-05-13 16:22:55 +00:00
|
|
|
|
|
|
|
|
describe('parseFloat16', () => {
  // Inputs are two-byte arrays with the low byte first,
  // e.g. [0x00, 0x3c] is the half-precision bit pattern 0x3c00 (1.0).

  it('convert float16 numbers', () => {
    expect(parseFloat16(undefined)).toBe(undefined)
    expect(parseFloat16(new Uint8Array([0x00, 0xbc]))).toBe(-1)
    expect(parseFloat16(new Uint8Array([0x00, 0x00]))).toBe(0)
    expect(parseFloat16(new Uint8Array([0x00, 0x38]))).toBe(0.5)
    expect(parseFloat16(new Uint8Array([0x00, 0x3c]))).toBe(1)
    expect(parseFloat16(new Uint8Array([0x00, 0x40]))).toBe(2)
  })

  it('convert float16 -0', () => {
    // toBe compares with Object.is, which distinguishes -0 from +0
    expect(parseFloat16(new Uint8Array([0x00, 0x80]))).toBe(-0)
    expect(parseFloat16(new Uint8Array([0x00, 0x80]))).not.toBe(0)
  })

  it('convert float16 Infinity', () => {
    expect(parseFloat16(new Uint8Array([0x00, 0x7c]))).toBe(Infinity)
    expect(parseFloat16(new Uint8Array([0x00, 0xfc]))).toBe(-Infinity)
  })

  it('convert float16 NaN', () => {
    // Exponent bits all set with a non-zero fraction: 0x7e00 and 0x7e01
    expect(parseFloat16(new Uint8Array([0x00, 0x7e]))).toBeNaN()
    expect(parseFloat16(new Uint8Array([0x01, 0x7e]))).toBeNaN()
  })

  it('convert float16 subnormal number', () => {
    // 0x03ff: zero exponent, fraction 1023 (the largest subnormal).
    // Adjacent subnormals differ by 2^-24 (~6e-8), so 10 digits of precision
    // are needed for the assertion to tell them apart.
    expect(parseFloat16(new Uint8Array([0xff, 0x03])))
      .toBeCloseTo(2 ** -14 * (1023 / 1024), 10)
  })
})
|
|
|
|
|
|
|
|
|
|
describe('parseDecimal', () => {
  // Each case feeds raw bytes to parseDecimal and checks the decoded number.

  it('should return 0 for an empty Uint8Array', () => {
    const noBytes = new Uint8Array(0)
    expect(parseDecimal(noBytes)).toBe(0)
  })

  it('should parse a single byte', () => {
    expect(parseDecimal(Uint8Array.of(42))).toBe(42)
  })

  it('should parse two bytes in big-endian order', () => {
    // 0x0100 = 256
    expect(parseDecimal(Uint8Array.of(1, 0))).toBe(256)
  })

  it('should parse three bytes', () => {
    // 0x010203 = 66051
    expect(parseDecimal(Uint8Array.of(1, 2, 3))).toBe(66051)
  })

  it('should parse -1 as a 32-bit number', () => {
    // Four 0xff bytes: every bit set
    const allOnes = Uint8Array.of(255, 255, 255, 255)
    expect(parseDecimal(allOnes)).toBe(-1)
  })
})
|