Fix DATE converted type

This commit is contained in:
Kenny Daniel 2025-04-11 18:24:21 -06:00
parent fde7f81893
commit 070d9591b0
No known key found for this signature in database
GPG Key ID: FDF16101AF5AFD3A
5 changed files with 61 additions and 18 deletions

@ -6,7 +6,7 @@
[![minzipped](https://img.shields.io/bundlephobia/minzip/hyparquet-writer)](https://www.npmjs.com/package/hyparquet-writer)
[![workflow status](https://github.com/hyparam/hyparquet-writer/actions/workflows/ci.yml/badge.svg)](https://github.com/hyparam/hyparquet-writer/actions)
[![mit license](https://img.shields.io/badge/License-MIT-orange.svg)](https://opensource.org/licenses/MIT)
![coverage](https://img.shields.io/badge/Coverage-97-darkred)
![coverage](https://img.shields.io/badge/Coverage-95-darkred)
[![dependencies](https://img.shields.io/badge/Dependencies-1-blueviolet)](https://www.npmjs.com/package/hyparquet-writer?activeTab=dependencies)
Hyparquet Writer is a JavaScript library for writing [Apache Parquet](https://parquet.apache.org) files. It is designed to be lightweight and fast, and to store data very efficiently. It is a companion to the [hyparquet](https://github.com/hyparam/hyparquet) library, which is a JavaScript library for reading Parquet files.

@ -102,8 +102,8 @@ export function writeMetadata(writer, metadata) {
* @param {LogicalType | undefined} type
* @returns {ThriftObject | undefined}
*/
function logicalType(type) {
if (type === undefined) return undefined
export function logicalType(type) {
if (!type) return
if (type.type === 'STRING') return { field_1: {} }
if (type.type === 'MAP') return { field_2: {} }
if (type.type === 'LIST') return { field_3: {} }
@ -133,7 +133,6 @@ function logicalType(type) {
if (type.type === 'VARIANT') return { field_16: {} }
if (type.type === 'GEOMETRY') return { field_17: {} }
if (type.type === 'GEOGRAPHY') return { field_18: {} }
throw new Error(`unknown logical type: ${type.type}`)
}
/**
@ -141,8 +140,7 @@ function logicalType(type) {
* @returns {ThriftObject}
*/
function timeUnit(unit) {
// Maps a unit name to an object with a single numbered field key holding an empty object.
if (unit === 'MILLIS') return { field_1: {} }
if (unit === 'MICROS') return { field_2: {} }
if (unit === 'NANOS') return { field_3: {} }
// Any other unit name is rejected with an error that includes the offending value.
throw new Error(`unknown time unit: ${unit}`)
// NOTE(review): the two statements below come after an unconditional throw, so they can
// never execute as this span is written. The hunk header above (-141,8 +140,7) suggests
// they are the added lines of a diff rendered without +/- markers; confirm against the
// committed file before relying on either the throwing or the default-to-field_1 behavior.
if (unit === 'MICROS') return { field_2: {} }
return { field_1: {} }
}

@ -20,13 +20,13 @@ export function unconvert(element, values) {
})
}
if (ctype === 'DATE') {
return values.map(v => v.getTime())
return Array.from(values).map(v => v && v.getTime() / dayMillis)
}
if (ctype === 'TIMESTAMP_MILLIS') {
return Array.from(values).map(v => BigInt(v.getTime()))
return Array.from(values).map(v => v && BigInt(v.getTime()))
}
if (ctype === 'TIMESTAMP_MICROS') {
return Array.from(values).map(v => BigInt(v.getTime() * 1000))
return Array.from(values).map(v => v && BigInt(v.getTime() * 1000))
}
if (ctype === 'JSON') {
if (!Array.isArray(values)) throw new Error('JSON must be an array')

@ -1,10 +1,11 @@
import { parquetMetadata } from 'hyparquet'
import { describe, expect, it } from 'vitest'
import { ByteWriter } from '../src/bytewriter.js'
import { writeMetadata } from '../src/metadata.js'
import { logicalType, writeMetadata } from '../src/metadata.js'
/**
* @import {FileMetaData} from 'hyparquet'
* @import {FileMetaData, LogicalType} from 'hyparquet'
* @import {ThriftObject} from '../src/types.js'
* @type {FileMetaData}
*/
export const exampleMetadata = {
@ -190,5 +191,53 @@ describe('writeMetadata', () => {
expect(outputMetadata).toEqual(withKvMetadata)
})
})
describe('logicalType', () => {
  // An undefined logical type must come back as undefined.
  it('returns undefined when given undefined', () => {
    const encoded = logicalType(undefined)
    expect(encoded).toBeUndefined()
  })

  // Table-driven check: each logical type is paired with the thrift object it must produce.
  it('returns correct object for known types', () => {
    /** @type {{ input: LogicalType, expected: ThriftObject }[]} */
    const cases = [
      { input: { type: 'STRING' }, expected: { field_1: {} } },
      { input: { type: 'MAP' }, expected: { field_2: {} } },
      { input: { type: 'LIST' }, expected: { field_3: {} } },
      { input: { type: 'ENUM' }, expected: { field_4: {} } },
      { input: { type: 'DECIMAL', scale: 2, precision: 5 }, expected: { field_5: { field_1: 2, field_2: 5 } } },
      { input: { type: 'DATE' }, expected: { field_6: {} } },
      // TIME and TIMESTAMP nest the UTC flag and a time-unit object inside their field.
      {
        input: { type: 'TIME', isAdjustedToUTC: true, unit: 'MILLIS' },
        expected: { field_7: { field_1: true, field_2: { field_1: {} } } },
      },
      {
        input: { type: 'TIMESTAMP', isAdjustedToUTC: false, unit: 'MICROS' },
        expected: { field_8: { field_1: false, field_2: { field_2: {} } } },
      },
      {
        input: { type: 'TIMESTAMP', isAdjustedToUTC: false, unit: 'NANOS' },
        expected: { field_8: { field_1: false, field_2: { field_3: {} } } },
      },
      { input: { type: 'INTEGER', bitWidth: 32, isSigned: true }, expected: { field_10: { field_1: 32, field_2: true } } },
      { input: { type: 'NULL' }, expected: { field_11: {} } },
      { input: { type: 'JSON' }, expected: { field_12: {} } },
      { input: { type: 'BSON' }, expected: { field_13: {} } },
      { input: { type: 'UUID' }, expected: { field_14: {} } },
      { input: { type: 'FLOAT16' }, expected: { field_15: {} } },
      { input: { type: 'VARIANT' }, expected: { field_16: {} } },
      { input: { type: 'GEOMETRY' }, expected: { field_17: {} } },
      { input: { type: 'GEOGRAPHY' }, expected: { field_18: {} } },
    ]

    // Cases run in table order, so the first mismatch is the one reported.
    for (const { input, expected } of cases) {
      expect(logicalType(input)).toEqual(expected)
    }
  })
})

@ -11,11 +11,7 @@ describe('unconvert', () => {
const schema = { name: 'test', converted_type: 'DATE' }
const input = [new Date('2020-01-01T00:00:00Z'), new Date('2021-01-01T00:00:00Z')]
const result = unconvert(schema, input)
expect(result).toEqual([
new Date('2020-01-01T00:00:00Z').getTime(),
new Date('2021-01-01T00:00:00Z').getTime(),
])
expect(result).toEqual([18262, 18628])
})
it('should convert JSON objects to strings when converted_type = JSON', () => {