From 070d9591b03a29fe034175cd36d2bb93117f8526 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Fri, 11 Apr 2025 18:24:21 -0600 Subject: [PATCH] Fix DATE converted type --- README.md | 2 +- src/metadata.js | 10 +++----- src/unconvert.js | 6 ++--- test/metadata.test.js | 55 +++++++++++++++++++++++++++++++++++++++--- test/unconvert.test.js | 6 +---- 5 files changed, 61 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index c19bc81..c8d7a4a 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ [![minzipped](https://img.shields.io/bundlephobia/minzip/hyparquet-writer)](https://www.npmjs.com/package/hyparquet-writer) [![workflow status](https://github.com/hyparam/hyparquet-writer/actions/workflows/ci.yml/badge.svg)](https://github.com/hyparam/hyparquet-writer/actions) [![mit license](https://img.shields.io/badge/License-MIT-orange.svg)](https://opensource.org/licenses/MIT) -![coverage](https://img.shields.io/badge/Coverage-97-darkred) +![coverage](https://img.shields.io/badge/Coverage-95-darkred) [![dependencies](https://img.shields.io/badge/Dependencies-1-blueviolet)](https://www.npmjs.com/package/hyparquet-writer?activeTab=dependencies) Hyparquet Writer is a JavaScript library for writing [Apache Parquet](https://parquet.apache.org) files. It is designed to be lightweight, fast and store data very efficiently. It is a companion to the [hyparquet](https://github.com/hyparam/hyparquet) library, which is a JavaScript library for reading parquet files. diff --git a/src/metadata.js b/src/metadata.js index a5df77f..76cdebb 100644 --- a/src/metadata.js +++ b/src/metadata.js @@ -102,8 +102,8 @@ export function writeMetadata(writer, metadata) { * @param {LogicalType | undefined} type * @returns {ThriftObject | undefined} */ -function logicalType(type) { - if (type === undefined) return undefined +export function logicalType(type) { + if (!type) return if (type.type === 'STRING') return { field_1: {} } if (type.type === 'MAP') return { field_2: {} } if (type.type === 'LIST') return { field_3: {} } @@ -133,7 +133,6 @@ function logicalType(type) { if (type.type === 'VARIANT') return { field_16: {} } if (type.type === 'GEOMETRY') return { field_17: {} } if (type.type === 'GEOGRAPHY') return { field_18: {} } - throw new Error(`unknown logical type: ${type.type}`) } /** @@ -141,8 +140,7 @@ function logicalType(type) { * @returns {ThriftObject} */ function timeUnit(unit) { - if (unit === 'MILLIS') return { field_1: {} } - if (unit === 'MICROS') return { field_2: {} } if (unit === 'NANOS') return { field_3: {} } - throw new Error(`unknown time unit: ${unit}`) + if (unit === 'MICROS') return { field_2: {} } + return { field_1: {} } } diff --git a/src/unconvert.js b/src/unconvert.js index 219aeb2..3c56cdd 100644 --- a/src/unconvert.js +++ b/src/unconvert.js @@ -20,13 +20,13 @@ export function unconvert(element, values) { }) } if (ctype === 'DATE') { - return values.map(v => v.getTime()) + return Array.from(values).map(v => v && v.getTime() / dayMillis) } if (ctype === 'TIMESTAMP_MILLIS') { - return Array.from(values).map(v => BigInt(v.getTime())) + return Array.from(values).map(v => v && BigInt(v.getTime())) } if (ctype === 'TIMESTAMP_MICROS') { - return Array.from(values).map(v => BigInt(v.getTime() * 1000)) + return Array.from(values).map(v => v && BigInt(v.getTime() * 1000)) } if (ctype === 'JSON') { if (!Array.isArray(values)) throw new Error('JSON must be an array') diff --git a/test/metadata.test.js b/test/metadata.test.js index 3b08ca4..71d0303 100644 --- a/test/metadata.test.js +++ b/test/metadata.test.js @@ -1,10 +1,11 @@ import { parquetMetadata } from 'hyparquet' import { describe, expect, it } from 'vitest' import { ByteWriter } from '../src/bytewriter.js' -import { writeMetadata } from '../src/metadata.js' +import { logicalType, writeMetadata } from '../src/metadata.js' /** - * @import {FileMetaData} from 'hyparquet' + * @import {FileMetaData, LogicalType} from 'hyparquet' + * @import {ThriftObject} from '../src/types.js' * @type {FileMetaData} */ export const exampleMetadata = { @@ -190,5 +191,53 @@ describe('writeMetadata', () => { expect(outputMetadata).toEqual(withKvMetadata) }) - +}) + +describe('logicalType', () => { + it('returns undefined when given undefined', () => { + expect(logicalType(undefined)).toBeUndefined() + }) + + it('returns correct object for known types', () => { + /** @type {{ input: LogicalType, expected: ThriftObject }[]} */ + const testCases = [ + { input: { type: 'STRING' }, expected: { field_1: {} } }, + { input: { type: 'MAP' }, expected: { field_2: {} } }, + { input: { type: 'LIST' }, expected: { field_3: {} } }, + { input: { type: 'ENUM' }, expected: { field_4: {} } }, + { + input: { type: 'DECIMAL', scale: 2, precision: 5 }, + expected: { field_5: { field_1: 2, field_2: 5 } }, + }, + { input: { type: 'DATE' }, expected: { field_6: {} } }, + { + input: { type: 'TIME', isAdjustedToUTC: true, unit: 'MILLIS' }, + expected: { field_7: { field_1: true, field_2: { field_1: {} } } }, + }, + { + input: { type: 'TIMESTAMP', isAdjustedToUTC: false, unit: 'MICROS' }, + expected: { field_8: { field_1: false, field_2: { field_2: {} } } }, + }, + { + input: { type: 'TIMESTAMP', isAdjustedToUTC: false, unit: 'NANOS' }, + expected: { field_8: { field_1: false, field_2: { field_3: {} } } }, + }, + { + input: { type: 'INTEGER', bitWidth: 32, isSigned: true }, + expected: { field_10: { field_1: 32, field_2: true } }, + }, + { input: { type: 'NULL' }, expected: { field_11: {} } }, + { input: { type: 'JSON' }, expected: { field_12: {} } }, + { input: { type: 'BSON' }, expected: { field_13: {} } }, + { input: { type: 'UUID' }, expected: { field_14: {} } }, + { input: { type: 'FLOAT16' }, expected: { field_15: {} } }, + { input: { type: 'VARIANT' }, expected: { field_16: {} } }, + { input: { type: 'GEOMETRY' }, expected: { field_17: {} } }, + { input: { type: 'GEOGRAPHY' }, expected: { field_18: {} } }, + ] + + testCases.forEach(({ input, expected }) => { + expect(logicalType(input)).toEqual(expected) + }) + }) }) diff --git a/test/unconvert.test.js b/test/unconvert.test.js index cf3ca92..bd4fcb9 100644 --- a/test/unconvert.test.js +++ b/test/unconvert.test.js @@ -11,11 +11,7 @@ describe('unconvert', () => { const schema = { name: 'test', converted_type: 'DATE' } const input = [new Date('2020-01-01T00:00:00Z'), new Date('2021-01-01T00:00:00Z')] const result = unconvert(schema, input) - - expect(result).toEqual([ - new Date('2020-01-01T00:00:00Z').getTime(), - new Date('2021-01-01T00:00:00Z').getTime(), - ]) + expect(result).toEqual([18262, 18628]) }) it('should convert JSON objects to strings when converted_type = JSON', () => {