From ae829f7c9b139c921d9c4fb9906363a14d797ee5 Mon Sep 17 00:00:00 2001
From: Kenny Daniel
Date: Thu, 3 Apr 2025 20:19:37 -0700
Subject: [PATCH] Move convert to unconvert and test it

---
 README.md                        |  6 +++
 src/column.js                    |  2 +-
 src/{convert.js => unconvert.js} |  0
 test/unconvert.test.js           | 68 ++++++++++++++++++++++++++++++++++
 4 files changed, 75 insertions(+), 1 deletion(-)
 rename src/{convert.js => unconvert.js} (100%)
 create mode 100644 test/unconvert.test.js

diff --git a/README.md b/README.md
index cb08f09..18a3b54 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,12 @@ const arrayBuffer = parquetWrite({
 })
 ```
 
+## Options
+
+ - `compression`: Boolean defaults to `true`. Set to `false` to disable snappy compression.
+ - `rowGroupSize`: Integer number of rows to include in each row group.
+ - `kvMetadata`: Extra key-value metadata to store in the parquet footer.
+
 ## References
 
 - https://github.com/hyparam/hyparquet
diff --git a/src/column.js b/src/column.js
index f6a044a..41e8ada 100644
--- a/src/column.js
+++ b/src/column.js
@@ -1,5 +1,5 @@
 import { Encoding, PageType } from 'hyparquet/src/constants.js'
-import { unconvert } from './convert.js'
+import { unconvert } from './unconvert.js'
 import { writeRleBitPackedHybrid } from './encoding.js'
 import { writePlain } from './plain.js'
 import { getMaxDefinitionLevel, getMaxRepetitionLevel } from './schema.js'
diff --git a/src/convert.js b/src/unconvert.js
similarity index 100%
rename from src/convert.js
rename to src/unconvert.js
diff --git a/test/unconvert.test.js b/test/unconvert.test.js
new file mode 100644
index 0000000..79180b0
--- /dev/null
+++ b/test/unconvert.test.js
@@ -0,0 +1,68 @@
+import { describe, expect, it } from 'vitest'
+import { unconvert } from '../src/unconvert.js'
+
+/**
+ * @import {SchemaElement} from 'hyparquet'
+ */
+// These tests pin the output of unconvert(schema, values) for each schema.converted_type.
+describe('unconvert', () => {
+  it('should convert Date objects to epoch milliseconds when converted_type = DATE', () => {
+    /** @type {SchemaElement} */
+    const schema = { name: 'test', converted_type: 'DATE' }
+    const input = [new Date('2020-01-01T00:00:00Z'), new Date('2021-01-01T00:00:00Z')]
+    const result = unconvert(schema, input)
+
+    // The expected output is numeric timestamps (Date#getTime), not Date objects
+    expect(result).toEqual([
+      new Date('2020-01-01T00:00:00Z').getTime(),
+      new Date('2021-01-01T00:00:00Z').getTime(),
+    ])
+  })
+
+  it('should convert JSON objects to strings when converted_type = JSON', () => {
+    /** @type {SchemaElement} */
+    const schema = { name: 'test', converted_type: 'JSON' }
+    const input = [{ foo: 'bar' }, { hello: 'world' }]
+    const result = unconvert(schema, input)
+
+    // We check that result is an array of Uint8Arrays containing the JSON-encoded bytes
+    expect(result).toHaveLength(2)
+    expect(result[0]).toBeInstanceOf(Uint8Array)
+    expect(new TextDecoder().decode(result[0])).toEqual(JSON.stringify({ foo: 'bar' }))
+    expect(new TextDecoder().decode(result[1])).toEqual(JSON.stringify({ hello: 'world' }))
+  })
+
+  it('should convert string array to Uint8Array when converted_type = UTF8', () => {
+    /** @type {SchemaElement} */
+    const schema = { name: 'test', converted_type: 'UTF8' }
+    const input = ['hello', 'world']
+    const result = unconvert(schema, input)
+
+    expect(result).toHaveLength(2)
+    expect(result[0]).toBeInstanceOf(Uint8Array)
+    expect(new TextDecoder().decode(result[0])).toBe('hello')
+    expect(new TextDecoder().decode(result[1])).toBe('world')
+  })
+
+  it('should throw an error when converted_type = UTF8 and values is not an array', () => {
+    // A typed array is used as the non-Array input that must be rejected
+    expect(() => unconvert(
+      { name: 'test', converted_type: 'UTF8' },
+      new Uint8Array([1, 2, 3]))
+    ).toThrow('strings must be an array')
+  })
+
+  it('should throw an error when converted_type = JSON and values is not an array', () => {
+    // A typed array is used as the non-Array input that must be rejected
+    expect(() => unconvert(
+      { name: 'test', converted_type: 'JSON' },
+      new Uint8Array([1, 2, 3]))
+    ).toThrow('JSON must be an array')
+  })
+
+  it('should return original values if there is no recognized converted_type', () => {
+    const input = [1, 2, 3]
+    const result = unconvert({ name: 'test' }, input)
+    expect(result).toEqual(input)
+  })
+})