From 6af6f43f448befbb8c54d73b905cc449c2b96521 Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Mon, 31 Mar 2025 23:20:22 -0700 Subject: [PATCH] Export more constants --- .gitignore | 2 +- README.md | 4 ++++ package.json | 10 +++++----- src/column.js | 4 ++-- src/constants.js | 5 ++++- src/plain.js | 2 +- src/snappy.js | 2 -- src/types.d.ts | 3 ++- test/snappy.test.js | 10 ++++------ 9 files changed, 23 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 5c63a76..4c1fb2e 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,4 @@ coverage .vscode .DS_Store /*.parquet -types +/types diff --git a/README.md b/README.md index fcee8d3..d625011 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,10 @@ await parquetRead({ The `parquetReadObjects` function defaults to returning an array of objects. +## Parquet Writing + +To create parquet files from javascript, check out the [hyparquet-writer](https://github.com/hyparam/hyparquet-writer) package. + ## Supported Parquet Files The parquet format is known to be a sprawling format which includes options for a wide array of compression schemes, encoding types, and data structures. diff --git a/package.json b/package.json index 683ae61..0b79337 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "hyparquet", "version": "1.10.1", - "description": "parquet file parser for javascript", + "description": "Parquet file parser for JavaScript", "author": "Hyperparam", "homepage": "https://hyperparam.app", "keywords": [ @@ -43,13 +43,13 @@ "test": "vitest run" }, "devDependencies": { - "@types/node": "22.13.13", - "@vitest/coverage-v8": "3.0.9", + "@types/node": "22.13.15", + "@vitest/coverage-v8": "3.1.1", "eslint": "9.23.0", "eslint-plugin-jsdoc": "50.6.9", "hyparquet-compressors": "1.1.1", "typescript": "5.8.2", - "typescript-eslint": "8.28.0", - "vitest": "3.0.9" + "typescript-eslint": "8.29.0", + "vitest": "3.1.1" } } diff --git a/src/column.js b/src/column.js index 61a97ae..3666ee2 100644 --- a/src/column.js +++ b/src/column.js @@ -128,8 +128,8 @@ export function getColumnRange({ dictionary_page_offset, data_page_offset, total * Read parquet header from a buffer. * * @import {ColumnMetaData, DecodedArray, DataReader, PageHeader, ParquetReadOptions, SchemaTree} from '../src/types.d.ts' - * @param {DataReader} reader - parquet file reader - * @returns {PageHeader} metadata object and bytes read + * @param {DataReader} reader + * @returns {PageHeader} */ function parquetHeader(reader) { const header = deserializeTCompactProtocol(reader) diff --git a/src/constants.js b/src/constants.js index bc38702..0d7d2c8 100644 --- a/src/constants.js +++ b/src/constants.js @@ -10,9 +10,10 @@ export const ParquetType = [ 'FIXED_LEN_BYTE_ARRAY', ] +/** @type {import('../src/types.d.ts').Encoding[]} */ export const Encoding = [ 'PLAIN', - undefined, + 'GROUP_VAR_INT', // deprecated 'PLAIN_DICTIONARY', 'RLE', 'BIT_PACKED', // deprecated @@ -23,6 +24,7 @@ export const Encoding = [ 'BYTE_STREAM_SPLIT', ] +/** @type {import('../src/types.d.ts').FieldRepetitionType[]} */ export const FieldRepetitionType = [ 'REQUIRED', 'OPTIONAL', @@ -74,6 +76,7 @@ export const logicalTypeType = [ 'UUID', ] +/** @type {import('../src/types.d.ts').CompressionCodec[]} */ export const CompressionCodec = [ 'UNCOMPRESSED', 'SNAPPY', diff --git a/src/plain.js b/src/plain.js index b1a21da..9cbcc1f 100644 --- a/src/plain.js +++ b/src/plain.js @@ -139,7 +139,7 @@ function readPlainDouble(reader, count) { function readPlainByteArray(reader, count) { const values = new Array(count) for (let i = 0; i < count; i++) { - const length = reader.view.getInt32(reader.offset, true) + const length = reader.view.getUint32(reader.offset, true) reader.offset += 4 values[i] = new Uint8Array(reader.view.buffer, reader.view.byteOffset + reader.offset, length) reader.offset += length diff --git a/src/snappy.js b/src/snappy.js index 7081578..f7bf769 100644 --- a/src/snappy.js +++ b/src/snappy.js @@ -14,7 +14,6 @@ const WORD_MASK = [0, 0xff, 0xffff, 0xffffff, 0xffffffff] * @param {Uint8Array} toArray destination array * @param {number} toPos destination position * @param {number} length number of bytes to copy - * @returns {void} */ function copyBytes(fromArray, fromPos, toArray, toPos, length) { for (let i = 0; i < length; i++) { @@ -28,7 +27,6 @@ function copyBytes(fromArray, fromPos, toArray, toPos, length) { * * @param {Uint8Array} input compressed data * @param {Uint8Array} output output buffer - * @returns {void} */ export function snappyUncompress(input, output) { const inputLength = input.byteLength diff --git a/src/types.d.ts b/src/types.d.ts index 935e195..d9ec539 100644 --- a/src/types.d.ts +++ b/src/types.d.ts @@ -186,6 +186,7 @@ type ColumnCryptoMetaData = Record export type Encoding = 'PLAIN' | + 'GROUP_VAR_INT' | // deprecated 'PLAIN_DICTIONARY' | 'RLE' | 'BIT_PACKED' | // deprecated @@ -209,7 +210,7 @@ export type Compressors = { [K in CompressionCodec]?: (input: Uint8Array, outputLength: number) => Uint8Array } -interface KeyValue { +export interface KeyValue { key: string value?: string } diff --git a/test/snappy.test.js b/test/snappy.test.js index 8718ffe..9323a3b 100644 --- a/test/snappy.test.js +++ b/test/snappy.test.js @@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest' import { snappyUncompress } from '../src/snappy.js' describe('snappy uncompress', () => { - it('decompresses valid input correctly', async () => { + it('decompresses valid input correctly', () => { const testCases = [ { compressed: [0x00], expected: '' }, { compressed: [0x01, 0x00, 0x68], expected: 'h' }, @@ -37,18 +37,16 @@ describe('snappy uncompress', () => { { compressed: [ 6, 20, 2, 0, 0, 0, 3, 23], expected: new Uint8Array([2, 0, 0, 0, 3, 23]) }, ] - const futures = testCases.map(async ({ compressed, expected }) => { + for (const { compressed, expected } of testCases) { const output = new Uint8Array(expected.length) - await snappyUncompress(new Uint8Array(compressed), output) + snappyUncompress(new Uint8Array(compressed), output) if (typeof expected === 'string') { const outputStr = new TextDecoder().decode(output) expect(outputStr).toBe(expected) } else { expect(output).toEqual(expected) // Uint8Array } - }) - - await Promise.all(futures) + } }) it('decompress hyparquet.jpg.snappy', async () => {