diff --git a/package.json b/package.json index f57f1c9..7f4b782 100644 --- a/package.json +++ b/package.json @@ -44,7 +44,7 @@ }, "scripts": { "build:types": "tsc -p ./tsconfig.build.json", - "coverage": "vitest run --coverage", + "coverage": "vitest run --coverage --coverage.include=src", "lint": "eslint", "lint:fix": "eslint --fix", "prepare": "npm run build:types", diff --git a/src/plain.js b/src/plain.js index e5bd8ae..ce2a367 100644 --- a/src/plain.js +++ b/src/plain.js @@ -60,7 +60,7 @@ function writePlainBoolean(writer, values) { */ function writePlainInt32(writer, values) { for (const value of values) { - if (typeof value !== 'number') throw new Error('parquet expected number value') + if (!Number.isSafeInteger(value)) throw new Error('parquet expected integer value') writer.appendInt32(value) } } @@ -110,7 +110,7 @@ function writePlainByteArray(writer, values) { bytes = new TextEncoder().encode(value) } if (!(bytes instanceof Uint8Array)) { - throw new Error('parquet byte array expected Uint8Array value') + throw new Error('parquet expected Uint8Array value') } writer.appendUint32(bytes.length) writer.appendBytes(bytes) diff --git a/test/plain.test.js b/test/plain.test.js index 8ce90b4..c627fc7 100644 --- a/test/plain.test.js +++ b/test/plain.test.js @@ -80,10 +80,7 @@ describe('writePlain', () => { it('writes BYTE_ARRAY', () => { const writer = new ByteWriter() const strings = ['a', 'b', 'c', 'd'] - // strings must be pre-converted to Uint8Array - const encoder = new TextEncoder() - const bytes = strings.map(s => encoder.encode(s)) - writePlain(writer, bytes, 'BYTE_ARRAY') + writePlain(writer, strings, 'BYTE_ARRAY') let offset = 0 for (const s of strings) { @@ -98,9 +95,43 @@ describe('writePlain', () => { } }) + it('writes FIXED_LENGTH_BYTE_ARRAY', () => { + const writer = new ByteWriter() + const encoder = new TextEncoder() + const strings = ['abcd', 'efgh', 'ijkl'] + .map(s => encoder.encode(s)) + writePlain(writer, strings, 'FIXED_LEN_BYTE_ARRAY') + + let offset = 0 + for (const s of strings) { + for (let i = 0; i < s.length; i++) { + expect(writer.view.getUint8(offset)).toBe(s[i]) + offset += 1 + } + } + }) + it('throws error on unsupported type', () => { const writer = new ByteWriter() expect(() => writePlain(writer, [1, 2, 3], 'INT96')) .toThrow(/parquet unsupported type/i) }) + + it('throws error on type mismatch', () => { + const writer = new ByteWriter() + expect(() => writePlain(writer, [1, 2, 3], 'BOOLEAN')) + .toThrow('parquet expected boolean value') + expect(() => writePlain(writer, [1, 2, 3.5], 'INT32')) + .toThrow('parquet expected integer value') + expect(() => writePlain(writer, [1n, 2n, 3], 'INT64')) + .toThrow('parquet expected bigint value') + expect(() => writePlain(writer, [1, 2, 3n], 'FLOAT')) + .toThrow('parquet expected number value') + expect(() => writePlain(writer, [1, 2, 3n], 'DOUBLE')) + .toThrow('parquet expected number value') + expect(() => writePlain(writer, [1, 2, 3], 'BYTE_ARRAY')) + .toThrow('parquet expected Uint8Array value') + expect(() => writePlain(writer, [1, 2, 3], 'FIXED_LEN_BYTE_ARRAY')) + .toThrow('parquet expected Uint8Array value') + }) }) diff --git a/test/write.buffer.test.js b/test/write.buffer.test.js index c2f600e..a8f2882 100644 --- a/test/write.buffer.test.js +++ b/test/write.buffer.test.js @@ -32,6 +32,12 @@ describe('parquetWriteBuffer', () => { ]) }) + it('serializes a string without converted_type', () => { + const data = ['string1', 'string2', 'string3'] + const file = parquetWriteBuffer({ columnData: [{ name: 'string', data, type: 'BYTE_ARRAY' }] }) + expect(file.byteLength).toBe(162) + }) + it('efficiently serializes sparse booleans', async () => { const bool = Array(10000).fill(null) bool[10] = true @@ -187,8 +193,20 @@ describe('parquetWriteBuffer', () => { }) it('throws for wrong type specified', () => { - expect(() => parquetWriteBuffer({ columnData: [{ name: 'int', data: [1, 2, 3], type: 'BOOLEAN' }] })) - .toThrow('parquet expected boolean value') + expect(() => parquetWriteBuffer({ columnData: [{ name: 'int', data: [1, 2, 3], type: 'INT64' }] })) + .toThrow('parquet expected bigint value') + expect(() => parquetWriteBuffer({ columnData: [{ name: 'int', data: [1n, 2n, 3n], type: 'INT32' }] })) + .toThrow('parquet expected integer value') + expect(() => parquetWriteBuffer({ columnData: [{ name: 'int', data: [1, 2, 3n], type: 'INT32' }] })) + .toThrow('parquet expected integer value') + expect(() => parquetWriteBuffer({ columnData: [{ name: 'int', data: [1, 2, 3.5], type: 'INT32' }] })) + .toThrow('parquet expected integer value') + expect(() => parquetWriteBuffer({ columnData: [{ name: 'int', data: [1n, 2n, 3n], type: 'FLOAT' }] })) + .toThrow('parquet expected number value') + expect(() => parquetWriteBuffer({ columnData: [{ name: 'int', data: [1n, 2n, 3n], type: 'DOUBLE' }] })) + .toThrow('parquet expected number value') + expect(() => parquetWriteBuffer({ columnData: [{ name: 'int', data: [1, 2, 3], type: 'BYTE_ARRAY' }] })) + .toThrow('parquet expected Uint8Array value') }) it('throws for empty column with no type specified', () => {