Check for safe integers

This commit is contained in:
Kenny Daniel 2025-04-20 19:38:45 -07:00
parent 5a3d6e8d3f
commit 263dae7101
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
4 changed files with 58 additions and 9 deletions

@ -44,7 +44,7 @@
},
"scripts": {
"build:types": "tsc -p ./tsconfig.build.json",
"coverage": "vitest run --coverage",
"coverage": "vitest run --coverage --coverage.include=src",
"lint": "eslint",
"lint:fix": "eslint --fix",
"prepare": "npm run build:types",

@ -60,7 +60,7 @@ function writePlainBoolean(writer, values) {
*/
function writePlainInt32(writer, values) {
  // Appends each value to the writer via appendInt32, in input order.
  // Throws Error('parquet expected integer value') on the first value that
  // is not a safe integer; values before it have already been appended.
  for (const value of values) {
    // Number.isSafeInteger returns false for every non-number (bigint,
    // string, null, ...) as well as for NaN, Infinity and fractional numbers,
    // so this single guard covers the type check too and always reports the
    // same "integer" error.
    // NOTE(review): safe integers may still exceed the signed 32-bit range;
    // whether appendInt32 rejects or wraps such values is not visible here.
    if (!Number.isSafeInteger(value)) {
      throw new Error('parquet expected integer value')
    }
    writer.appendInt32(value)
  }
}
@ -110,7 +110,7 @@ function writePlainByteArray(writer, values) {
bytes = new TextEncoder().encode(value)
}
if (!(bytes instanceof Uint8Array)) {
throw new Error('parquet byte array expected Uint8Array value')
throw new Error('parquet expected Uint8Array value')
}
writer.appendUint32(bytes.length)
writer.appendBytes(bytes)

@ -80,10 +80,7 @@ describe('writePlain', () => {
it('writes BYTE_ARRAY', () => {
const writer = new ByteWriter()
const strings = ['a', 'b', 'c', 'd']
// strings must be pre-converted to Uint8Array
const encoder = new TextEncoder()
const bytes = strings.map(s => encoder.encode(s))
writePlain(writer, bytes, 'BYTE_ARRAY')
writePlain(writer, strings, 'BYTE_ARRAY')
let offset = 0
for (const s of strings) {
@ -98,9 +95,43 @@ describe('writePlain', () => {
}
})
it('writes FIXED_LENGTH_BYTE_ARRAY', () => {
  const writer = new ByteWriter()
  const encoder = new TextEncoder()
  const chunks = ['abcd', 'efgh', 'ijkl'].map(text => encoder.encode(text))
  writePlain(writer, chunks, 'FIXED_LEN_BYTE_ARRAY')

  // The output is expected to be the raw bytes of every value laid out
  // back-to-back starting at offset 0, so compare against the flattened input.
  const expectedBytes = chunks.flatMap(chunk => [...chunk])
  expectedBytes.forEach((byte, position) => {
    expect(writer.view.getUint8(position)).toBe(byte)
  })
})
it('throws error on unsupported type', () => {
  const writer = new ByteWriter()
  // INT96 is passed as a type that writePlain is expected to reject.
  const writeInt96 = () => writePlain(writer, [1, 2, 3], 'INT96')
  expect(writeInt96).toThrow(/parquet unsupported type/i)
})
it('throws error on type mismatch', () => {
  const writer = new ByteWriter()
  // Factories hand every case its own fresh input array.
  const numbers = () => [1, 2, 3]
  const numbersThenBigint = () => [1, 2, 3n]

  // plain numbers where booleans are required
  expect(() => writePlain(writer, numbers(), 'BOOLEAN'))
    .toThrow('parquet expected boolean value')
  // a fractional number in an integer column
  expect(() => writePlain(writer, [1, 2, 3.5], 'INT32'))
    .toThrow('parquet expected integer value')
  // a plain number in a bigint column
  expect(() => writePlain(writer, [1n, 2n, 3], 'INT64'))
    .toThrow('parquet expected bigint value')
  // a bigint in the floating-point columns
  expect(() => writePlain(writer, numbersThenBigint(), 'FLOAT'))
    .toThrow('parquet expected number value')
  expect(() => writePlain(writer, numbersThenBigint(), 'DOUBLE'))
    .toThrow('parquet expected number value')
  // plain numbers in the byte-array columns
  expect(() => writePlain(writer, numbers(), 'BYTE_ARRAY'))
    .toThrow('parquet expected Uint8Array value')
  expect(() => writePlain(writer, numbers(), 'FIXED_LEN_BYTE_ARRAY'))
    .toThrow('parquet expected Uint8Array value')
})
})

@ -32,6 +32,12 @@ describe('parquetWriteBuffer', () => {
])
})
it('serializes a string without converted_type', () => {
  // Plain JS strings are passed straight in as BYTE_ARRAY column data.
  const strings = ['string1', 'string2', 'string3']
  const { byteLength } = parquetWriteBuffer({
    columnData: [{ name: 'string', data: strings, type: 'BYTE_ARRAY' }]
  })
  expect(byteLength).toBe(162)
})
it('efficiently serializes sparse booleans', async () => {
const bool = Array(10000).fill(null)
bool[10] = true
@ -187,8 +193,20 @@ describe('parquetWriteBuffer', () => {
})
it('throws for wrong type specified', () => {
  // Every case writes one column called 'int' whose data does not match the
  // declared type. Factories give each call its own fresh input array.
  const name = 'int'
  const numbers = () => [1, 2, 3]
  const bigints = () => [1n, 2n, 3n]

  // numbers declared as BOOLEAN / INT64
  expect(() => parquetWriteBuffer({ columnData: [{ name, data: numbers(), type: 'BOOLEAN' }] }))
    .toThrow('parquet expected boolean value')
  expect(() => parquetWriteBuffer({ columnData: [{ name, data: numbers(), type: 'INT64' }] }))
    .toThrow('parquet expected bigint value')

  // bigint, mixed and fractional data declared as INT32
  expect(() => parquetWriteBuffer({ columnData: [{ name, data: bigints(), type: 'INT32' }] }))
    .toThrow('parquet expected integer value')
  expect(() => parquetWriteBuffer({ columnData: [{ name, data: [1, 2, 3n], type: 'INT32' }] }))
    .toThrow('parquet expected integer value')
  expect(() => parquetWriteBuffer({ columnData: [{ name, data: [1, 2, 3.5], type: 'INT32' }] }))
    .toThrow('parquet expected integer value')

  // bigints declared as FLOAT / DOUBLE
  expect(() => parquetWriteBuffer({ columnData: [{ name, data: bigints(), type: 'FLOAT' }] }))
    .toThrow('parquet expected number value')
  expect(() => parquetWriteBuffer({ columnData: [{ name, data: bigints(), type: 'DOUBLE' }] }))
    .toThrow('parquet expected number value')

  // numbers declared as BYTE_ARRAY
  expect(() => parquetWriteBuffer({ columnData: [{ name, data: numbers(), type: 'BYTE_ARRAY' }] }))
    .toThrow('parquet expected Uint8Array value')
})
it('throws for empty column with no type specified', () => {