mirror of
https://github.com/asadbek064/hyparquet-writer.git
synced 2025-12-05 23:31:54 +00:00
Check for safe integers
This commit is contained in:
parent
5a3d6e8d3f
commit
263dae7101
@ -44,7 +44,7 @@
|
||||
},
|
||||
"scripts": {
|
||||
"build:types": "tsc -p ./tsconfig.build.json",
|
||||
"coverage": "vitest run --coverage",
|
||||
"coverage": "vitest run --coverage --coverage.include=src",
|
||||
"lint": "eslint",
|
||||
"lint:fix": "eslint --fix",
|
||||
"prepare": "npm run build:types",
|
||||
|
||||
@ -60,7 +60,7 @@ function writePlainBoolean(writer, values) {
|
||||
*/
|
||||
function writePlainInt32(writer, values) {
  // Appends every value to the writer through writer.appendInt32.
  // Throws Error('parquet expected integer value') on the first value that is
  // not a safe integer; values before it have already been appended.
  for (const value of values) {
    // Number.isSafeInteger returns false for every non-number (bigint, string,
    // null, undefined) as well as for NaN, Infinity and fractional numbers, so
    // this single guard is both the type check and the integer check. A
    // separate `typeof value !== 'number'` check in front of it would report
    // 'parquet expected number value' for bigints instead of the integer error.
    // NOTE(review): safe integers outside the signed 32-bit range still pass
    // this guard; how appendInt32 treats them is not visible here - confirm.
    if (!Number.isSafeInteger(value)) throw new Error('parquet expected integer value')
    writer.appendInt32(value)
  }
}
|
||||
@ -110,7 +110,7 @@ function writePlainByteArray(writer, values) {
|
||||
bytes = new TextEncoder().encode(value)
|
||||
}
|
||||
if (!(bytes instanceof Uint8Array)) {
|
||||
throw new Error('parquet byte array expected Uint8Array value')
|
||||
throw new Error('parquet expected Uint8Array value')
|
||||
}
|
||||
writer.appendUint32(bytes.length)
|
||||
writer.appendBytes(bytes)
|
||||
|
||||
@ -80,10 +80,7 @@ describe('writePlain', () => {
|
||||
it('writes BYTE_ARRAY', () => {
|
||||
const writer = new ByteWriter()
|
||||
const strings = ['a', 'b', 'c', 'd']
|
||||
// strings must be pre-converted to Uint8Array
|
||||
const encoder = new TextEncoder()
|
||||
const bytes = strings.map(s => encoder.encode(s))
|
||||
writePlain(writer, bytes, 'BYTE_ARRAY')
|
||||
writePlain(writer, strings, 'BYTE_ARRAY')
|
||||
|
||||
let offset = 0
|
||||
for (const s of strings) {
|
||||
@ -98,9 +95,43 @@ describe('writePlain', () => {
|
||||
}
|
||||
})
|
||||
|
||||
it('writes FIXED_LEN_BYTE_ARRAY', () => {
  // Title uses the same type name that is passed to writePlain below.
  const writer = new ByteWriter()
  const encoder = new TextEncoder()
  // The inputs are Uint8Array values, each encoded from a four-character string
  const values = ['abcd', 'efgh', 'ijkl'].map(s => encoder.encode(s))
  writePlain(writer, values, 'FIXED_LEN_BYTE_ARRAY')

  // The output is expected to hold the input bytes back-to-back from offset 0
  let offset = 0
  for (const bytes of values) {
    for (const byte of bytes) {
      expect(writer.view.getUint8(offset)).toBe(byte)
      offset += 1
    }
  }
})
|
||||
|
||||
it('throws error on unsupported type', () => {
  const writer = new ByteWriter()
  // writePlain is expected to reject INT96 with an "unsupported type" error
  const writeInt96 = () => writePlain(writer, [1, 2, 3], 'INT96')
  expect(writeInt96).toThrow(/parquet unsupported type/i)
})
|
||||
|
||||
it('throws error on type mismatch', () => {
  // One writer is shared by every case; each call is expected to throw.
  const writer = new ByteWriter()

  // numbers given where booleans are expected
  expect(() => writePlain(writer, [1, 2, 3], 'BOOLEAN')).toThrow('parquet expected boolean value')

  // a fractional number given where an integer is expected
  expect(() => writePlain(writer, [1, 2, 3.5], 'INT32')).toThrow('parquet expected integer value')

  // a plain number mixed into bigint data
  expect(() => writePlain(writer, [1n, 2n, 3], 'INT64')).toThrow('parquet expected bigint value')

  // a bigint mixed into floating-point data
  expect(() => writePlain(writer, [1, 2, 3n], 'FLOAT')).toThrow('parquet expected number value')
  expect(() => writePlain(writer, [1, 2, 3n], 'DOUBLE')).toThrow('parquet expected number value')

  // numbers given where byte arrays are expected
  expect(() => writePlain(writer, [1, 2, 3], 'BYTE_ARRAY')).toThrow('parquet expected Uint8Array value')
  expect(() => writePlain(writer, [1, 2, 3], 'FIXED_LEN_BYTE_ARRAY')).toThrow('parquet expected Uint8Array value')
})
|
||||
})
|
||||
|
||||
@ -32,6 +32,12 @@ describe('parquetWriteBuffer', () => {
|
||||
])
|
||||
})
|
||||
|
||||
it('serializes a string without converted_type', () => {
  // String data is written with an explicit BYTE_ARRAY type; only the total
  // size of the resulting buffer is asserted.
  const file = parquetWriteBuffer({
    columnData: [{ name: 'string', data: ['string1', 'string2', 'string3'], type: 'BYTE_ARRAY' }],
  })
  expect(file.byteLength).toBe(162)
})
|
||||
|
||||
it('efficiently serializes sparse booleans', async () => {
|
||||
const bool = Array(10000).fill(null)
|
||||
bool[10] = true
|
||||
@ -187,8 +193,20 @@ describe('parquetWriteBuffer', () => {
|
||||
})
|
||||
|
||||
it('throws for wrong type specified', () => {
  // Each case writes a single column named 'int' whose data does not match the
  // declared type, and checks the error message.

  // numbers declared as BOOLEAN / INT64
  expect(() => parquetWriteBuffer({
    columnData: [{ name: 'int', data: [1, 2, 3], type: 'BOOLEAN' }],
  })).toThrow('parquet expected boolean value')
  expect(() => parquetWriteBuffer({
    columnData: [{ name: 'int', data: [1, 2, 3], type: 'INT64' }],
  })).toThrow('parquet expected bigint value')

  // bigints or fractions declared as INT32
  expect(() => parquetWriteBuffer({
    columnData: [{ name: 'int', data: [1n, 2n, 3n], type: 'INT32' }],
  })).toThrow('parquet expected integer value')
  expect(() => parquetWriteBuffer({
    columnData: [{ name: 'int', data: [1, 2, 3n], type: 'INT32' }],
  })).toThrow('parquet expected integer value')
  expect(() => parquetWriteBuffer({
    columnData: [{ name: 'int', data: [1, 2, 3.5], type: 'INT32' }],
  })).toThrow('parquet expected integer value')

  // bigints declared as FLOAT / DOUBLE
  expect(() => parquetWriteBuffer({
    columnData: [{ name: 'int', data: [1n, 2n, 3n], type: 'FLOAT' }],
  })).toThrow('parquet expected number value')
  expect(() => parquetWriteBuffer({
    columnData: [{ name: 'int', data: [1n, 2n, 3n], type: 'DOUBLE' }],
  })).toThrow('parquet expected number value')

  // numbers declared as BYTE_ARRAY
  expect(() => parquetWriteBuffer({
    columnData: [{ name: 'int', data: [1, 2, 3], type: 'BYTE_ARRAY' }],
  })).toThrow('parquet expected Uint8Array value')
})
|
||||
|
||||
it('throws for empty column with no type specified', () => {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user