import { parquetMetadata, parquetReadObjects } from 'hyparquet'
import { describe, expect, it } from 'vitest'

import { parquetWriteBuffer } from '../src/index.js'

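// Per the Parquet spec, DELTA_BINARY_PACKED stores the first value and then
// bit-packs the deltas between consecutive values, so small, regular increments
// cost only a few bits each. For [1, 2, 3, 100, 200, 300] the encoder writes
// first value 1 plus deltas [1, 1, 97, 100, 100]. Both tests force the encoding
// via the column's `encoding` option, assert the encoding recorded in the file
// metadata, then round-trip the values through a read.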
describe('DELTA_BINARY_PACKED encoding', () => {
  it('writes DELTA_BINARY_PACKED encoding for INT32', async () => {
    const data = [1, 2, 3, 100, 200, 300]
    const file = parquetWriteBuffer({
      columnData: [{ name: 'int', data, encoding: 'DELTA_BINARY_PACKED' }],
    })
    const metadata = parquetMetadata(file)
    expect(metadata.row_groups[0].columns[0].meta_data?.encodings).toEqual(['DELTA_BINARY_PACKED'])
    const result = await parquetReadObjects({ file })
    expect(result).toEqual(data.map(int => ({ int })))
  })

  it('writes DELTA_BINARY_PACKED encoding for INT64', async () => {
    const data = [1n, 2n, 3n, 100n, 200n, 300n]
    const file = parquetWriteBuffer({
      columnData: [{ name: 'bigint', data, encoding: 'DELTA_BINARY_PACKED' }],
    })
    const metadata = parquetMetadata(file)
    expect(metadata.row_groups[0].columns[0].meta_data?.encodings).toEqual(['DELTA_BINARY_PACKED'])
    const result = await parquetReadObjects({ file })
    expect(result).toEqual(data.map(bigint => ({ bigint })))
  })
})

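// Per the Parquet spec, DELTA_LENGTH_BYTE_ARRAY encodes every value's length
// with DELTA_BINARY_PACKED, then appends the concatenated value bytes. For
// ['hello', 'world', 'foo', 'bar', 'baz', 'qux'] that is lengths
// [5, 5, 3, 3, 3, 3] followed by the bytes of 'helloworldfoobarbazqux'.
// The byte-array test reads with `utf8: false` so BYTE_ARRAY values come back
// as raw Uint8Arrays instead of being decoded as UTF-8 strings.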
describe('DELTA_LENGTH_BYTE_ARRAY encoding', () => {
  it('writes DELTA_LENGTH_BYTE_ARRAY encoding for strings', async () => {
    const data = ['hello', 'world', 'foo', 'bar', 'baz', 'qux']
    const file = parquetWriteBuffer({
      columnData: [{ name: 'string', data, encoding: 'DELTA_LENGTH_BYTE_ARRAY' }],
    })
    const metadata = parquetMetadata(file)
    expect(metadata.row_groups[0].columns[0].meta_data?.encodings).toEqual(['DELTA_LENGTH_BYTE_ARRAY'])
    const result = await parquetReadObjects({ file })
    expect(result).toEqual(data.map(string => ({ string })))
  })

  it('writes DELTA_LENGTH_BYTE_ARRAY encoding for byte arrays', async () => {
    const data = [
      Uint8Array.of(1, 2, 3),
      Uint8Array.of(4, 5, 6, 7),
      Uint8Array.of(8, 9),
      Uint8Array.of(10, 11, 12, 13, 14),
    ]
    const file = parquetWriteBuffer({
      columnData: [{ name: 'bytes', data, encoding: 'DELTA_LENGTH_BYTE_ARRAY' }],
    })
    const metadata = parquetMetadata(file)
    expect(metadata.row_groups[0].columns[0].meta_data?.encodings).toEqual(['DELTA_LENGTH_BYTE_ARRAY'])
    const result = await parquetReadObjects({ file, utf8: false })
    expect(result).toEqual(data.map(bytes => ({ bytes })))
  })
})

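// Per the Parquet spec, DELTA_BYTE_ARRAY front-codes values: each value stores
// the length of the prefix it shares with the previous value (encoded with
// DELTA_BINARY_PACKED) plus only its suffix bytes (DELTA_LENGTH_BYTE_ARRAY).
// So 'application' after 'apple' stores prefix length 4 ('appl') and suffix
// 'ication', which is why the string test uses values with common prefixes.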
describe('DELTA_BYTE_ARRAY encoding', () => {
  it('writes DELTA_BYTE_ARRAY encoding for strings with common prefixes', async () => {
    const data = ['apple', 'application', 'apply', 'banana', 'band', 'bandana']
    const file = parquetWriteBuffer({
      columnData: [{ name: 'string', data, encoding: 'DELTA_BYTE_ARRAY' }],
    })
    const metadata = parquetMetadata(file)
    expect(metadata.row_groups[0].columns[0].meta_data?.encodings).toEqual(['DELTA_BYTE_ARRAY'])
    const result = await parquetReadObjects({ file })
    expect(result).toEqual(data.map(string => ({ string })))
  })

  it('writes DELTA_BYTE_ARRAY encoding for byte arrays', async () => {
    const data = [
      Uint8Array.of(1, 2, 3, 4),
      Uint8Array.of(1, 2, 5, 6),
      Uint8Array.of(1, 2, 7, 8),
      Uint8Array.of(10, 11, 12, 13),
    ]
    const file = parquetWriteBuffer({
      columnData: [{ name: 'bytes', data, encoding: 'DELTA_BYTE_ARRAY' }],
    })
    const metadata = parquetMetadata(file)
    expect(metadata.row_groups[0].columns[0].meta_data?.encodings).toEqual(['DELTA_BYTE_ARRAY'])
    const result = await parquetReadObjects({ file, utf8: false })
    expect(result).toEqual(data.map(bytes => ({ bytes })))
  })
})