mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-05 22:41:55 +00:00
Split out schemaTree tests
This commit is contained in:
parent
a40e678214
commit
8484426bc8
26
test/helpers.js
Normal file
26
test/helpers.js
Normal file
@@ -0,0 +1,26 @@
|
||||
import fs from 'fs'
|
||||
|
||||
/**
 * Helper function to read .parquet file into ArrayBuffer.
 *
 * Node's fs returns a Buffer that may be a view into a larger shared
 * (pooled) ArrayBuffer, so we slice out exactly this file's byte range
 * rather than returning buffer.buffer directly.
 *
 * @param {string} filePath path of the file to read
 * @returns {Promise<ArrayBuffer>} a copy of the file's bytes
 */
export async function readFileToArrayBuffer(filePath) {
  const buffer = await fs.promises.readFile(filePath)
  // slice() copies, so the returned ArrayBuffer is exactly byteLength long
  return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
}
|
||||
|
||||
/**
 * Wrap .parquet file in an AsyncBuffer.
 *
 * The byte length is read synchronously up front via statSync so callers
 * can size reads without awaiting; the bytes themselves are fetched lazily.
 *
 * @typedef {import('../src/types.js').AsyncBuffer} AsyncBuffer
 * @param {string} filePath path of the file to wrap
 * @returns {AsyncBuffer} object with byteLength and an async slice(start, end)
 */
export function fileToAsyncBuffer(filePath) {
  return {
    byteLength: fs.statSync(filePath).size,
    // NOTE(review): re-reads the entire file on every slice call — fine for
    // small test fixtures, but not intended for large files
    slice: async (start, end) => (await readFileToArrayBuffer(filePath)).slice(start, end),
  }
}
|
||||
@@ -1,32 +1,7 @@
|
||||
import fs from 'fs'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { parquetMetadata, parquetMetadataAsync, parquetSchema } from '../src/hyparquet.js'
|
||||
import { parquetMetadata, parquetMetadataAsync } from '../src/hyparquet.js'
|
||||
import { toJson } from '../src/toJson.js'
|
||||
|
||||
/**
|
||||
* Helper function to read .parquet file into ArrayBuffer
|
||||
*
|
||||
* @param {string} filePath
|
||||
* @returns {Promise<ArrayBuffer>}
|
||||
*/
|
||||
async function readFileToArrayBuffer(filePath) {
|
||||
const buffer = await fs.promises.readFile(filePath)
|
||||
return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap .parquet file in an AsyncBuffer
|
||||
*
|
||||
* @typedef {import('../src/types.js').AsyncBuffer} AsyncBuffer
|
||||
* @param {string} filePath
|
||||
* @returns {AsyncBuffer}
|
||||
*/
|
||||
function fileToAsyncBuffer(filePath) {
|
||||
return {
|
||||
byteLength: fs.statSync(filePath).size,
|
||||
slice: async (start, end) => (await readFileToArrayBuffer(filePath)).slice(start, end),
|
||||
}
|
||||
}
|
||||
import { fileToAsyncBuffer, readFileToArrayBuffer } from './helpers.js'
|
||||
|
||||
describe('parquetMetadata', () => {
|
||||
it('should parse metadata from addrtype-missing-value.parquet', async () => {
|
||||
@@ -77,59 +52,6 @@ describe('parquetMetadataAsync', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('parquetSchema', () => {
|
||||
it('should parse schema from addrtype-missing-value.parquet', async () => {
|
||||
const arrayBuffer = await readFileToArrayBuffer('test/files/addrtype-missing-value.parquet')
|
||||
const metadata = parquetMetadata(arrayBuffer)
|
||||
const result = parquetSchema(metadata)
|
||||
expect(toJson(result)).toEqual({
|
||||
children: [
|
||||
{
|
||||
children: [],
|
||||
count: 1,
|
||||
element: {
|
||||
converted_type: 0,
|
||||
name: 'ADDRTYPE',
|
||||
repetition_type: 1,
|
||||
type: 6,
|
||||
},
|
||||
},
|
||||
],
|
||||
count: 2,
|
||||
element: {
|
||||
name: 'duckdb_schema',
|
||||
num_children: 1,
|
||||
repetition_type: 0,
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
it('should parse schema from rowgroups.parquet', async () => {
|
||||
const arrayBuffer = await readFileToArrayBuffer('test/files/rowgroups.parquet')
|
||||
const metadata = parquetMetadata(arrayBuffer)
|
||||
const result = parquetSchema(metadata)
|
||||
expect(toJson(result)).toEqual({
|
||||
children: [
|
||||
{
|
||||
children: [],
|
||||
count: 1,
|
||||
element: {
|
||||
name: 'numbers',
|
||||
repetition_type: 1,
|
||||
type: 2,
|
||||
},
|
||||
},
|
||||
],
|
||||
count: 2,
|
||||
element: {
|
||||
name: 'schema',
|
||||
num_children: 1,
|
||||
repetition_type: 0,
|
||||
},
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
// Parquet v1 from DuckDB
|
||||
const addrtypeMetadata = {
|
||||
version: 1,
|
||||
|
||||
@@ -1,32 +1,7 @@
|
||||
import fs from 'fs'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { parquetRead } from '../src/hyparquet.js'
|
||||
import { toJson } from '../src/toJson.js'
|
||||
|
||||
/**
|
||||
* Helper function to read .parquet file into ArrayBuffer
|
||||
*
|
||||
* @param {string} filePath
|
||||
* @returns {Promise<ArrayBuffer>}
|
||||
*/
|
||||
async function readFileToArrayBuffer(filePath) {
|
||||
const buffer = await fs.promises.readFile(filePath)
|
||||
return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength)
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap .parquet file in an AsyncBuffer
|
||||
*
|
||||
* @typedef {import('../src/types.js').AsyncBuffer} AsyncBuffer
|
||||
* @param {string} filePath
|
||||
* @returns {AsyncBuffer}
|
||||
*/
|
||||
function fileToAsyncBuffer(filePath) {
|
||||
return {
|
||||
byteLength: fs.statSync(filePath).size,
|
||||
slice: async (start, end) => (await readFileToArrayBuffer(filePath)).slice(start, end),
|
||||
}
|
||||
}
|
||||
import { fileToAsyncBuffer } from './helpers.js'
|
||||
|
||||
describe('parquetMetadataAsync', () => {
|
||||
it('should parse data from addrtype-missing-value.parquet', async () => {
|
||||
@@ -34,7 +9,7 @@ describe('parquetMetadataAsync', () => {
|
||||
await parquetRead({
|
||||
file: asyncBuffer,
|
||||
onComplete: (rows) => {
|
||||
expect(toJson(rows)).toEqual(addrtypeData)
|
||||
expect(rows).toEqual(addrtypeData)
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
62
test/schemaTree.test.js
Normal file
62
test/schemaTree.test.js
Normal file
@@ -0,0 +1,62 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
import { parquetMetadata, parquetSchema } from '../src/hyparquet.js'
|
||||
import { readFileToArrayBuffer } from './helpers.js'
|
||||
|
||||
// Verify that parquetSchema builds the expected schema tree for known fixtures.
describe('schemaTree', () => {
  it('should parse schema tree from addrtype-missing-value.parquet', async () => {
    const arrayBuffer = await readFileToArrayBuffer('test/files/addrtype-missing-value.parquet')
    const metadata = parquetMetadata(arrayBuffer)
    const result = parquetSchema(metadata)
    expect(result).toEqual(addrtypeSchema)
  })

  it('should parse schema tree from rowgroups.parquet', async () => {
    const arrayBuffer = await readFileToArrayBuffer('test/files/rowgroups.parquet')
    const metadata = parquetMetadata(arrayBuffer)
    const result = parquetSchema(metadata)
    expect(result).toEqual(rowgroupsSchema)
  })
})
|
||||
|
||||
// Parquet v1 from DuckDB
// Expected schema tree for addrtype-missing-value.parquet: a root
// 'duckdb_schema' group containing one optional string column 'ADDRTYPE'.
const addrtypeSchema = {
  children: [
    {
      children: [],
      count: 1,
      element: {
        converted_type: 0, // UTF8 (parquet ConvertedType enum)
        name: 'ADDRTYPE',
        repetition_type: 1, // OPTIONAL
        type: 6, // BYTE_ARRAY
      },
    },
  ],
  count: 2, // root element plus one child
  element: {
    name: 'duckdb_schema',
    num_children: 1,
    repetition_type: 0, // REQUIRED
  },
}
|
||||
|
||||
// Parquet v2 from pandas with 2 row groups
// Expected schema tree for rowgroups.parquet: a root 'schema' group
// containing one optional INT64 column 'numbers'.
const rowgroupsSchema = {
  children: [
    {
      children: [],
      count: 1,
      element: {
        name: 'numbers',
        repetition_type: 1, // OPTIONAL
        type: 2, // INT64 (parquet Type enum)
      },
    },
  ],
  count: 2, // root element plus one child
  element: {
    name: 'schema',
    num_children: 1,
    repetition_type: 0, // REQUIRED
  },
}
|
||||
Loading…
Reference in New Issue
Block a user