Test read of all sample files

This commit is contained in:
Kenny Daniel 2024-02-13 21:11:34 -08:00
parent 2cf00da9aa
commit 054431c98e
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
4 changed files with 50 additions and 58 deletions

@ -7,18 +7,16 @@
* @returns {unknown} converted object
*/
export function toJson(obj) {
  // JSON has no `undefined`; represent missing values as null so the result
  // can be compared against data loaded from .json fixtures.
  if (obj === undefined) return null
  // JSON has no bigint either. Number() loses precision above 2^53 - 1.
  if (typeof obj === 'bigint') return Number(obj)
  // Arrays are converted element by element (recursively).
  if (Array.isArray(obj)) return obj.map(toJson)
  if (obj instanceof Object) {
    // Rebuild the object from its own enumerable keys, converting each value.
    /** @type {Record<string, unknown>} */
    const newObj = {}
    for (const key of Object.keys(obj)) {
      newObj[key] = toJson(obj[key])
    }
    return newObj
  }
  // Remaining primitives (string, number, boolean) and null pass through.
  return obj
}

@ -0,0 +1,12 @@
[
[ "Block" ],
[ "Intersection" ],
[ "Block" ],
[ "Block" ],
[ null ],
[ "Block" ],
[ "Intersection" ],
[ "Block" ],
[ "Block" ],
[ "Intersection" ]
]

17
test/files/rowgroups.json Normal file

@ -0,0 +1,17 @@
[
[ 1 ],
[ 2 ],
[ 3 ],
[ 4 ],
[ 5 ],
[ 6 ],
[ 7 ],
[ 8 ],
[ 9 ],
[ 10 ],
[ 11 ],
[ 12 ],
[ 13 ],
[ 14 ],
[ 15 ]
]

@ -1,58 +1,23 @@
import fs from 'fs'
import { describe, expect, it } from 'vitest'
import { parquetRead } from '../src/hyparquet.js'
import { toJson } from '../src/toJson.js'
import { fileToAsyncBuffer, fileToJson } from './helpers.js'

describe('parquetRead', () => {
  // Each *.parquet fixture in test/files is paired with a same-named *.json
  // file holding the rows expected after toJson conversion.
  it('should parse data from all test files', async () => {
    const extension = '.parquet'
    const files = fs.readdirSync('test/files')
    for (const file of files) {
      if (!file.endsWith(extension)) continue
      // Strip only the trailing extension; String.replace would remove the
      // first occurrence, which is wrong if the name contains it earlier.
      const base = file.slice(0, -extension.length)
      const asyncBuffer = fileToAsyncBuffer(`test/files/${file}`)
      await parquetRead({
        file: asyncBuffer,
        // NOTE(review): the assertion only runs if parquetRead invokes
        // onComplete — confirm it always does, otherwise this passes vacuously.
        onComplete: (rows) => {
          const expected = fileToJson(`test/files/${base}.json`)
          expect(toJson(rows)).toEqual(expected)
        },
      })
    }
  })
})
// Parquet v1 from DuckDB
// Ten single-column rows; the fifth row has no value (undefined).
const addrtypeData = [
  'Block',
  'Intersection',
  'Block',
  'Block',
  undefined,
  'Block',
  'Intersection',
  'Block',
  'Block',
  'Intersection',
].map((value) => [value])
// Fifteen single-column rows holding the integers 1 through 15 in order.
const rowgroupsData = Array.from({ length: 15 }, (_, index) => [index + 1])