Hysnappy wasm for faster benchmark.js

This commit is contained in:
Kenny Daniel 2024-02-21 14:16:51 -08:00
parent 8b575ad2d8
commit 319dbb124e
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
3 changed files with 8 additions and 2 deletions

@@ -1,4 +1,5 @@
import { createReadStream, createWriteStream, promises as fs } from 'fs'
import { snappyUncompressor } from 'hysnappy'
import { parquetRead } from './src/hyparquet.js'
const url = 'https://huggingface.co/datasets/wikimedia/wikipedia/resolve/main/20231101.en/train-00000-of-00041.parquet'
@@ -18,6 +19,7 @@ if (!stat) {
console.log('downloaded example.parquet')
stat = await fs.stat('example.parquet').catch(() => undefined)
}
// asyncBuffer
const file = {
byteLength: stat.size,
@@ -30,8 +32,12 @@ const file = {
}
const startTime = performance.now()
console.log('parsing wikipedia data...')
// read parquet file
await parquetRead({ file })
await parquetRead({
file,
compressors: { SNAPPY: snappyUncompressor() }, // hysnappy wasm
})
const ms = performance.now() - startTime
console.log(`parsed ${stat.size.toLocaleString()} bytes in ${ms.toFixed(0)} ms`)

@@ -34,6 +34,7 @@
"eslint-plugin-import": "2.29.1",
"eslint-plugin-jsdoc": "48.2.0",
"http-server": "14.1.1",
"hysnappy": "0.2.0",
"typescript": "5.3.3",
"vitest": "1.3.1"
}

@@ -1,4 +1,3 @@
import { ParquetType } from './constants.js'
import { readData, readPlain, readRleBitPackedHybrid, widthFromMaxInt } from './encoding.js'
import {
getMaxDefinitionLevel,