Add parquet metadata demo html

This commit is contained in:
Kenny Daniel 2024-01-04 09:27:47 -08:00
parent 58974da324
commit 75abb70d18
No known key found for this signature in database
GPG Key ID: 6A3C5E318BE71391
4 changed files with 84 additions and 7 deletions

73
index.html Normal file

@ -0,0 +1,73 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>hyparquet parquet file parser</title>
<style>
#dropzone {
position: absolute;
bottom: 10px;
box-sizing: border-box;
top: 10px;
left: 10px;
right: 10px;
border: 2px dashed #08e;
border-radius: 10px;
padding: 10px;
display: flex;
align-items: center;
justify-content: center;
color: #444;
font-family: sans-serif;
font-size: 20px;
overflow-y: auto;
white-space: pre-wrap;
}
.over {
background-color: lightblue;
}
</style>
</head>
<body>
<div id="dropzone">Drop .parquet file here</div>
<script type="module">
  // Demo page logic: lets the user drop a .parquet file onto the page, parses
  // its metadata in the browser, and renders the result as pretty-printed JSON
  // inside the drop zone.
  import { parquetMetadata, toJson } from './dist/metadata.js'

  const dropZone = document.getElementById('dropzone')

  // Highlight the drop zone while a file is being dragged over it.
  dropZone.addEventListener('dragover', e => {
    e.preventDefault() // cancel the default so the element accepts drops
    e.dataTransfer.dropEffect = 'copy'
    dropZone.classList.add('over')
  })

  // Remove the highlight when the drag leaves without dropping.
  dropZone.addEventListener('dragleave', () => {
    dropZone.classList.remove('over')
  })

  // Read the first dropped file and display its parquet metadata.
  dropZone.addEventListener('drop', e => {
    e.preventDefault() // prevent dropped file from being "downloaded"
    dropZone.classList.remove('over')

    const files = e.dataTransfer.files
    if (files.length === 0) return // nothing droppable (e.g. dragged text)

    // Only the first file is processed; additional files are ignored.
    const file = files[0]
    const reader = new FileReader()

    reader.onload = () => {
      // Parsing is synchronous here, so the handler is deliberately not
      // `async`: a thrown error must be caught below rather than turned into
      // an unhandled promise rejection that the user never sees.
      try {
        const metadata = toJson(parquetMetadata(reader.result))
        console.log(metadata)
        // display metadata
        dropZone.innerText = `${file.name}\n${JSON.stringify(metadata, null, 2)}`
      } catch (err) {
        // NOTE(review): presumably parquetMetadata throws on input that is not
        // a valid parquet file — surface that instead of failing silently.
        console.error('Error parsing file', err)
        dropZone.innerText = `Error parsing file ${file.name}\n${err}`
      }
    }

    reader.onerror = () => {
      console.error('Error reading file', reader.error)
      dropZone.innerText = `Error reading file\n${reader.error}`
    }

    reader.readAsArrayBuffer(file)
  })
</script>
</body>
</html>

@ -20,17 +20,19 @@
"scripts": {
"build": "tsc",
"coverage": "vitest run --coverage",
"demo": "http-server -o",
"lint": "eslint . --ext .ts",
"test": "vitest run"
},
"devDependencies": {
"@types/node": "20.10.6",
"@typescript-eslint/eslint-plugin": "6.17.0",
"@vitest/coverage-v8": "1.1.1",
"@vitest/coverage-v8": "1.1.2",
"eslint": "8.56.0",
"eslint-plugin-import": "2.29.1",
"eslint-plugin-jsdoc": "48.0.2",
"http-server": "14.1.1",
"typescript": "5.3.3",
"vitest": "1.1.1"
"vitest": "1.1.2"
}
}

@ -112,16 +112,18 @@ export function schemaElement(schema: SchemaElement[], name: string[]): any {
/**
* Replace bigints with numbers.
* When parsing parquet files, bigints are used to represent 64-bit integers.
* However, JSON does not support bigints, so it's helpful to convert to numbers.
*/
export function castBigInts(obj: any): any {
export function toJson(obj: unknown): unknown {
if (typeof obj === 'bigint') {
return Number(obj)
} else if (Array.isArray(obj)) {
return obj.map(castBigInts)
return obj.map(toJson)
} else if (typeof obj === 'object') {
const newObj = {}
for (const key of Object.keys(obj)) {
newObj[key] = castBigInts(obj[key])
newObj[key] = toJson(obj[key])
}
return newObj
} else {

@ -1,6 +1,6 @@
import { promises as fs } from 'fs'
import { describe, expect, it } from 'vitest'
import { castBigInts, parquetMetadata } from '../src/metadata'
import { parquetMetadata, toJson } from '../src/metadata'
/**
* Helper function to read .parquet file into ArrayBuffer
@ -56,7 +56,7 @@ describe('parquetMetadata', () => {
created_by: 'DuckDB',
}
const casted = castBigInts(result)
const casted = toJson(result)
expect(casted).toEqual(expectedMetadata)
})