Add dremel assembly demo page

This commit is contained in:
Kenny Daniel 2024-04-28 18:42:15 -07:00
parent bf1b8d79c7
commit 71dd68540d
No known key found for this signature in database
GPG Key ID: 90AB653A8CAD7E45
2 changed files with 136 additions and 0 deletions

47
dremel/dremel.js Normal file

@ -0,0 +1,47 @@
import { assembleObjects } from '../src/assemble.js'
// Editable inputs holding comma-separated definition levels, repetition levels and values
const defs = document.getElementById('defs')
const reps = document.getElementById('reps')
const values = document.getElementById('values')
// Read-only displays: the values with '-' placeholders, and the assembled rows
const valuesWithNulls = document.getElementById('values-with-nulls')
const output = document.getElementById('output')

// update the output on change
// 'input' fires on every value change (typing, paste, cut, drag-and-drop),
// whereas 'keyup' misses edits that are made without releasing a key.
for (const field of [defs, reps, values]) {
  field.addEventListener('input', update)
}
/**
 * Re-render the page from the current contents of the three inputs.
 *
 * Parses the comma-separated numbers in the `defs`, `reps` and `values`
 * inputs, shows the values with '-' placeholders inserted wherever the
 * definition level is below the maximum, and prints the result of
 * assembleObjects() as one line per row. If anything throws while assembling
 * or serializing, the exception text is shown instead and the output element
 * gets the 'error' class.
 */
function update() {
  // an empty definition-levels field means "no definition levels" rather than [0]
  const def = defs.value ? defs.value.split(',').map(Number) : []
  const rep = reps.value.split(',').map(Number)
  const val = values.value.split(',').map(Number)
  const maxDef = Math.max(1, ...def)
  const maxRep = Math.max(...rep)
  // nullable if any definition level is less than max
  const isNullable = def.some(d => d < maxDef)

  // update flattened values with nulls: one slot per repetition level,
  // consuming the next value only where the slot is fully defined
  const withNulls = []
  let valueIndex = 0
  for (let i = 0; i < rep.length; i++) {
    const isDefined = !isNullable || def[i] === maxDef
    withNulls.push(isDefined ? val[valueIndex++] : '-')
  }
  valuesWithNulls.textContent = withNulls.join(', ')

  // update the output
  try {
    // pass undefined instead of an empty array when no definition levels were entered
    const out = assembleObjects(def.length > 0 ? def : undefined, rep, val, isNullable, maxDef, maxRep)
    // build the whole text first so the element is written once, not once per row
    let text = ''
    for (const obj of out) {
      // collapse the pretty-printed JSON onto one line with single spaces
      const row = JSON.stringify(obj, null, 1)?.replace(/\s+/g, ' ')
      text += (row || 'null') + '\n'
    }
    output.textContent = text
    output.classList.remove('error')
  } catch (e) {
    // textContent, not innerHTML: the message is plain text and must never be parsed as markup
    output.textContent = String(e)
    output.classList.add('error')
  }
}
// render once at load so the output reflects the inputs' initial values
update()

89
dremel/index.html Normal file

@ -0,0 +1,89 @@
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Lay the page out at device width on phones instead of a zoomed-out desktop width -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Dremel assembly - hyparquet</title>
  <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Mulish:wght@400;600&display=swap"/>
  <style>
    /* Reset: border-box sizing, page font, no default margins or padding */
    * {
      box-sizing: border-box;
      font-family: 'Mulish', 'Helvetica Neue', Helvetica, Arial, sans-serif;
      margin: 0;
      padding: 0;
    }
    body {
      padding: 20px;
    }
    p {
      margin-bottom: 20px;
    }
    hr {
      margin: 20px 0;
    }
    /* Field captions sit on their own line above the control */
    label {
      display: block;
      font-weight: 600;
      margin: 10px 0 4px;
    }
    /* Full-width monospace text fields for the level and value lists */
    input {
      border: 1px solid #ccc;
      border-radius: 4px;
      display: block;
      font-family: monospace;
      font-size: 14px;
      padding: 8px;
      width: 100%;
    }
    pre {
      font-family: monospace;
      font-size: 16px;
      line-height: 1.4;
    }
    /* Added to the output element by dremel.js when assembly throws */
    .error {
      color: #c11;
    }
    /* Same font size and horizontal padding as the inputs, so the text lines up under the values field */
    #values-with-nulls {
      font-size: 14px;
      padding: 0 8px;
      color: #555;
    }
  </style>
</head>
<body>
<h1>Dremel Assembly</h1>
<p>
Online demo of Dremel assembly of lists from definition and repetition levels.
</p>
<p>
Google introduced <a href="https://research.google/pubs/dremel-interactive-analysis-of-web-scale-datasets-2/">Dremel</a> in 2010, and with it a columnar storage format for nested data.
The format uses <em>repetition levels</em> and <em>definition levels</em> to encode nested data efficiently.
This demo maps definition and repetition levels to assembled lists.
</p>
<p>
This demo is developed as a learning and debugging tool as part of <a href="https://github.com/hyparam/hyparquet">hyparquet</a>: a parser for Apache Parquet files.
</p>
<!-- Each label is tied to its input with for/id, so clicking the caption focuses the field -->
<div>
  <label for="defs">Definition levels</label>
  <input id="defs" value="5, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 3, 2, 2, 1, 0, 0, 2, 5, 5">
</div>
<div>
  <label for="reps">Repetition levels</label>
  <input id="reps" value="0, 2, 1, 2, 0, 2, 2, 2, 1, 2, 2, 1, 1, 0, 0, 0, 0, 0, 1, 2">
</div>
<div>
  <label for="values">Values</label>
  <input id="values" value="1, 2, 3, 4, 1, 2, 3, 4, 5, 6">
  <!-- Filled in by dremel.js with the values plus '-' placeholders -->
  <pre id="values-with-nulls" title="values with nulls inserted based on definition levels"></pre>
</div>
<hr>
<div>
<label>Output</label>
<!-- Filled in by dremel.js: one line per assembled row, or the error text with the .error class -->
<pre id="output"></pre>
</div>
<!-- Loaded as a module because dremel.js uses an ES import -->
<script type="module" src="dremel.js"></script>
</body>
</html>