mirror of
https://github.com/asadbek064/hyparquet.git
synced 2025-12-29 16:36:37 +00:00
Add dremel assembly demo page
This commit is contained in:
parent
bf1b8d79c7
commit
71dd68540d
47
dremel/dremel.js
Normal file
47
dremel/dremel.js
Normal file
@ -0,0 +1,47 @@
|
||||
import { assembleObjects } from '../src/assemble.js'
|
||||
|
||||
// input fields holding the comma-separated levels and values
const defs = document.getElementById('defs')
const reps = document.getElementById('reps')
const values = document.getElementById('values')
// display-only elements written by update()
const valuesWithNulls = document.getElementById('values-with-nulls')
const output = document.getElementById('output')

// Update the output on every edit. The 'input' event fires for any change to
// a field's value (typing, paste, cut, drag-and-drop), whereas 'keyup' only
// covers edits made with the keyboard.
for (const field of [defs, reps, values]) {
  field.addEventListener('input', update)
}
|
||||
|
||||
/**
 * Re-read the definition levels, repetition levels and values from the input
 * fields, then refresh both displays:
 *  - the flattened value list with '-' placeholders where a value is null
 *  - the assembled rows, one JSON-formatted row per line
 *
 * If assembly throws, the error message is shown in the output element and
 * the 'error' class is added to it; on success the class is removed.
 */
function update() {
  // A blank defs field becomes an empty list. Number('') is 0, so a blank
  // reps or values field parses as [0] rather than an empty list.
  let def = defs.value ? defs.value.split(',').map(Number) : []
  const rep = reps.value.split(',').map(Number)
  const val = values.value.split(',').map(Number)
  // the max definition level is at least 1, even with no definition levels
  const maxDef = Math.max(1, ...def)
  const maxRep = Math.max(...rep)
  // nullable if any definition level is less than max
  const isNullable = def.some(d => d < maxDef)
  // assembleObjects is passed undefined rather than an empty list
  if (def.length === 0) def = undefined

  // update flattened values with nulls
  const withNulls = []
  let valueIndex = 0
  for (let i = 0; i < rep.length; i++) {
    // a value is only consumed where the definition level reaches the max
    // (def is never indexed when it is undefined, since isNullable is false)
    if (!isNullable || def[i] === maxDef) withNulls.push(val[valueIndex++])
    else withNulls.push('-')
  }
  valuesWithNulls.innerText = withNulls.join(', ')

  // update the output
  try {
    const out = assembleObjects(def, rep, val, isNullable, maxDef, maxRep)
    // build the whole text first and write it to the DOM once
    let text = ''
    for (const obj of out) {
      // collapse the pretty-printed JSON onto a single line;
      // JSON.stringify returns undefined for an undefined row
      const row = JSON.stringify(obj, null, 1)?.replace(/\s+/g, ' ')
      text += `${row || 'null'}\n`
    }
    output.innerText = text
    output.classList.remove('error')
  } catch (e) {
    // write the message as text, never as HTML, so that any '<' or '&'
    // in it is displayed literally instead of being parsed as markup
    output.innerText = e.toString()
    output.classList.add('error')
  }
}
|
||||
// render once when the module loads, using the fields' initial values
update()
|
||||
89
dremel/index.html
Normal file
89
dremel/index.html
Normal file
@ -0,0 +1,89 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Dremel assembly - hyparquet</title>
|
||||
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Mulish:wght@400;600&display=swap"/>
|
||||
<style>
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
font-family: 'Mulish', 'Helvetica Neue', Helvetica, Arial, sans-serif;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
body {
|
||||
padding: 20px;
|
||||
}
|
||||
p {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
hr {
|
||||
margin: 20px 0;
|
||||
}
|
||||
label {
|
||||
display: block;
|
||||
font-weight: 600;
|
||||
margin: 10px 0 4px;
|
||||
}
|
||||
input {
|
||||
border: 1px solid #ccc;
|
||||
border-radius: 4px;
|
||||
display: block;
|
||||
font-family: monospace;
|
||||
font-size: 14px;
|
||||
padding: 8px;
|
||||
width: 100%;
|
||||
}
|
||||
pre {
|
||||
font-family: monospace;
|
||||
font-size: 16px;
|
||||
line-height: 1.4;
|
||||
}
|
||||
.error {
|
||||
color: #c11;
|
||||
}
|
||||
#values-with-nulls {
|
||||
font-size: 14px;
|
||||
padding: 0 8px;
|
||||
color: #555;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Dremel Assembly</h1>
|
||||
<p>
|
||||
Online demo of Dremel assembly: building lists from definition and repetition levels.
|
||||
</p>
|
||||
<p>
|
||||
Google introduced <a href="https://research.google/pubs/dremel-interactive-analysis-of-web-scale-datasets-2/">Dremel</a> in 2010 as a columnar storage format for nested data.
|
||||
The format uses <em>repetition levels</em> and <em>definition levels</em> to encode nested data efficiently.
|
||||
This demo maps definition and repetition levels to assembled lists.
|
||||
</p>
|
||||
<p>
|
||||
This demo was developed as a learning and debugging tool as part of <a href="https://github.com/hyparam/hyparquet">hyparquet</a>: a parser for Apache Parquet files.
|
||||
</p>
|
||||
|
||||
<!-- Each label is tied to its input with for/id so that clicking the label
     focuses the field and assistive technology announces the field's name. -->
<div>
  <label for="defs">Definition levels</label>
  <input id="defs" value="5, 5, 5, 5, 4, 5, 5, 4, 5, 4, 5, 3, 2, 2, 1, 0, 0, 2, 5, 5">
</div>
<div>
  <label for="reps">Repetition levels</label>
  <input id="reps" value="0, 2, 1, 2, 0, 2, 2, 2, 1, 2, 2, 1, 1, 0, 0, 0, 0, 0, 1, 2">
</div>
<div>
  <label for="values">Values</label>
  <input id="values" value="1, 2, 3, 4, 1, 2, 3, 4, 5, 6">
  <pre id="values-with-nulls" title="values with nulls inserted based on definition levels"></pre>
</div>
|
||||
|
||||
<hr>
|
||||
|
||||
<div>
|
||||
<label>Output</label>
|
||||
<pre id="output"></pre>
|
||||
</div>
|
||||
|
||||
<script type="module" src="dremel.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Reference in New Issue
Block a user