From 71dd68540dfd2fa86ce13ab259b59b46fd2c346a Mon Sep 17 00:00:00 2001
From: Kenny Daniel
Date: Sun, 28 Apr 2024 18:42:15 -0700
Subject: [PATCH] Add dremel assembly demo page

---
 dremel/dremel.js  | 58 +++++++++++++++++++++++++++++++
 dremel/index.html | 89 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+)
 create mode 100644 dremel/dremel.js
 create mode 100644 dremel/index.html

diff --git a/dremel/dremel.js b/dremel/dremel.js
new file mode 100644
index 0000000..d1eaae9
--- /dev/null
+++ b/dremel/dremel.js
@@ -0,0 +1,58 @@
+import { assembleObjects } from '../src/assemble.js'
+
+const defs = document.getElementById('defs')
+const reps = document.getElementById('reps')
+const values = document.getElementById('values')
+const valuesWithNulls = document.getElementById('values-with-nulls')
+const output = document.getElementById('output')
+
+// update the output on every edit ('input' also fires on paste, cut and drop)
+defs.addEventListener('input', update)
+reps.addEventListener('input', update)
+values.addEventListener('input', update)
+
+/**
+ * Re-read the comma-separated level and value inputs, then refresh both the
+ * flattened "values with nulls" preview and the assembled output rows.
+ * Anything thrown during assembly is shown in the output element as text.
+ */
+function update() {
+  // an empty definition-levels input means "no definition levels"
+  let def = defs.value ? defs.value.split(',').map(Number) : []
+  const rep = reps.value.split(',').map(Number)
+  const val = values.value.split(',').map(Number)
+  const maxDef = Math.max(1, ...def)
+  const maxRep = Math.max(...rep)
+  // nullable if any definition level is less than max
+  const isNullable = def.some(d => d < maxDef)
+  if (def.length === 0) def = undefined
+
+  // update flattened values with nulls
+  // (def is only indexed when isNullable is true, which implies it is non-empty)
+  const withNulls = []
+  let valueIndex = 0
+  for (let i = 0; i < rep.length; i++) {
+    if (!isNullable || def[i] === maxDef) withNulls.push(val[valueIndex++])
+    else withNulls.push('-')
+  }
+  valuesWithNulls.innerText = withNulls.join(', ')
+
+  // update the output
+  try {
+    const out = assembleObjects(def, rep, val, isNullable, maxDef, maxRep)
+    // build the text once instead of re-reading innerText for every row
+    let text = ''
+    for (const obj of out) {
+      // JSON.stringify returns undefined for an undefined row, hence ?. and the fallback
+      const row = JSON.stringify(obj, null, 1)?.replace(/\s+/g, ' ')
+      text += `${row || 'null'}\n`
+    }
+    output.innerText = text
+    output.classList.remove('error')
+  } catch (e) {
+    // innerText, not innerHTML: the message must never be parsed as markup
+    output.innerText = String(e)
+    output.classList.add('error')
+  }
+}
+update()
diff --git a/dremel/index.html b/dremel/index.html
new file mode 100644
index 0000000..31c1600
--- /dev/null
+++ b/dremel/index.html
@@ -0,0 +1,89 @@
+
+
+
+
+ Dremel assembly - hyparquet
+
+
+
+
+

Dremel Assembly

+

+ Online demo of Dremel assembly of lists from definition and repetition levels. +

+

+ Google introduced Dremel in 2010 as a columnar storage format for nested data. + The format uses repetition levels and definition levels to encode nested data efficiently. + This demo maps definition and repetition levels to assembled lists. +

+

+ This demo is developed as a learning and debugging tool as part of hyparquet: a parser for Apache Parquet files. +

+ +
+ + +
+
+ + +
+
+ + +

+    
+ +
+ +
+ +

+    
+ + + +