Show/hide file layout

This commit is contained in:
Kenny Daniel 2024-01-27 18:55:34 -08:00
parent 03606e22cc
commit 1bdd08b716
No known key found for this signature in database
GPG Key ID: 6A3C5E318BE71391
3 changed files with 68 additions and 41 deletions

@ -79,3 +79,22 @@ nav ul,
padding: 2px 4px;
text-align: right;
}
/* Collapsed state: hide every direct child except the first one. */
.collapsed > :not(:first-child) {
display: none;
}
/* Headings inside #layout look clickable and cannot be text-selected. */
#layout h2 {
cursor: pointer;
user-select: none;
}
/* Small down-pointing triangle rendered before each #layout heading. */
#layout h2::before {
content: "▼";
display: inline-block;
font-size: 10px;
margin: 0 4px;
vertical-align: middle;
}
/* Inside a collapsed container the marker becomes a right-pointing triangle. */
#layout .collapsed h2::before {
content: "▶";
}

88
demo.js

@ -14,21 +14,6 @@ dropzone.addEventListener('dragleave', () => {
dropzone.classList.remove('over')
})
/**
 * Start a group container in the layout markup.
 *
 * Only the opening tag plus the label is produced; no closing `</div>` is
 * emitted here, so the caller has to append it after the group's contents.
 *
 * @param {string} name label text placed right after the opening tag
 * @returns {string} HTML fragment consisting of `<div>` followed by the label
 */
function group(name) {
  const openTag = '<div>'
  return openTag.concat(name)
}
/**
 * Render one layout cell: a labelled box listing a start, a byte count and an end.
 *
 * Every interpolated value is HTML-escaped before it is placed in the markup,
 * so a label containing `<`, `>`, `&` or quotes is shown as text instead of
 * being parsed as markup when the result is inserted into the page.
 *
 * @param {string} name label shown at the top of the cell
 * @param {number} start value shown on the "start" line
 * @param {number} bytes value shown on the "bytes" line
 * @param {number} end value shown on the "end" line
 * @returns {string} HTML markup for the cell
 */
function cell(name, start, bytes, end) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }
  const esc = value => String(value).replace(/[&<>"']/g, ch => entities[ch])
  return `
<div class="cell">
<label>${esc(name)}</label>
<ul>
<li>start ${esc(start)}</li>
<li>bytes ${esc(bytes)}</li>
<li>end ${esc(end)}</li>
</ul>
</div>`
}
dropzone.addEventListener('drop', e => {
e.preventDefault() // prevent dropped file from being "downloaded"
dropzone.classList.remove('over')
@ -47,32 +32,8 @@ function processFile(file) {
const arrayBuffer = e.target.result
const metadata = toJson(parquetMetadata(arrayBuffer))
console.log('metadata', metadata)
// render file layout
let html = '<h2>File layout</h2>'
html += cell('PAR1', 0, 4, 4) // magic number
for (const rowGroupIndex in metadata.row_groups) {
const rowGroup = metadata.row_groups[rowGroupIndex]
html += group(`Row group ${rowGroupIndex} (${rowGroup.total_byte_size} bytes)`)
for (const column of rowGroup.columns) {
const columnName = column.meta_data.path_in_schema.join('.')
let columnOffset = column.meta_data.dictionary_page_offset
if (!columnOffset || column.meta_data.data_page_offset < columnOffset) {
columnOffset = column.meta_data.data_page_offset
}
columnOffset = Number(columnOffset)
const bytes = column.meta_data.total_compressed_size
const end = columnOffset + bytes
html += cell(`Column ${columnName}`, columnOffset, bytes, end)
}
html += '</div>'
}
const metadataStart = arrayBuffer.byteLength - metadata.metadata_length - 4
html += cell('Metadata', metadataStart, metadata.metadata_length, arrayBuffer.byteLength - 4)
html += cell('PAR1', arrayBuffer.byteLength - 4, 4, arrayBuffer.byteLength) // magic number
layout.innerHTML = html
layout.appendChild(fileLayout(metadata, arrayBuffer))
// display metadata
dropzone.innerHTML = `<strong>${file.name}</strong>`
@ -98,3 +59,50 @@ fileInput.addEventListener('change', () => {
processFile(fileInput.files[0])
}
})
// Render file layout
/**
 * Build the "File layout" panel for a parsed parquet file.
 *
 * The panel lists, in order: the leading PAR1 magic number, one group per row
 * group with a cell per column chunk, the metadata section, and the trailing
 * PAR1 magic number. The returned element starts with the `collapsed` class
 * and toggles that class on every click.
 *
 * @param {object} metadata parsed metadata; `row_groups` and `metadata_length` are read
 * @param {ArrayBuffer} arrayBuffer file contents; only `byteLength` is read
 * @returns {HTMLDivElement} detached div containing the rendered layout
 */
function fileLayout(metadata, arrayBuffer) {
  let html = '<h2>File layout</h2>'
  html += cell('PAR1', 0, 4, 4) // magic number

  // Iterate with entries() rather than for...in: for...in yields string keys
  // and also visits inherited enumerable properties.
  const rowGroups = metadata.row_groups || []
  for (const [rowGroupIndex, rowGroup] of rowGroups.entries()) {
    html += group(`Row group ${rowGroupIndex} (${rowGroup.total_byte_size} bytes)`)
    for (const column of rowGroup.columns) {
      const columnName = column.meta_data.path_in_schema.join('.')
      // The column chunk starts at whichever page comes first: the dictionary
      // page when present (non-zero offset), otherwise the first data page.
      let columnOffset = column.meta_data.dictionary_page_offset
      if (!columnOffset || column.meta_data.data_page_offset < columnOffset) {
        columnOffset = column.meta_data.data_page_offset
      }
      columnOffset = Number(columnOffset)
      // Coerce the size like the offset so the addition below is numeric
      // (a BigInt would throw, a string would concatenate).
      const bytes = Number(column.meta_data.total_compressed_size)
      const end = columnOffset + bytes
      html += cell(`Column ${columnName}`, columnOffset, bytes, end)
    }
    html += '</div>' // closes the div opened by group()
  }

  // Metadata section sits directly before the trailing 4-byte magic number.
  const magicStart = arrayBuffer.byteLength - 4
  const metadataStart = magicStart - metadata.metadata_length
  html += cell('Metadata', metadataStart, metadata.metadata_length, magicStart)
  html += cell('PAR1', magicStart, 4, arrayBuffer.byteLength) // magic number

  const div = document.createElement('div')
  div.innerHTML = html
  div.classList.add('collapsed') // start collapsed
  // NOTE(review): the listener is on the whole container, so a click on any
  // cell toggles the panel too — confirm whether only the heading should toggle.
  div.addEventListener('click', () => {
    div.classList.toggle('collapsed')
  })
  return div
}
/**
 * Start a group container in the layout markup.
 *
 * Only the opening tag plus the label is produced; no closing `</div>` is
 * emitted here, so the caller has to append it after the group's contents.
 *
 * @param {string} name label text placed right after the opening tag
 * @returns {string} HTML fragment consisting of `<div>` followed by the label
 */
function group(name) {
  const openTag = '<div>'
  return openTag.concat(name)
}
/**
 * Render one layout cell: a labelled box listing a start, a byte count and an end.
 *
 * Every interpolated value is HTML-escaped before it is placed in the markup,
 * so a label containing `<`, `>`, `&` or quotes is shown as text instead of
 * being parsed as markup when the result is inserted into the page.
 *
 * @param {string} name label shown at the top of the cell
 * @param {number} start value shown on the "start" line
 * @param {number} bytes value shown on the "bytes" line
 * @param {number} end value shown on the "end" line
 * @returns {string} HTML markup for the cell
 */
function cell(name, start, bytes, end) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }
  const esc = value => String(value).replace(/[&<>"']/g, ch => entities[ch])
  return `
<div class="cell">
<label>${esc(name)}</label>
<ul>
<li>start ${esc(start)}</li>
<li>bytes ${esc(bytes)}</li>
<li>end ${esc(end)}</li>
</ul>
</div>`
}

@ -13,7 +13,7 @@
This is a simple online demo of the <a href="https://github.com/hyparam/hyparquet">hyparquet</a> parser for Apache Parquet files.
</p>
<p>
Drag and drop a parquet file onto the dropzone to see parquet file metadata.
Drag and drop a parquet file onto the dropzone to see parquet data.
</p>
<ul>
<li><a href="https://github.com/hyparam/hyparquet">github</a></li>