diff --git a/demo.css b/demo.css index 15d9996..935b907 100644 --- a/demo.css +++ b/demo.css @@ -79,3 +79,22 @@ nav ul, padding: 2px 4px; text-align: right; } + +.collapsed > :not(:first-child) { + display: none; +} + +#layout h2 { + cursor: pointer; + user-select: none; +} +#layout h2::before { + content: "▼"; + display: inline-block; + font-size: 10px; + margin: 0 4px; + vertical-align: middle; +} +#layout .collapsed h2::before { + content: "▶"; +} diff --git a/demo.js b/demo.js index 89fd8ed..630a47a 100644 --- a/demo.js +++ b/demo.js @@ -14,21 +14,6 @@ dropzone.addEventListener('dragleave', () => { dropzone.classList.remove('over') }) -function group(name) { - return `
${name}` -} -function cell(name, start, bytes, end) { - return ` -
- - -
` -} - dropzone.addEventListener('drop', e => { e.preventDefault() // prevent dropped file from being "downloaded" dropzone.classList.remove('over') @@ -47,32 +32,8 @@ function processFile(file) { const arrayBuffer = e.target.result const metadata = toJson(parquetMetadata(arrayBuffer)) - console.log('metadata', metadata) - // render file layout - let html = '

File layout

' - html += cell('PAR1', 0, 4, 4) // magic number - for (const rowGroupIndex in metadata.row_groups) { - const rowGroup = metadata.row_groups[rowGroupIndex] - html += group(`Row group ${rowGroupIndex} (${rowGroup.total_byte_size} bytes)`) - for (const column of rowGroup.columns) { - const columnName = column.meta_data.path_in_schema.join('.') - - let columnOffset = column.meta_data.dictionary_page_offset - if (!columnOffset || column.meta_data.data_page_offset < columnOffset) { - columnOffset = column.meta_data.data_page_offset - } - columnOffset = Number(columnOffset) - const bytes = column.meta_data.total_compressed_size - const end = columnOffset + bytes - html += cell(`Column ${columnName}`, columnOffset, bytes, end) - } - html += '
' - } - const metadataStart = arrayBuffer.byteLength - metadata.metadata_length - 4 - html += cell('Metadata', metadataStart, metadata.metadata_length, arrayBuffer.byteLength - 4) - html += cell('PAR1', arrayBuffer.byteLength - 4, 4, arrayBuffer.byteLength) // magic number - layout.innerHTML = html + layout.appendChild(fileLayout(metadata, arrayBuffer)) // display metadata dropzone.innerHTML = `${file.name}` @@ -98,3 +59,50 @@ fileInput.addEventListener('change', () => { processFile(fileInput.files[0]) } }) + +// Render file layout +function fileLayout(metadata, arrayBuffer) { + let html = '

File layout

' + html += cell('PAR1', 0, 4, 4) // magic number + for (const rowGroupIndex in metadata.row_groups) { + const rowGroup = metadata.row_groups[rowGroupIndex] + html += group(`Row group ${rowGroupIndex} (${rowGroup.total_byte_size} bytes)`) + for (const column of rowGroup.columns) { + const columnName = column.meta_data.path_in_schema.join('.') + + let columnOffset = column.meta_data.dictionary_page_offset + if (!columnOffset || column.meta_data.data_page_offset < columnOffset) { + columnOffset = column.meta_data.data_page_offset + } + columnOffset = Number(columnOffset) + const bytes = column.meta_data.total_compressed_size + const end = columnOffset + bytes + html += cell(`Column ${columnName}`, columnOffset, bytes, end) + } + html += '' + } + const metadataStart = arrayBuffer.byteLength - metadata.metadata_length - 4 + html += cell('Metadata', metadataStart, metadata.metadata_length, arrayBuffer.byteLength - 4) + html += cell('PAR1', arrayBuffer.byteLength - 4, 4, arrayBuffer.byteLength) // magic number + const div = document.createElement('div') + div.innerHTML = html + div.classList.add('collapsed') // start collapsed + div.addEventListener('click', () => { + div.classList.toggle('collapsed') + }) + return div +} +function group(name) { + return `
${name}` +} +function cell(name, start, bytes, end) { + return ` +
+ + +
` +} diff --git a/index.html b/index.html index 7c55db5..58b3431 100644 --- a/index.html +++ b/index.html @@ -13,7 +13,7 @@ This is a simple online demo of the hyparquet parser for apache parquet files.

- Drag and drop a parquet file onto the dropzone to see parquet file metadata. + Drag and drop a parquet file onto the dropzone to see parquet data.