From 41b3735383e08a6a61689807cafb44d1d429f60b Mon Sep 17 00:00:00 2001 From: Kenny Daniel Date: Mon, 15 Jan 2024 09:09:27 -0800 Subject: [PATCH] Display parquet file layout --- index.html | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 72 insertions(+), 4 deletions(-) diff --git a/index.html b/index.html index d2b4221..e5ccac0 100644 --- a/index.html +++ b/index.html @@ -17,6 +17,7 @@ nav { width: 300px; padding: 10px; + overflow-y: auto; } h1 { font-size: 20pt; @@ -49,12 +50,41 @@ .error { color: #c11; } + #layout { + margin-top: 20px; + } + #layout div { + background-color: rgba(0, 0, 0, 0.05); + border: 1px solid #ccc; + border-radius: 4px; + font-size: 12px; + margin-top: 4px; + padding: 4px; + word-break: break-all; + } + .cell { + display: flex; + } + .cell label { + font-size: 12px; + font-weight: normal; + flex: 1; + justify-content: flex-start; + } + #layout div ul { + list-style: none; + } + #layout div li { + font-size: 10px; + padding: 2px 4px; + text-align: right; + }
@@ -94,11 +125,48 @@ reader.onload = async (e) => { try { const arrayBuffer = e.target.result - const data = toJson(parquetMetadata(arrayBuffer)) + const metadata = toJson(parquetMetadata(arrayBuffer)) + + console.log('metadata', metadata) + + /* group(name): returns an HTML fragment that introduces a named group in the file-layout view. The name is interpolated into the template as-is, without HTML escaping. */ function group(name) { + return `
${name}` + } + /* cell(name, start, bytes, end): returns an HTML fragment for one byte range of the file, listing its start offset, byte count and end offset. The values are interpolated as-is, without escaping or number formatting. NOTE(review): no interpolation of name is visible in this template; confirm the label is rendered. */ function cell(name, start, bytes, end) { + return `
  • start ${start}
  • bytes ${bytes}
  • end ${end}
` + } + + // render file layout + let html = '

File layout

' + html += cell('PAR1', 0, 4, 4) // magic number + for (const rowGroupIndex in metadata.row_groups) { + const rowGroup = metadata.row_groups[rowGroupIndex] + html += group(`Row group ${rowGroupIndex} (${rowGroup.total_byte_size} bytes)`) + for (const column of rowGroup.columns) { + const columnName = column.meta_data.path_in_schema.join('.') + + let columnOffset = column.meta_data.dictionary_page_offset + if (!columnOffset || column.meta_data.data_page_offset < columnOffset) { + columnOffset = column.meta_data.data_page_offset + } + columnOffset = Number(columnOffset) + const bytes = column.meta_data.total_compressed_size + const end = columnOffset + bytes + html += cell(`Column ${columnName}`, columnOffset, bytes, end) + } + html += '
' + } + const metadataStart = arrayBuffer.byteLength - metadata.metadata_length - 4 + html += cell('Metadata', metadataStart, metadata.metadata_length, arrayBuffer.byteLength - 4) + html += cell('PAR1', arrayBuffer.byteLength - 4, 4, arrayBuffer.byteLength) // magic number + layout.innerHTML = html + // display metadata - dropzone.innerHTML = `${file.name}
${JSON.stringify(data, null, 2)}
` + dropzone.innerHTML = `${file.name}` + dropzone.innerHTML += `
${JSON.stringify(metadata, null, 2)}
` } catch (e) { - dropzone.innerHTML = `${file.name}
Error parsing file\n${e}
` + dropzone.innerHTML = `${file.name}` + dropzone.innerHTML += `
Error parsing file\n${e}
` } } reader.onerror = e => {