hyparquet/index.html

182 lines
5.5 KiB
HTML
Raw Normal View History

2024-01-04 17:27:47 +00:00
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<!-- Without a viewport declaration mobile browsers lay the page out at a virtual desktop width and scale it down. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>hyparquet parquet file parser</title>
<style>
  /* Reset: border-box sizing and no default spacing on any element. */
  * {
    box-sizing: border-box;
    margin: 0;
    padding: 0;
  }

  /* Page is a full-height horizontal flex row: sidebar (nav) + dropzone. */
  body {
    display: flex;
    font-family: sans-serif;
    height: 100vh;
  }

  /* Sidebar: fixed 300px column that scrolls on its own when content overflows. */
  nav {
    width: 300px;
    padding: 10px;
    overflow-y: auto;
  }
  h1 {
    font-size: 20pt;
  }
  h2 {
    font-size: 12pt;
  }
  p {
    margin: 10px 0;
  }

  /* Labels fill their container and center their text (the drop hint). */
  label {
    height: 100%;
    display: flex;
    align-items: center;
    justify-content: center;
    font-size: 20px;
  }

  /* Drop target: takes the remaining width and scrolls when its content overflows. */
  #dropzone {
    border: 2px dashed #08e;
    border-radius: 10px;
    flex: 1;
    margin: 10px;
    padding: 10px;
    color: #444;
    overflow: auto;
  }

  /* Toggled by the script while a file is dragged over the dropzone. */
  .over {
    background-color: lightblue;
  }
  .error {
    color: #c11;
  }

  /* File layout panel in the sidebar; its content is generated by the script. */
  #layout {
    margin-top: 20px;
  }
  #layout div {
    background-color: rgba(0, 0, 0, 0.05);
    border: 1px solid #ccc;
    border-radius: 4px;
    font-size: 12px;
    margin-top: 4px;
    padding: 4px;
    word-break: break-all;
  }

  /* One layout entry: name on the left, start/bytes/end list on the right. */
  .cell {
    display: flex;
  }
  .cell label {
    font-size: 12px;
    font-weight: normal;
    flex: 1;
    justify-content: flex-start;
  }
  #layout div ul {
    list-style: none;
  }
  #layout div li {
    font-size: 10px;
    padding: 2px 4px;
    text-align: right;
  }
</style>
</head>
<body>
<!-- Sidebar: project blurb, links, and the generated file layout. -->
<nav>
  <h1>hyparquet</h1>
  <h2>parquet file reader</h2>
  <p>
    This is a simple online demo of the <a href="https://github.com/hyparam/hyparquet">hyparquet</a> parser for apache parquet files.
  </p>
  <p>
    Drag and drop a parquet file onto the dropzone to see parquet file metadata.
  </p>
  <ul>
    <li><a href="https://github.com/hyparam/hyparquet">github</a></li>
    <li><a href="https://www.npmjs.com/package/hyparquet">npm</a></li>
  </ul>
  <!-- Filled by the script with the byte layout of the dropped file. -->
  <div id="layout"></div>
</nav>
<!-- Drop target; the script replaces its content with the parsed metadata or an error. -->
<div id="dropzone">
  <label>Drop .parquet file here</label>
</div>
<script type="module">
  import { parquetMetadata, toJson } from './src/hyparquet.js'

  // Resolve the elements explicitly instead of relying on the implicit
  // window.<id> globals that browsers expose for elements with an id.
  const dropzone = document.getElementById('dropzone')
  const layout = document.getElementById('layout')

  /**
   * Escape a value so it can be interpolated into an HTML string.
   * Names read from the dropped file are untrusted and must not reach
   * innerHTML verbatim.
   *
   * @param {unknown} value anything; it is stringified first
   * @returns {string} the value with & < > " ' replaced by entities
   */
  function escapeHtml(value) {
    return String(value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;')
  }

  /**
   * Opening markup of a layout group. The group is left open on purpose:
   * the caller appends the cells and then the closing div tag.
   *
   * @param {string} name group title
   * @returns {string} HTML fragment
   */
  function group(name) {
    return `<div>${escapeHtml(name)}`
  }

  /**
   * Markup for one entry of the file layout.
   *
   * @param {string} name entry title
   * @param {number} start first byte of the entry
   * @param {number} bytes size of the entry
   * @param {number} end byte just past the entry
   * @returns {string} HTML fragment
   */
  function cell(name, start, bytes, end) {
    return `<div class="cell"><label>${escapeHtml(name)}</label><ul>`
      + `<li>start ${escapeHtml(start)}</li>`
      + `<li>bytes ${escapeHtml(bytes)}</li>`
      + `<li>end ${escapeHtml(end)}</li>`
      + '</ul></div>'
  }

  /**
   * Build the "File layout" panel: leading magic number, one group per row
   * group with a cell per column chunk, then the footer.
   *
   * @param {object} metadata result of toJson(parquetMetadata(buffer))
   * @param {number} byteLength total size of the file in bytes
   * @returns {string} HTML for the layout panel
   */
  function renderLayout(metadata, byteLength) {
    let html = '<h2>File layout</h2>'
    html += cell('PAR1', 0, 4, 4) // magic number

    metadata.row_groups.forEach((rowGroup, rowGroupIndex) => {
      html += group(`Row group ${rowGroupIndex} (${rowGroup.total_byte_size} bytes)`)
      for (const column of rowGroup.columns) {
        const columnMeta = column.meta_data
        const columnName = columnMeta.path_in_schema.join('.')
        // A column chunk starts at its dictionary page when there is one,
        // otherwise (or if the data page comes first) at its data page.
        let columnOffset = columnMeta.dictionary_page_offset
        if (!columnOffset || columnMeta.data_page_offset < columnOffset) {
          columnOffset = columnMeta.data_page_offset
        }
        columnOffset = Number(columnOffset)
        // Coerce like the offset so the sum below is always numeric addition.
        const bytes = Number(columnMeta.total_compressed_size)
        html += cell(`Column ${columnName}`, columnOffset, bytes, columnOffset + bytes)
      }
      html += '</div>'
    })

    // A parquet file ends with: <metadata> <4-byte metadata length> <"PAR1">.
    // NOTE(review): assumes metadata_length is the value of that length field,
    // i.e. the size of the serialized metadata alone; confirm in parquetMetadata.
    const magicStart = byteLength - 4
    const lengthStart = magicStart - 4
    const metadataLength = Number(metadata.metadata_length)
    html += cell('Metadata', lengthStart - metadataLength, metadataLength, lengthStart)
    html += cell('Metadata length', lengthStart, 4, magicStart)
    html += cell('PAR1', magicStart, 4, byteLength) // magic number
    return html
  }

  /**
   * Replace the dropzone content with the file name followed by `body`.
   * The name is set through textContent, so it is never parsed as HTML.
   *
   * @param {string} fileName name of the dropped file
   * @param {Node} body element to show below the name
   */
  function showResult(fileName, body) {
    const title = document.createElement('strong')
    title.textContent = fileName
    dropzone.replaceChildren(title, body)
  }

  dropzone.addEventListener('dragover', e => {
    e.preventDefault() // required for the element to accept a drop
    e.dataTransfer.dropEffect = 'copy'
    dropzone.classList.add('over')
  })

  dropzone.addEventListener('dragleave', () => {
    dropzone.classList.remove('over')
  })

  dropzone.addEventListener('drop', e => {
    e.preventDefault() // prevent dropped file from being "downloaded"
    dropzone.classList.remove('over')

    // Only the first dropped file is parsed.
    const file = e.dataTransfer.files[0]
    if (!file) return

    const reader = new FileReader()
    reader.onload = () => {
      try {
        const arrayBuffer = reader.result
        const metadata = toJson(parquetMetadata(arrayBuffer))
        console.log('metadata', metadata)

        // render file layout
        layout.innerHTML = renderLayout(metadata, arrayBuffer.byteLength)

        // display metadata
        const pre = document.createElement('pre')
        pre.textContent = JSON.stringify(metadata, null, 2)
        showResult(file.name, pre)
      } catch (err) {
        // Do not leave the layout of a previously dropped file on screen.
        layout.innerHTML = ''
        const message = document.createElement('div')
        message.className = 'error'
        message.textContent = `Error parsing file\n${err}`
        showResult(file.name, message)
      }
    }
    reader.onerror = event => {
      console.error('Error reading file', event)
      dropzone.innerText = `Error reading file\n${event.target.error}`
    }
    reader.readAsArrayBuffer(file)
  })
</script>
</body>
</html>