// hyparquet/demo.js
import { parquetMetadata, parquetMetadataAsync, toJson } from './src/hyparquet.js'
// Page elements the demo reads from and renders into, looked up once by id
const [dropzone, layout, metadataDiv, fileInput] = [
  'dropzone',
  'layout',
  'metadata',
  'file-input',
].map(id => document.getElementById(id))
// Highlight the dropzone while something is being dragged over it
dropzone.addEventListener('dragover', event => {
  event.preventDefault()
  event.dataTransfer.dropEffect = 'copy'
  dropzone.classList.add('over')
})

// Remove the highlight once the drag leaves the dropzone
dropzone.addEventListener('dragleave', () => {
  dropzone.classList.remove('over')
})

// Handle a drop: the first dropped file is parsed, and/or a dropped
// string that starts with "http" is loaded as a url
dropzone.addEventListener('drop', event => {
  event.preventDefault() // prevent dropped file from being "downloaded"
  dropzone.classList.remove('over')

  const { files, items } = event.dataTransfer
  if (files.length > 0) {
    processFile(files[0])
  }

  // Only the first transfer item is inspected
  if (items.length === 0 || items[0].kind !== 'string') return
  items[0].getAsString(text => {
    if (text.startsWith('http')) {
      processUrl(text)
    }
  })
})
/**
 * Load a remote parquet file by url and render its layout and metadata.
 *
 * Sends a HEAD request to check that the file is reachable and to read its
 * size, then builds an object with `byteLength` and an async
 * `slice(start, end)` that fetches byte ranges on demand, and hands it to
 * `parquetMetadataAsync`. Any failure is displayed inside the dropzone.
 *
 * @param {string} url address of the parquet file
 * @returns {Promise<void>}
 */
async function processUrl(url) {
  // The url comes from a drop event and error text comes from the network,
  // so everything is escaped before it is interpolated into markup.
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }
  const escapeHtml = text => String(text).replace(/[&<>"']/g, char => htmlEntities[char])
  const safeUrl = escapeHtml(url)
  const showError = detail => {
    dropzone.innerHTML = `<strong>${safeUrl}</strong><div class="error">Error fetching file\n${escapeHtml(detail)}</div>`
  }

  try {
    // Check if file is accessible and get its size
    const head = await fetch(url, { method: 'HEAD' })
    if (!head.ok) {
      showError(`${head.status} ${head.statusText}`)
      return
    }
    const size = head.headers.get('content-length')
    if (!size) {
      showError('No content-length header')
      return
    }

    // Construct an AsyncBuffer that fetches file chunks
    const asyncBuffer = {
      byteLength: Number(size),
      slice: async (start, end) => {
        // HTTP ranges are inclusive, while slice() ends are exclusive
        const rangeEnd = end === undefined ? '' : end - 1
        const res = await fetch(url, {
          headers: { Range: `bytes=${start}-${rangeEnd}` },
        })
        if (!res.ok) {
          // Do not hand an error page to the parquet parser
          throw new Error(`${res.status} ${res.statusText}`)
        }
        const body = await res.arrayBuffer()
        // A 200 (instead of 206) means the server ignored the Range header
        // and sent the whole file, so cut the requested window out locally.
        return res.status === 200 ? body.slice(start, end) : body
      },
    }

    const metadata = await parquetMetadataAsync(asyncBuffer)
    // renderSidebar inserts the name as HTML, so pass an escaped link
    renderSidebar(asyncBuffer, metadata, `<a href="${safeUrl}">${safeUrl}</a>`)
  } catch (e) {
    console.error('Error fetching file', e)
    showError(e)
  }
}
/**
 * Read a local file into memory, parse its parquet metadata and render it.
 * Parse failures and read failures are reported inside the dropzone.
 *
 * @param {File} file file that was dropped or picked by the user
 */
function processFile(file) {
  // File names and error messages are arbitrary text: escape them before
  // they are interpolated into markup.
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }
  const escapeHtml = text => String(text).replace(/[&<>"']/g, char => htmlEntities[char])
  const safeName = escapeHtml(file.name)

  const reader = new FileReader()
  reader.onload = e => {
    try {
      const arrayBuffer = e.target.result
      const metadata = parquetMetadata(arrayBuffer)
      // renderSidebar inserts the name as HTML, so pass the escaped form
      renderSidebar(arrayBuffer, metadata, safeName)
    } catch (err) {
      console.error('Error parsing file', err)
      dropzone.innerHTML = `<strong>${safeName}</strong><div class="error">Error parsing file\n${escapeHtml(err)}</div>`
    }
  }
  reader.onerror = e => {
    console.error('Error reading file', e)
    // innerText is plain text, no escaping needed
    dropzone.innerText = `Error reading file\n${e.target.error}`
  }
  reader.readAsArrayBuffer(file)
}
/**
 * Fill the sidebar for a parsed file: a heading with its name, the file
 * layout panel, and the metadata panel.
 *
 * @param {{byteLength: number}} asyncBuffer file contents; only byteLength is read here
 * @param {object} metadata parsed parquet metadata
 * @param {string} name heading markup; inserted as HTML, not as text
 */
function renderSidebar(asyncBuffer, metadata, name) {
  // Heading replaces whatever the layout container showed before
  layout.innerHTML = `<strong>${name}</strong>`

  // render file layout
  const layoutPanel = fileLayout(metadata, asyncBuffer.byteLength)
  layout.appendChild(layoutPanel)

  // display metadata
  metadataDiv.innerHTML = ''
  const metadataPanel = fileMetadata(toJson(metadata))
  metadataDiv.appendChild(metadataPanel)
}
// Clicking the dropzone forwards the click to the file input
dropzone.addEventListener('click', () => {
  fileInput.click()
})

// Process the first file chosen through the file input
fileInput.addEventListener('change', () => {
  if (fileInput.files.length === 0) return
  processFile(fileInput.files[0])
})
/**
 * Render file layout.
 *
 * Builds a collapsible panel listing, in order: the leading magic number,
 * every row group with one cell per column chunk, the metadata section and
 * the trailing magic number. The panel starts collapsed and toggles when its
 * heading is clicked.
 *
 * @param {object} metadata parsed parquet metadata; reads `row_groups` and `metadata_length`
 * @param {number} byteLength total size of the file in bytes
 * @returns {HTMLElement} div holding the rendered layout
 */
function fileLayout(metadata, byteLength) {
  let html = '<h2>File layout</h2>'
  html += cell('PAR1', 0, 4, 4) // magic number

  // entries() visits array elements only; for...in would also walk any
  // other enumerable key on the array and yields string indices.
  for (const [rowGroupIndex, rowGroup] of metadata.row_groups.entries()) {
    html += group(`Row group ${rowGroupIndex} (${rowGroup.total_byte_size.toLocaleString()} bytes)`)
    for (const column of rowGroup.columns) {
      const columnMeta = column.meta_data
      const columnName = columnMeta.path_in_schema.join('.')
      // Start of the column chunk: the dictionary page offset when it is
      // set and comes first, otherwise the data page offset.
      let columnOffset = columnMeta.dictionary_page_offset
      if (!columnOffset || columnMeta.data_page_offset < columnOffset) {
        columnOffset = columnMeta.data_page_offset
      }
      // Number() normalises the values for the addition below
      // (presumably they can be BigInt - TODO confirm against the parser)
      columnOffset = Number(columnOffset)
      const bytes = Number(columnMeta.total_compressed_size)
      const end = columnOffset + bytes
      html += cell(`Column ${columnName}`, columnOffset, bytes, end)
    }
    html += '</div>' // closes the div opened by group()
  }

  const metadataStart = byteLength - metadata.metadata_length - 4
  html += cell('Metadata', metadataStart, metadata.metadata_length, byteLength - 4)
  html += cell('PAR1', byteLength - 4, 4, byteLength) // magic number

  const div = document.createElement('div')
  div.innerHTML = html
  div.classList.add('collapsed') // start collapsed
  // First child is the <h2> heading: clicking it expands/collapses the panel
  div.children[0].addEventListener('click', () => {
    div.classList.toggle('collapsed')
  })
  return div
}
/**
 * Open a group container and write its title.
 * The returned markup is intentionally left unclosed: the caller appends the
 * group's contents and the closing tag.
 *
 * @param {string} name title placed at the start of the group
 * @returns {string} opening div tag followed by the title
 */
function group(name) {
  const openTag = '<div>'
  return `${openTag}${name}`
}
/**
 * Render one entry of the file layout: a label plus its start offset, size
 * and end offset, each formatted with toLocaleString().
 *
 * @param {string} name label text (escaped before insertion)
 * @param {number|bigint} start offset of the first byte
 * @param {number|bigint} bytes size in bytes
 * @param {number|bigint} end offset just past the last byte
 * @returns {string} markup for the cell
 */
function cell(name, start, bytes, end) {
  // Labels can carry column names read from the file, so escape them
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }
  const label = String(name).replace(/[&<>"']/g, char => htmlEntities[char])
  return `
    <div class="cell">
      <label>${label}</label>
      <ul>
        <li>start ${start.toLocaleString()}</li>
        <li>bytes ${bytes.toLocaleString()}</li>
        <li>end ${end.toLocaleString()}</li>
      </ul>
    </div>`
}
/**
 * Render metadata.
 *
 * Builds a collapsible panel with a heading and the metadata pretty-printed
 * as JSON. The panel starts collapsed and toggles when its heading is
 * clicked.
 *
 * @param {any} metadata JSON-serialisable metadata to display
 * @returns {HTMLElement} div holding the heading and the JSON text
 */
function fileMetadata(metadata) {
  const div = document.createElement('div')

  const heading = document.createElement('h2')
  heading.textContent = 'Metadata'
  div.appendChild(heading)

  // The JSON holds strings read from the file; textContent shows them
  // verbatim instead of letting the browser parse them as markup.
  const pre = document.createElement('pre')
  pre.textContent = JSON.stringify(metadata, null, 2)
  div.appendChild(pre)

  div.classList.add('collapsed') // start collapsed
  heading.addEventListener('click', () => {
    div.classList.toggle('collapsed')
  })
  return div
}