2024-05-05 17:35:23 +00:00
|
|
|
import { parquetMetadata, parquetMetadataAsync, parquetRead, parquetSchema, toJson } from './src/hyparquet.js'
|
2024-01-28 02:06:27 +00:00
|
|
|
|
|
|
|
|
// Look up the page elements this script reads from and writes to, by element id.
const [dropzone, fileInput, content, welcome, label, layout, metadataDiv] = [
  'dropzone',
  'file-input',
  'content',
  'welcome',
  'filename',
  'layout',
  'metadata',
].map(id => document.getElementById(id))

// Incremented on each dragenter and decremented on each dragleave of the dropzone.
let enterCount = 0
|
|
|
|
|
|
|
|
|
|
// Mark the dropzone as hovered and count the enter so nested enter/leave pairs balance out.
dropzone.addEventListener('dragenter', event => {
  const { dataTransfer } = event
  dataTransfer.dropEffect = 'copy'
  dropzone.classList.add('over')
  enterCount += 1
})
|
|
|
|
|
|
|
|
|
|
// Cancel the default dragover handling on the dropzone.
dropzone.addEventListener('dragover', event => event.preventDefault())
|
|
|
|
|
|
|
|
|
|
// Remove the hover styling once every counted dragenter has been matched by a dragleave.
dropzone.addEventListener('dragleave', () => {
  enterCount -= 1
  if (enterCount === 0) {
    dropzone.classList.remove('over')
  }
})
|
|
|
|
|
|
|
|
|
|
// Handle a drop on the dropzone: load the first dropped file, and if the
// first dropped item is a string that starts with "http", load it as a url.
dropzone.addEventListener('drop', e => {
  e.preventDefault() // prevent dropped file from being "downloaded"
  dropzone.classList.remove('over')
  // A drag that ends in a drop is not followed by a dragleave, so the
  // enter/leave counter would stay above zero and the 'over' class would
  // never be removed on later drags. Start the next drag from a clean count.
  enterCount = 0

  const { files, items } = e.dataTransfer
  if (files.length > 0) {
    processFile(files[0])
  }
  if (items.length > 0) {
    const item = items[0]
    if (item.kind === 'string') {
      item.getAsString(str => {
        if (str.startsWith('http')) {
          processUrl(str)
        }
      })
    }
  }
})
|
|
|
|
|
|
2024-02-05 07:28:59 +00:00
|
|
|
/**
 * Load a parquet file from a url and render it.
 *
 * Issues a HEAD request to confirm the file is reachable and to read its
 * content-length, then builds an AsyncBuffer whose slice() fetches byte
 * ranges on demand. Any failure is shown in the content element.
 *
 * @param {string} url - url of the parquet file (comes from a user drop, so untrusted)
 * @returns {Promise<void>}
 */
async function processUrl(url) {
  /**
   * Replace the content area with the url and an error message.
   * Uses textContent so neither the url nor the message is parsed as HTML.
   *
   * @param {unknown} message - detail shown after "Error fetching file"
   */
  function showError(message) {
    const title = document.createElement('strong')
    title.textContent = url
    const error = document.createElement('div')
    error.className = 'error'
    error.textContent = `Error fetching file\n${message}`
    content.innerHTML = ''
    content.appendChild(title)
    content.appendChild(error)
  }

  content.innerHTML = ''
  try {
    // Check if file is accessible and get its size
    const head = await fetch(url, { method: 'HEAD' })
    if (!head.ok) {
      showError(`${head.status} ${head.statusText}`)
      return
    }
    const size = head.headers.get('content-length')
    if (!size) {
      showError('No content-length header')
      return
    }

    // Construct an AsyncBuffer that fetches file chunks
    const asyncBuffer = {
      byteLength: Number(size),
      slice: async (start, end) => {
        // slice() takes an exclusive end; the Range header end is inclusive
        const rangeEnd = end === undefined ? '' : end - 1
        console.log(`Fetch ${url} bytes=${start}-${rangeEnd}`)
        const res = await fetch(url, {
          headers: { Range: `bytes=${start}-${rangeEnd}` },
        })
        // Do not hand an error page body to the parquet parser
        if (!res.ok) {
          throw new Error(`Range request failed: ${res.status} ${res.statusText}`)
        }
        return res.arrayBuffer()
      },
    }
    const metadata = await parquetMetadataAsync(asyncBuffer)
    // The name is displayed as plain text by the sidebar, so pass the bare url
    // rather than anchor markup that would show up literally.
    await render(asyncBuffer, metadata, url)
  } catch (e) {
    console.error('Error fetching file', e)
    showError(e)
  }
}
|
|
|
|
|
|
2024-01-28 02:50:14 +00:00
|
|
|
/**
 * Read a local file into memory, parse its parquet metadata and render it.
 *
 * Read and parse failures are logged and shown in the content element.
 *
 * @param {File} file - file chosen via the file input or dropped on the page
 */
function processFile(file) {
  /**
   * Replace the content area with the file name and an error message.
   * Uses textContent so neither the file name nor the message is parsed as HTML.
   *
   * @param {string} message - full error text to display
   */
  function showError(message) {
    const title = document.createElement('strong')
    title.textContent = file.name
    const error = document.createElement('div')
    error.className = 'error'
    error.textContent = message
    content.innerHTML = ''
    content.appendChild(title)
    content.appendChild(error)
  }

  content.innerHTML = ''
  const reader = new FileReader()
  reader.onload = async e => {
    try {
      const arrayBuffer = e.target.result
      const metadata = parquetMetadata(arrayBuffer)
      await render(arrayBuffer, metadata, file.name)
    } catch (err) {
      console.error('Error parsing file', err)
      showError(`Error parsing file\n${err}`)
    }
  }
  reader.onerror = e => {
    console.error('Error reading file', e)
    showError(`Error reading file\n${e.target.error}`)
  }
  reader.readAsArrayBuffer(file)
}
|
|
|
|
|
|
2024-05-05 17:35:23 +00:00
|
|
|
/**
 * Render the sidebar for a parquet file, then read it with rowEnd 1000 and
 * append the resulting table to the content element.
 *
 * @param {object} asyncBuffer - file contents passed to parquetRead as `file`
 * @param {object} metadata - parsed parquet metadata
 * @param {string} name - display name used in the sidebar and the timing log
 * @returns {Promise<void>}
 */
async function render(asyncBuffer, metadata, name) {
  renderSidebar(asyncBuffer, metadata, name)

  // Column headers are the names of the schema's top-level children
  const schema = parquetSchema(metadata)
  const header = schema.children.map(({ element }) => element.name)

  const startTime = performance.now()
  const onComplete = data => {
    const elapsed = performance.now() - startTime
    console.log(`parsed ${name} in ${elapsed.toFixed(0)} ms`)
    content.appendChild(renderTable(header, data))
  }
  await parquetRead({ file: asyncBuffer, rowEnd: 1000, onComplete })
}
|
|
|
|
|
|
2024-02-05 07:28:59 +00:00
|
|
|
/**
 * Fill the sidebar: file name label, file layout view and metadata view.
 *
 * @param {object} asyncBuffer - file contents; only its byteLength is read here
 * @param {object} metadata - parsed parquet metadata
 * @param {string} name - display name written to the label as text
 */
function renderSidebar(asyncBuffer, metadata, name) {
  label.innerText = name

  // Replace the previous file layout view
  layout.innerHTML = ''
  const layoutView = fileLayout(metadata, asyncBuffer.byteLength)
  layout.appendChild(layoutView)

  // Replace the previous metadata view
  metadataDiv.innerHTML = ''
  const metadataView = fileMetadata(toJson(metadata))
  metadataDiv.appendChild(metadataView)
}
|
|
|
|
|
|
2024-05-05 17:35:23 +00:00
|
|
|
// Clicking the welcome element forwards the click to the file input.
welcome.addEventListener('click', () => fileInput.click())
|
|
|
|
|
|
|
|
|
|
// When the file input changes, process the first selected file (if any).
fileInput.addEventListener('change', () => {
  const { files } = fileInput
  if (files.length === 0) return
  processFile(files[0])
})
|
2024-01-28 02:55:34 +00:00
|
|
|
|
|
|
|
|
// Render file layout
|
2024-02-05 07:28:59 +00:00
|
|
|
/**
 * Build the collapsible "File layout" view: the leading magic number, one
 * group per row group with a cell per column chunk, the metadata block and
 * the trailing magic number.
 *
 * @param {object} metadata - parsed parquet metadata (row_groups, metadata_length)
 * @param {number} byteLength - total size of the file in bytes
 * @returns {HTMLDivElement} element that starts collapsed and toggles when its heading is clicked
 */
function fileLayout(metadata, byteLength) {
  let html = '<h2>File layout</h2>'
  html += cell('PAR1', 0, 4, 4) // magic number

  // Iterate with numeric indexes; for...in on an array yields string keys
  // and would also enumerate any inherited enumerable properties.
  const rowGroups = metadata.row_groups || []
  for (const [rowGroupIndex, rowGroup] of rowGroups.entries()) {
    html += group(`Row group ${rowGroupIndex} (${rowGroup.total_byte_size.toLocaleString()} bytes)`)
    for (const column of rowGroup.columns) {
      const columnName = column.meta_data.path_in_schema.join('.')

      // The column chunk starts at whichever comes first: the dictionary
      // page (when it has a non-zero offset) or the first data page.
      let columnOffset = column.meta_data.dictionary_page_offset
      if (!columnOffset || column.meta_data.data_page_offset < columnOffset) {
        columnOffset = column.meta_data.data_page_offset
      }
      columnOffset = Number(columnOffset)
      const bytes = Number(column.meta_data.total_compressed_size)
      const end = columnOffset + bytes
      html += cell(`Column ${columnName}`, columnOffset, bytes, end)
    }
    html += '</div>' // closes the <div> opened by group()
  }

  // The metadata block sits immediately before the 4-byte trailing magic number
  const metadataStart = byteLength - metadata.metadata_length - 4
  html += cell('Metadata', metadataStart, metadata.metadata_length, byteLength - 4)
  html += cell('PAR1', byteLength - 4, 4, byteLength) // magic number

  const div = document.createElement('div')
  div.innerHTML = html
  div.classList.add('collapsed') // start collapsed
  // The first child is the <h2> heading; clicking it expands or collapses the view
  div.children[0].addEventListener('click', () => {
    div.classList.toggle('collapsed')
  })
  return div
}
|
|
|
|
|
/**
 * Open a group container. Returns an unclosed `<div>` followed by the name;
 * the caller is responsible for appending the closing tag.
 *
 * @param {string} name - text placed directly after the opening tag
 * @returns {string} html fragment
 */
function group(name) {
  const opening = '<div>'
  return opening.concat(name)
}
|
|
|
|
|
/**
 * Build the html for one cell of the file layout: a label plus its byte range.
 *
 * The name is HTML-escaped because it can contain text read from the file
 * (column names), and the result is inserted into the page as markup.
 *
 * @param {string} name - label for the cell
 * @param {number} start - start offset in bytes
 * @param {number} bytes - length in bytes
 * @param {number} end - end offset in bytes
 * @returns {string} html fragment
 */
function cell(name, start, bytes, end) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }
  const safeName = String(name).replace(/[&<>"']/g, ch => entities[ch])
  return `
    <div class="cell">
      <label>${safeName}</label>
      <ul>
        <li>start ${start.toLocaleString()}</li>
        <li>bytes ${bytes.toLocaleString()}</li>
        <li>end ${end.toLocaleString()}</li>
      </ul>
    </div>`
}
|
2024-01-28 03:29:21 +00:00
|
|
|
|
|
|
|
|
// Render metadata
|
|
|
|
|
/**
 * Build the collapsible "Metadata" view showing the metadata as indented JSON.
 *
 * The JSON is set through textContent rather than parsed as HTML, so strings
 * from the file that contain `<` or `&` are displayed literally.
 *
 * @param {object} metadata - JSON-serializable metadata
 * @returns {HTMLDivElement} element that starts collapsed and toggles when its heading is clicked
 */
function fileMetadata(metadata) {
  const heading = document.createElement('h2')
  heading.textContent = 'Metadata'

  const pre = document.createElement('pre')
  pre.textContent = JSON.stringify(metadata, null, 2)

  const div = document.createElement('div')
  div.appendChild(heading)
  div.appendChild(pre)
  div.classList.add('collapsed') // start collapsed
  heading.addEventListener('click', () => {
    div.classList.toggle('collapsed')
  })
  return div
}
|
2024-05-05 17:35:23 +00:00
|
|
|
|
|
|
|
|
/**
 * Build a table element with one header row and one body row per data row.
 *
 * @param {Iterable<string>} header - column names for the header row
 * @param {Iterable<object>} data - rows; each row's Object.values become its cells
 * @returns {HTMLTableElement}
 */
function renderTable(header, data) {
  // Create an element of the given tag and set its innerText
  const textCell = (tag, text) => {
    const el = document.createElement(tag)
    el.innerText = text
    return el
  }

  const headerRow = document.createElement('tr')
  for (const columnName of header) {
    headerRow.appendChild(textCell('th', columnName))
  }
  const thead = document.createElement('thead')
  thead.appendChild(headerRow)

  const tbody = document.createElement('tbody')
  for (const row of data) {
    const tr = document.createElement('tr')
    for (const value of Object.values(row)) {
      tr.appendChild(textCell('td', stringify(value)))
    }
    tbody.appendChild(tr)
  }

  const table = document.createElement('table')
  table.appendChild(thead)
  table.appendChild(tbody)
  return table
}
|
2024-05-05 21:24:21 +00:00
|
|
|
|
|
|
|
|
/**
 * Convert a cell value into something displayable.
 *
 * undefined becomes an empty string. Otherwise the value is passed through
 * toJson; objects (including arrays and null) are serialized with
 * JSON.stringify, and anything else is returned as converted.
 *
 * @param {any} value - raw cell value
 * @returns {any} a string for undefined, string and object inputs; otherwise the converted value
 */
function stringify(value) {
  if (value === undefined) return ''
  const json = toJson(value)
  return typeof json === 'object' ? JSON.stringify(json) : json
}
|