diff --git a/.eslintrc.json b/.eslintrc.json index 7ff2925..e0e1ea6 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -1,5 +1,6 @@ { "env": { + "browser": true, "node": true }, "parser": "@typescript-eslint/parser", diff --git a/demo.css b/demo.css new file mode 100644 index 0000000..5aa0e74 --- /dev/null +++ b/demo.css @@ -0,0 +1,75 @@ +* { + box-sizing: border-box; + margin: 0; + padding: 0; +} +body { + display: flex; + font-family: sans-serif; + height: 100vh; +} +nav { + width: 300px; + padding: 10px; + overflow-y: auto; +} +h1 { + font-size: 20pt; +} +h2 { + font-size: 12pt; +} +p { + margin: 10px 0; +} +label { + height: 100%; + display: flex; + align-items: center; + justify-content: center; + font-size: 20px; +} +#dropzone { + border: 2px dashed #08e; + border-radius: 10px; + flex: 1; + margin: 10px; + padding: 10px; + color: #444; + overflow: auto; +} +.over { + background-color: lightblue; +} +.error { + color: #c11; +} +#layout { + margin-top: 20px; +} +#layout div { + background-color: rgba(0, 0, 0, 0.05); + border: 1px solid #ccc; + border-radius: 4px; + font-size: 12px; + margin-top: 4px; + padding: 4px; + word-break: break-all; +} +.cell { + display: flex; +} +.cell label { + font-size: 12px; + font-weight: normal; + flex: 1; + justify-content: flex-start; +} +#layout div ul { + list-style: none; +} +#layout div li { + font-size: 10px; + padding: 2px 4px; + text-align: right; +} diff --git a/demo.js b/demo.js new file mode 100644 index 0000000..4f8f66f --- /dev/null +++ b/demo.js @@ -0,0 +1,85 @@ +import { parquetMetadata, toJson } from './src/hyparquet.js' + +const dropzone = document.getElementById('dropzone') +const layout = document.getElementById('layout') + +dropzone.addEventListener('dragover', e => { + e.preventDefault() + e.dataTransfer.dropEffect = 'copy' + dropzone.classList.add('over') +}) + +dropzone.addEventListener('dragleave', () => { + dropzone.classList.remove('over') +}) + +function group(name) { + return `
${name}` +} +function cell(name, start, bytes, end) { + return ` +
+ + +
` +} + +dropzone.addEventListener('drop', e => { + e.preventDefault() // prevent dropped file from being "downloaded" + dropzone.classList.remove('over') + + const { files } = e.dataTransfer + if (files.length > 0) { + const file = files[0] + const reader = new FileReader() + reader.onload = e => { + try { + const arrayBuffer = e.target.result + const metadata = toJson(parquetMetadata(arrayBuffer)) + + console.log('metadata', metadata) + + // render file layout + let html = '

File layout

' + html += cell('PAR1', 0, 4, 4) // magic number + for (const rowGroupIndex in metadata.row_groups) { + const rowGroup = metadata.row_groups[rowGroupIndex] + html += group(`Row group ${rowGroupIndex} (${rowGroup.total_byte_size} bytes)`) + for (const column of rowGroup.columns) { + const columnName = column.meta_data.path_in_schema.join('.') + + let columnOffset = column.meta_data.dictionary_page_offset + if (!columnOffset || column.meta_data.data_page_offset < columnOffset) { + columnOffset = column.meta_data.data_page_offset + } + columnOffset = Number(columnOffset) + const bytes = column.meta_data.total_compressed_size + const end = columnOffset + bytes + html += cell(`Column ${columnName}`, columnOffset, bytes, end) + } + html += '
' + } + const metadataStart = arrayBuffer.byteLength - metadata.metadata_length - 4 + html += cell('Metadata', metadataStart, metadata.metadata_length, arrayBuffer.byteLength - 4) + html += cell('PAR1', arrayBuffer.byteLength - 4, 4, arrayBuffer.byteLength) // magic number + layout.innerHTML = html + + // display metadata + dropzone.innerHTML = `${file.name}` + dropzone.innerHTML += `
${JSON.stringify(metadata, null, 2)}
` + } catch (e) { + dropzone.innerHTML = `${file.name}` + dropzone.innerHTML += `
Error parsing file\n${e}
` + } + } + reader.onerror = e => { + console.error('Error reading file', e) + dropzone.innerText = `Error reading file\n${e.target.error}` + } + reader.readAsArrayBuffer(file) + } +}) diff --git a/index.html b/index.html index e5ccac0..d9bd797 100644 --- a/index.html +++ b/index.html @@ -3,83 +3,7 @@ hyparquet parquet file parser - +