2024-09-14 22:12:03 +00:00
|
|
|
import HighTable, { DataFrame, sortableDataFrame } from 'hightable'
|
2024-09-12 07:43:28 +00:00
|
|
|
import { compressors } from 'hyparquet-compressors'
|
2024-09-16 02:29:31 +00:00
|
|
|
import React, { useEffect, useState } from 'react'
|
2024-09-14 22:12:03 +00:00
|
|
|
import { parquetReadObjects } from '../src/hyparquet.js'
|
|
|
|
|
import { FileMetaData, parquetMetadataAsync, parquetSchema } from '../src/metadata.js'
|
2024-09-12 07:43:28 +00:00
|
|
|
import type { AsyncBuffer } from '../src/types.js'
|
|
|
|
|
import { asyncBufferFromUrl } from '../src/utils.js'
|
2024-09-14 00:40:18 +00:00
|
|
|
import Dropdown from './Dropdown.js'
|
2024-09-12 07:43:28 +00:00
|
|
|
import Dropzone from './Dropzone.js'
|
|
|
|
|
import Layout from './Layout.js'
|
2024-09-14 00:40:18 +00:00
|
|
|
import ParquetLayout from './ParquetLayout.js'
|
|
|
|
|
import ParquetMetadata from './ParquetMetadata.js'
|
|
|
|
|
|
|
|
|
|
/** Which view of the loaded parquet file the page renders. */
type Lens = 'table' | 'metadata' | 'layout'
|
2024-09-12 07:43:28 +00:00
|
|
|
|
|
|
|
|
/**
 * Hyparquet demo viewer page
 *
 * Loads a parquet file from the optional `url` prop, from a dropped file, or
 * from a dropped url, and renders it through the selected lens (table,
 * metadata or layout). Failures while loading are stored in the `error`
 * state, which is handed to Layout.
 *
 * @param {Object} props
 * @param {string} [props.url] - parquet url to load while no dataframe is loaded
 * @returns {ReactNode}
 */
export default function App({ url }: { url?: string }) {
  const [progress, setProgress] = useState<number>()
  const [error, setError] = useState<Error>()
  const [df, setDf] = useState<DataFrame>()
  const [name, setName] = useState<string>()
  const [lens, setLens] = useState<Lens>('table')
  const [metadata, setMetadata] = useState<FileMetaData>()
  const [byteLength, setByteLength] = useState<number>()

  /**
   * Store a load failure in the error state, wrapping non-Error values.
   *
   * @param {unknown} e - caught value
   */
  function reportError(e: unknown) {
    setError(e instanceof Error ? e : new Error(String(e)))
  }

  useEffect(() => {
    if (!df && url) {
      // Without the catch, a failed fetch or parse would be an unhandled rejection
      asyncBufferFromUrl(url)
        .then(asyncBuffer => setAsyncBuffer(url, asyncBuffer))
        .catch(reportError)
    }
  }, [ url ])

  /**
   * Load a parquet file dropped onto the page.
   *
   * @param {File} file - dropped file
   */
  async function onFileDrop(file: File) {
    // Clear query string
    history.pushState({}, '', location.pathname)
    try {
      await setAsyncBuffer(file.name, await file.arrayBuffer())
    } catch (e) {
      reportError(e)
    }
  }

  /**
   * Load a parquet file from a url dropped onto the page.
   *
   * @param {string} url - dropped url
   */
  async function onUrlDrop(url: string) {
    // Add key=url to query string
    const params = new URLSearchParams(location.search)
    params.set('key', url)
    history.pushState({}, '', `${location.pathname}?${params}`)
    try {
      await setAsyncBuffer(url, await asyncBufferFromUrl(url))
    } catch (e) {
      reportError(e)
    }
  }

  /**
   * Read the parquet metadata and publish name, size and dataframe to state.
   * Rejects if the metadata cannot be read; callers report the failure.
   *
   * @param {string} name - display name (file name or url)
   * @param {AsyncBuffer} asyncBuffer - parquet file contents
   */
  async function setAsyncBuffer(name: string, asyncBuffer: AsyncBuffer) {
    // TODO: Replace welcome with spinner
    const metadata = await parquetMetadataAsync(asyncBuffer)
    setMetadata(metadata)
    setName(name)
    setByteLength(asyncBuffer.byteLength)
    let df = parquetDataFrame(asyncBuffer, metadata)
    // Only dataframes of at most 10000 rows are wrapped to be sortable
    if (df.numRows <= 10000) {
      df = sortableDataFrame(df)
    }
    setDf(df)
    document.getElementById('welcome')?.remove()
  }

  return <Layout progress={progress} error={error}>
    <Dropzone
      onError={(e) => setError(e)}
      onFileDrop={onFileDrop}
      onUrlDrop={onUrlDrop}>
      {metadata && df && <>
        <div className='top-header'>{name}</div>
        <div className='view-header'>
          {byteLength !== undefined && <span title={byteLength.toLocaleString() + ' bytes'}>{formatFileSize(byteLength)}</span>}
          <span>{df.numRows.toLocaleString()} rows</span>
          <Dropdown label={lens}>
            <button onClick={() => setLens('table')}>Table</button>
            <button onClick={() => setLens('metadata')}>Metadata</button>
            <button onClick={() => setLens('layout')}>Layout</button>
          </Dropdown>
        </div>
        {lens === 'table' && <HighTable data={df} onError={setError} />}
        {lens === 'metadata' && <ParquetMetadata metadata={metadata} />}
        {lens === 'layout' && byteLength !== undefined && <ParquetLayout byteLength={byteLength} metadata={metadata} />}
      </>}
    </Dropzone>
  </Layout>
}
|
|
|
|
|
|
|
|
|
|
/**
 * Convert a parquet file into a dataframe.
 *
 * The header and row count are taken from the metadata up front; row data is
 * only read from the file when `rows` is called.
 *
 * @param {AsyncBuffer} file - parquet file asyncbuffer
 * @param {FileMetaData} metadata - parquet file metadata
 * @returns {DataFrame} dataframe
 */
function parquetDataFrame(file: AsyncBuffer, metadata: FileMetaData): DataFrame {
  const { children } = parquetSchema(metadata)
  return {
    header: children.map(child => child.element.name),
    numRows: Number(metadata.num_rows),
    /**
     * Read a range of rows from the parquet file.
     *
     * @param {number} rowStart
     * @param {number} rowEnd
     * @returns {ReturnType<typeof parquetReadObjects>} rows for the requested range, as produced by parquetReadObjects
     */
    rows(rowStart, rowEnd) {
      console.log(`reading rows ${rowStart}-${rowEnd}`)
      // Pass the metadata we already have so it is not fetched and parsed again on every read
      return parquetReadObjects({ file, metadata, compressors, rowStart, rowEnd })
    },
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the file size in human readable format.
 *
 * Sizes use powers of 1024 with the units b, kb, mb, gb and tb. Values below
 * 10 in the chosen unit keep one decimal place; larger values are rounded to
 * an integer. Sizes beyond the tb range are still expressed in tb.
 *
 * @param {number} bytes file size in bytes
 * @returns {string} formatted file size string
 */
function formatFileSize(bytes: number): string {
  const sizes = ['b', 'kb', 'mb', 'gb', 'tb']
  // Zero, negative, fractional and non-finite values have no usable
  // power-of-1024 exponent, so print them verbatim in bytes.
  if (!(bytes >= 1) || !Number.isFinite(bytes)) return bytes + ' b'
  // Clamp to the largest known unit so huge sizes never index past the table
  const unit = Math.min(Math.floor(Math.log2(bytes) / 10), sizes.length - 1)
  if (unit === 0) return bytes + ' b'
  const base = bytes / Math.pow(1024, unit)
  return (base < 10 ? base.toFixed(1) : Math.round(base)) + ' ' + sizes[unit]
}
|