hyparquet/demo/App.tsx

125 lines
4.3 KiB
TypeScript
Raw Normal View History

2024-10-20 00:02:03 +00:00
import HighTable, { DataFrame, rowCache } from 'hightable'
2024-09-16 02:29:31 +00:00
import React, { useEffect, useState } from 'react'
import { FileMetaData, parquetMetadataAsync, parquetSchema } from '../src/metadata.js'
import { byteLengthFromUrl } from '../src/utils.js'
import Dropdown from './Dropdown.js'
2024-09-12 07:43:28 +00:00
import Dropzone from './Dropzone.js'
import Layout from './Layout.js'
import ParquetLayout from './ParquetLayout.js'
import ParquetMetadata from './ParquetMetadata.js'
import { AsyncBufferFrom, asyncBufferFrom, parquetQueryWorker } from './workers/parquetWorkerClient.js'
/** Which view of the loaded file App renders: the data table, the file metadata, or the file layout. */
type Lens = 'table' | 'metadata' | 'layout'
2024-09-12 07:43:28 +00:00
/**
 * Hyparquet demo viewer page.
 *
 * Loads a parquet file from a dropped file or from a url, then renders it
 * through the selected lens (table, metadata or layout). Any failure while
 * loading is stored in the `error` state, which is handed to Layout.
 *
 * @param {Object} props
 * @param {string} [props.url] url to load when the component mounts or the url changes
 * @returns {ReactNode}
 */
export default function App({ url }: { url?: string }) {
  // progress is never updated inside this component; the state is kept so
  // Layout keeps receiving the same prop as before.
  const [progress] = useState<number>()
  const [error, setError] = useState<Error>()
  const [df, setDf] = useState<DataFrame>()
  const [name, setName] = useState<string>()
  const [lens, setLens] = useState<Lens>('table')
  const [metadata, setMetadata] = useState<FileMetaData>()
  const [byteLength, setByteLength] = useState<number>()

  useEffect(() => {
    if (!df && url) {
      // onUrlDrop catches its own failures, so this promise never rejects
      void onUrlDrop(url)
    }
  }, [ url ])

  /**
   * Store a caught value in the error state, wrapping non-Error throwables.
   *
   * @param {unknown} e value caught from a failed load
   */
  function reportError(e: unknown) {
    setError(e instanceof Error ? e : new Error(String(e)))
  }

  /**
   * Load a parquet file that was dropped onto the page.
   *
   * @param {File} file dropped file
   */
  async function onFileDrop(file: File) {
    // Clear query string
    history.pushState({}, '', location.pathname)
    try {
      await setAsyncBuffer(file.name, { file, byteLength: file.size })
    } catch (e) {
      reportError(e)
    }
  }

  /**
   * Load a parquet file from a url.
   *
   * @param {string} url location of the parquet file
   */
  async function onUrlDrop(url: string) {
    // Add key=url to query string
    const params = new URLSearchParams(location.search)
    params.set('key', url)
    history.pushState({}, '', `${location.pathname}?${params}`)
    try {
      const byteLength = await byteLengthFromUrl(url)
      // Must be awaited: otherwise a rejection from reading the buffer or the
      // metadata escapes this try/catch and is never reported.
      await setAsyncBuffer(url, { url, byteLength })
    } catch (e) {
      reportError(e)
    }
  }

  /**
   * Read the parquet metadata for a source and publish the resulting state.
   * Rejects if the buffer or the metadata cannot be read; callers handle that.
   *
   * @param {string} name display name for the file
   * @param {AsyncBufferFrom} from description of where the bytes come from
   */
  async function setAsyncBuffer(name: string, from: AsyncBufferFrom) {
    // Drop any error left over from a previous load attempt so it is not
    // passed to Layout alongside the new file.
    setError(undefined)
    // TODO: Replace welcome with spinner
    const asyncBuffer = await asyncBufferFrom(from)
    const fileMetadata = await parquetMetadataAsync(asyncBuffer)
    setMetadata(fileMetadata)
    setName(name)
    setByteLength(from.byteLength)
    const dataFrame = rowCache(parquetDataFrame(from, fileMetadata))
    setDf(dataFrame)
    document.getElementById('welcome')?.remove()
  }

  return <Layout progress={progress} error={error}>
    <Dropzone
      onError={(e) => setError(e)}
      onFileDrop={onFileDrop}
      onUrlDrop={onUrlDrop}>
      {metadata && df && <>
        <div className='top-header'>{name}</div>
        <div className='view-header'>
          {byteLength !== undefined && <span title={byteLength.toLocaleString() + ' bytes'}>{formatFileSize(byteLength)}</span>}
          <span>{df.numRows.toLocaleString()} rows</span>
          <Dropdown label={lens}>
            <button onClick={() => setLens('table')}>Table</button>
            <button onClick={() => setLens('metadata')}>Metadata</button>
            <button onClick={() => setLens('layout')}>Layout</button>
          </Dropdown>
        </div>
        {lens === 'table' && <HighTable cacheKey={name} data={df} onError={setError} />}
        {lens === 'metadata' && <ParquetMetadata metadata={metadata} />}
        {lens === 'layout' && <ParquetLayout byteLength={byteLength!} metadata={metadata} />}
      </>}
    </Dropzone>
  </Layout>
}
/**
 * Build a DataFrame view over a parquet file.
 *
 * The header is taken from the names of the children of the schema tree
 * returned by parquetSchema, the row count from `metadata.num_rows`, and
 * every row request is forwarded to parquetQueryWorker.
 *
 * @param {AsyncBufferFrom} from description of where the file bytes come from
 * @param {FileMetaData} metadata parsed parquet metadata for that file
 * @returns {DataFrame} sortable dataframe backed by the worker
 */
function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData): DataFrame {
  const schemaTree = parquetSchema(metadata)
  const columnNames = schemaTree.children.map(({ element }) => element.name)
  const rowCount = Number(metadata.num_rows)
  return {
    header: columnNames,
    numRows: rowCount,
    /**
     * Fetch a range of rows through the parquet worker.
     *
     * @param {number} rowStart start of the requested row range
     * @param {number} rowEnd end of the requested row range
     * @param {string} orderBy column to sort by, passed through to the worker
     * @returns {Promise<any[][]>}
     */
    rows: (rowStart, rowEnd, orderBy) => {
      console.log(`reading rows ${rowStart}-${rowEnd}`, orderBy)
      const query = { from, metadata, rowStart, rowEnd, orderBy }
      return parquetQueryWorker(query)
    },
    sortable: true,
  }
}
/**
 * Returns the file size in human readable format.
 *
 * Units are powers of 1024 ('b', 'kb', 'mb', 'gb', 'tb'). Scaled values below
 * 10 keep one decimal, larger ones are rounded to an integer. Sizes beyond the
 * largest unit are still expressed in 'tb', and anything that is not at least
 * 1 kb (including negative, fractional or NaN input) is printed as plain bytes.
 *
 * @param {number} bytes file size in bytes
 * @returns {string} formatted file size string
 */
function formatFileSize(bytes: number): string {
  const sizes = ['b', 'kb', 'mb', 'gb', 'tb']
  if (bytes === 0) return '0 b'
  // Clamp to the last unit so huge sizes never index past the end of `sizes`
  const i = Math.min(Math.floor(Math.log2(bytes) / 10), sizes.length - 1)
  // `!(i > 0)` also catches the NaN exponent from negative/NaN input and the
  // negative exponent from fractional input below 1
  if (!(i > 0)) return bytes + ' b'
  const base = bytes / Math.pow(1024, i)
  return (base < 10 ? base.toFixed(1) : Math.round(base)) + ' ' + sizes[i]
}