// NOTE(review): this file appears to have been mangled in transit — it was
// collapsed onto a few physical lines and every JSX element tag in App's
// return value has been stripped (only attribute/expression fragments
// remain). The code below is re-wrapped for readability with all original
// tokens preserved verbatim; the corrupted JSX region is flagged inline and
// must be restored from version control before this file will compile.

import HighTable, { DataFrame, sortableDataFrame } from 'hightable'
import { compressors } from 'hyparquet-compressors'
import React, { useEffect, useState } from 'react'
import { parquetReadObjects } from '../src/hyparquet.js'
import { FileMetaData, parquetMetadataAsync, parquetSchema } from '../src/metadata.js'
import type { AsyncBuffer } from '../src/types.js'
import { asyncBufferFromUrl } from '../src/utils.js'
import Dropdown from './Dropdown.js'
import Dropzone from './Dropzone.js'
import Layout from './Layout.js'
import ParquetLayout from './ParquetLayout.js'
import ParquetMetadata from './ParquetMetadata.js'

// Which view of the loaded parquet file is currently displayed.
type Lens = 'table' | 'metadata' | 'layout'

/**
 * Hyparquet demo viewer page.
 *
 * Loads a parquet file either from the `url` prop (on mount) or from a
 * file/url dropped onto the page, parses its metadata, and renders the
 * currently selected lens (table, metadata, or layout).
 *
 * @param {Object} props
 * @param {string} [props.url] optional parquet file url to load on mount
 * @returns {ReactNode}
 */
export default function App({ url }: { url?: string }) {
  // NOTE(review): every useState call here is untyped, so each state is
  // inferred as `undefined` and the setters reject real values under strict
  // TypeScript. They presumably need explicit type parameters — e.g.
  // useState<DataFrame | undefined>() for df, useState<Lens>('table') for
  // lens, useState<FileMetaData | undefined>() for metadata,
  // useState<number | undefined>() for byteLength — TODO confirm intended
  // types against the components that consume them.
  const [progress, setProgress] = useState()
  const [error, setError] = useState()
  const [df, setDf] = useState()
  const [name, setName] = useState()
  const [lens, setLens] = useState('table')
  const [metadata, setMetadata] = useState()
  const [byteLength, setByteLength] = useState()

  // Initial load: if a url prop was given and no dataframe is loaded yet,
  // fetch the file and hand it to setAsyncBuffer.
  // NOTE(review): `df` and `setAsyncBuffer` are read inside this effect but
  // are absent from the dependency array, and a rejection of
  // asyncBufferFromUrl is unhandled here — verify both are intentional.
  useEffect(() => {
    if (!df && url) {
      asyncBufferFromUrl(url).then(asyncBuffer => setAsyncBuffer(url, asyncBuffer))
    }
  }, [ url ])

  /**
   * Handle a file dropped onto the page: clear the query string (the view no
   * longer corresponds to a shareable url) and load the file's bytes.
   */
  async function onFileDrop(file: File) {
    // Clear query string
    history.pushState({}, '', location.pathname)
    // NOTE(review): floating promise — setAsyncBuffer is async and its
    // rejection is not handled here; confirm errors surface elsewhere.
    setAsyncBuffer(file.name, await file.arrayBuffer())
  }

  /**
   * Handle a url dropped onto the page: record it as `?key=url` in the query
   * string (making the view shareable) and load the remote file.
   */
  async function onUrlDrop(url: string) {
    // Add key=url to query string
    const params = new URLSearchParams(location.search)
    params.set('key', url)
    history.pushState({}, '', `${location.pathname}?${params}`)
    setAsyncBuffer(url, await asyncBufferFromUrl(url))
  }

  /**
   * Parse parquet metadata from the buffer and publish file name, size,
   * metadata, and dataframe to component state. The dataframe is wrapped
   * for sorting only when it has at most 10000 rows.
   */
  async function setAsyncBuffer(name: string, asyncBuffer: AsyncBuffer) {
    // TODO: Replace welcome with spinner
    const metadata = await parquetMetadataAsync(asyncBuffer)
    setMetadata(metadata)
    setName(name)
    setByteLength(asyncBuffer.byteLength)
    let df = parquetDataFrame(asyncBuffer, metadata)
    if (df.numRows <= 10000) {
      df = sortableDataFrame(df)
    }
    setDf(df)
    // Remove the static welcome splash once a file has been loaded.
    document.getElementById('welcome')?.remove()
  }

  // NOTE(review): the return value below is corrupted — the JSX element tags
  // (presumably Layout, Dropzone, Dropdown, HighTable, ParquetMetadata, and
  // ParquetLayout, judging by the imports and the surviving attributes such
  // as onFileDrop/onUrlDrop and the lens conditionals) have been stripped,
  // leaving only attribute and expression fragments. Do NOT attempt to
  // reconstruct by hand; restore this region from version control. Preserved
  // verbatim here:
  return setError(e)} onFileDrop={onFileDrop} onUrlDrop={onUrlDrop}> {metadata && df && <>
  {name}
  {byteLength !== undefined && {formatFileSize(byteLength)}} {df.numRows.toLocaleString()} rows
  {lens === 'table' && } {lens === 'metadata' && } {lens === 'layout' && } }
}

/**
 * Convert a parquet file into a dataframe.
 *
 * The dataframe is lazy: `rows` fetches and decodes only the requested row
 * range from the underlying async buffer, so large files are never read in
 * full up front.
 *
 * @param {AsyncBuffer} file - parquet file asyncbuffer
 * @param {FileMetaData} metadata - parquet file metadata
 * @returns {DataFrame} dataframe
 */
function parquetDataFrame(file: AsyncBuffer, metadata: FileMetaData): DataFrame {
  // Top-level schema children yield one column name per field.
  const { children } = parquetSchema(metadata)
  return {
    header: children.map(child => child.element.name),
    // num_rows comes from the parquet footer; coerced to number for DataFrame.
    numRows: Number(metadata.num_rows),
    /**
     * Read the requested row range from the file.
     * @param {number} rowStart
     * @param {number} rowEnd
     * @returns {Promise} resolves to the decoded row objects
     */
    rows(rowStart, rowEnd) {
      console.log(`reading rows ${rowStart}-${rowEnd}`)
      return parquetReadObjects({ file, compressors, rowStart, rowEnd })
    },
  }
}

/**
 * Returns the file size in human readable format.
 *
 * Uses binary (1024-based) units. Values under 10 units keep one decimal
 * place; larger values are rounded to whole numbers.
 * NOTE(review): assumes bytes is a non-negative integer no larger than the
 * tb range — negative or petabyte-scale inputs would index outside `sizes`;
 * confirm callers only pass real file sizes.
 *
 * @param {number} bytes file size in bytes
 * @returns {string} formatted file size string
 */
function formatFileSize(bytes: number): string {
  const sizes = ['b', 'kb', 'mb', 'gb', 'tb']
  if (bytes === 0) return '0 b'
  // log2(bytes) / 10 === log1024(bytes): index of the unit to use.
  const i = Math.floor(Math.log2(bytes) / 10)
  if (i === 0) return bytes + ' b'
  const base = bytes / Math.pow(1024, i)
  return (base < 10 ? base.toFixed(1) : Math.round(base)) + ' ' + sizes[i]
}