import HighTable, { DataFrame } from 'hightable'
import { compressors } from 'hyparquet-compressors'
import React, { useState } from 'react'
import { FileMetaData, parquetMetadata, parquetMetadataAsync, parquetSchema } from '../src/metadata.js'
import { parquetRead } from '../src/read.js'
import type { AsyncBuffer } from '../src/types.js'
import { asyncBufferFromUrl } from '../src/utils.js'
import Dropzone from './Dropzone.js'
import Layout from './Layout.js'

/**
 * Hyparquet demo viewer page.
 *
 * Keeps the currently loaded parquet file (display name, byte length and
 * dataframe) in component state and renders it as a table once a file or
 * url has been dropped.
 *
 * NOTE(review): the element tags and generic type arguments of this component
 * were missing from the reviewed text (everything between `<` and `>` was
 * stripped). The markup below is rebuilt from the surviving fragments and the
 * otherwise-unused imports; any attributes that were lost (class names,
 * titles, extra props) must be restored from version control.
 *
 * @returns {ReactNode}
 */
export default function App() {
  // NOTE(review): `number` and `Error` are assumed state types — confirm
  // against the props that Layout and Dropzone actually declare.
  const [progress, setProgress] = useState<number>()
  const [error, setError] = useState<Error>()
  const [df, setDf] = useState<DataFrame>()
  const [name, setName] = useState<string>()
  const [byteLength, setByteLength] = useState<number>()

  /**
   * Load a dropped local file: read it fully into memory, parse its
   * metadata synchronously and publish the resulting dataframe.
   *
   * @param {File} file - file dropped by the user
   */
  async function onFileDrop(file: File) {
    const arrayBuffer = await file.arrayBuffer()
    const metadata = parquetMetadata(arrayBuffer)
    setName(file.name)
    setByteLength(file.size)
    setDf(parquetDataFrame(arrayBuffer, metadata))
    // the welcome element lives outside this component, so it is removed by id
    document.getElementById('welcome')?.remove()
  }

  /**
   * Load a parquet file from a url through an async buffer, parse its
   * metadata asynchronously and publish the resulting dataframe.
   *
   * @param {string} url - location of the parquet file
   */
  async function onUrlDrop(url: string) {
    const asyncBuffer = await asyncBufferFromUrl(url)
    const metadata = await parquetMetadataAsync(asyncBuffer)
    setName(url)
    setByteLength(asyncBuffer.byteLength)
    setDf(parquetDataFrame(asyncBuffer, metadata))
    document.getElementById('welcome')?.remove()
  }

  // NOTE(review): `progress` / `error` are presumably consumed by Layout (they
  // are not read anywhere else in this file) — confirm the prop names.
  return <Layout progress={progress} error={error}>
    <Dropzone
      onError={(e) => setError(e)}
      onFileDrop={onFileDrop}
      onUrlDrop={onUrlDrop}>
      {df && <>
        <div>{name}</div>
        <div>
          {byteLength !== undefined && <span>{formatFileSize(byteLength)}</span>}
          <span>{df.numRows.toLocaleString()} rows</span>
        </div>
        <HighTable data={df} />
      </>}
    </Dropzone>
  </Layout>
}

/**
 * Convert a parquet file into a dataframe.
 *
 * The column header is taken from the names of the top-level schema children
 * and the row count from `metadata.num_rows`. Rows are not read up front:
 * every `rows()` call issues its own `parquetRead` for the requested range.
 *
 * @param {AsyncBuffer} file - parquet file asyncbuffer
 * @param {FileMetaData} metadata - parquet file metadata
 * @returns {DataFrame} dataframe
 */
function parquetDataFrame(file: AsyncBuffer, metadata: FileMetaData): DataFrame {
  const { children } = parquetSchema(metadata)
  return {
    header: children.map(child => child.element.name),
    numRows: Number(metadata.num_rows),
    /**
     * Read a range of rows from the file.
     *
     * @param {number} rowStart
     * @param {number} rowEnd
     * @returns {Promise} resolves with the value parquetRead passes to
     *   onComplete; rejects if parquetRead rejects
     */
    rows(rowStart, rowEnd) {
      console.log(`reading rows ${rowStart}-${rowEnd}`)
      // parquetRead reports its result through a callback, so it is wrapped
      // in a promise here.
      return new Promise((resolve, reject) => {
        // Hand over the metadata we already hold so the reader does not have
        // to fetch and parse the file footer again for every row range.
        parquetRead({ file, metadata, compressors, rowStart, rowEnd, onComplete: resolve })
          .catch(reject)
      })
    },
  }
}

/**
 * Returns the file size in human readable format.
 *
 * Units are powers of 1024. Sizes below one kilobyte are printed as plain
 * bytes; scaled values below 10 keep one decimal, larger ones are rounded to
 * an integer. Sizes beyond the largest known unit are expressed in that unit
 * ('tb') instead of running off the end of the unit table.
 *
 * @param {number} bytes file size in bytes
 * @returns {string} formatted file size string
 */
function formatFileSize(bytes: number): string {
  const sizes = ['b', 'kb', 'mb', 'gb', 'tb']
  // Zero, fractions of a byte, negative values and NaN have no meaningful
  // unit prefix (and would produce an invalid unit index): print them as-is.
  if (!(bytes >= 1)) return bytes + ' b'
  // Every 10 bits is one step in the unit table; clamp so petabyte-scale
  // input still maps to the last entry.
  const i = Math.min(Math.floor(Math.log2(bytes) / 10), sizes.length - 1)
  if (i === 0) return bytes + ' b'
  const base = bytes / Math.pow(1024, i)
  return (base < 10 ? base.toFixed(1) : Math.round(base)) + ' ' + sizes[i]
}