diff --git a/README.md b/README.md index 15cf09e..a8fb32d 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Dependency free since 2023! Hyparquet is a lightweight, pure JavaScript library for parsing [Apache Parquet](https://parquet.apache.org) files. Apache Parquet is a popular columnar storage format that is widely used in data engineering, data science, and machine learning applications for efficiently storing and processing large datasets. -Hyparquet allows you to read and extract data from Parquet files directly in JavaScript environments, both in Node.js and in the browser. It is designed to be fast, memory-efficient, and easy to use. +Hyparquet allows you to read and extract data from Parquet files directly in JavaScript environments, both in Node.js and in the browser, without any dependencies. Designed for performance and ease of use, hyparquet is ideal for data engineering, data science, and machine learning applications that require efficient data processing. ## Demo @@ -53,7 +53,7 @@ npm install hyparquet To read the entire contents of a parquet file in a node.js environment: -```js +```javascript const { asyncBufferFromFile, parquetRead } = await import('hyparquet') await parquetRead({ file: await asyncBufferFromFile(filename), @@ -78,7 +78,7 @@ await parquetRead({ You can read just the metadata, including schema and data statistics using the `parquetMetadata` function: -```js +```javascript const { parquetMetadata } = await import('hyparquet') const fs = await import('fs') @@ -91,7 +91,7 @@ If you're in a browser environment, you'll probably get parquet file data from e To load parquet data in the browser from a remote server using `fetch`: -```js +```javascript import { parquetMetadata } from 'hyparquet' const res = await fetch(url) @@ -108,7 +108,7 @@ Hyparquet is designed to load only the minimal amount of data needed to fulfill You can filter rows by number, or columns by name, and columns will be returned in the same order they were requested: -```js +```javascript import { parquetRead } from 'hyparquet' await parquetRead({ @@ -125,7 +125,7 @@ await parquetRead({ By default, data returned in the `onComplete` function will be one array of columns per row. If you would like each row to be an object with each key the name of the column, set the option `rowFormat` to `object`. -```js +```javascript import { parquetRead } from 'hyparquet' await parquetRead({ @@ -151,7 +151,7 @@ interface AsyncBuffer { You can read parquet files asynchronously using HTTP Range requests so that only the necessary byte ranges from a `url` will be fetched: -```js +```javascript import { parquetRead } from 'hyparquet' const url = 'https://hyperparam-public.s3.amazonaws.com/wiki-en-00000-of-00041.parquet' diff --git a/demo/ParquetLayout.tsx b/demo/ParquetLayout.tsx index c7996e8..fae5a4a 100644 --- a/demo/ParquetLayout.tsx +++ b/demo/ParquetLayout.tsx @@ -8,6 +8,13 @@ interface LayoutProps { metadata: FileMetaData } +/** + * Renders the file layout of a parquet file as nested rowgroups and columns. + * @param {Object} props + * @param {number} props.byteLength + * @param {FileMetaData} props.metadata + * @returns {ReactNode} + */ export default function ParquetLayout({ byteLength, metadata }: LayoutProps) { const metadataStart = byteLength - metadata.metadata_length - 4 const metadataEnd = byteLength - 4 diff --git a/demo/ParquetMetadata.tsx b/demo/ParquetMetadata.tsx index 84f0f0f..7aa8820 100644 --- a/demo/ParquetMetadata.tsx +++ b/demo/ParquetMetadata.tsx @@ -6,6 +6,12 @@ interface MetadataProps { metadata: FileMetaData } +/** + * Renders the metadata of a parquet file as JSON. + * @param {Object} props + * @param {FileMetaData} props.metadata + * @returns {ReactNode} + */ export default function ParquetMetadata({ metadata }: MetadataProps) { return {JSON.stringify(toJson(metadata), null, ' ')} diff --git a/demo/bundle.min.js b/demo/bundle.min.js index ca5a91b..1bb7b5b 100644 --- a/demo/bundle.min.js +++ b/demo/bundle.min.js @@ -1,4 +1,4 @@ -!function(e){"function"==typeof define&&define.amd?define(e):e()}((function(){"use strict";function e(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e}var n,t={exports:{}},r={};t.exports=function(){if(n)return r;n=1;var e=Symbol.for("react.element"),t=Symbol.for("react.portal"),l=Symbol.for("react.fragment"),a=Symbol.for("react.strict_mode"),o=Symbol.for("react.profiler"),i=Symbol.for("react.provider"),u=Symbol.for("react.context"),s=Symbol.for("react.forward_ref"),c=Symbol.for("react.suspense"),f=Symbol.for("react.memo"),d=Symbol.for("react.lazy"),p=Symbol.iterator,h={isMounted:function(){return!1},enqueueForceUpdate:function(){},enqueueReplaceState:function(){},enqueueSetState:function(){}},g=Object.assign,m={};function A(e,n,t){this.props=e,this.context=n,this.refs=m,this.updater=t||h}function w(){}function y(e,n,t){this.props=e,this.context=n,this.refs=m,this.updater=t||h}A.prototype.isReactComponent={},A.prototype.setState=function(e,n){if("object"!=typeof e&&"function"!=typeof e&&null!=e)throw Error("setState(...): takes an object of state variables to update or a function which returns an object of state variables.");this.updater.enqueueSetState(this,e,n,"setState")},A.prototype.forceUpdate=function(e){this.updater.enqueueForceUpdate(this,e,"forceUpdate")},w.prototype=A.prototype;var v=y.prototype=new w;v.constructor=y,g(v,A.prototype),v.isPureReactComponent=!0;var b=Array.isArray,E=Object.prototype.hasOwnProperty,I={current:null},S={key:!0,ref:!0,__self:!0,__source:!0};function C(n,t,r){var l,a={},o=null,i=null;if(null!=t)for(l in void 0!==t.ref&&(i=t.ref),void 0!==t.key&&(o=""+t.key),t)E.call(t,l)&&!S.hasOwnProperty(l)&&(a[l]=t[l]);var u=arguments.length-2;if(1===u)a.children=r;else if(1>>1,a=e[r];if(!(0>>1;rl(u,t))sl(c,u)?(e[r]=c,e[s]=t,r=s):(e[r]=u,e[i]=t,r=i);else{if(!(sl(c,t)))break e;e[r]=c,e[s]=t,r=s}}}return n}function l(e,n){var t=e.sortIndex-n.sortIndex;return 0!==t?t:e.id-n.id}if("object"==typeof performance&&"function"==typeof performance.now){var a=performance;e.unstable_now=function(){return a.now()}}else{var o=Date,i=o.now();e.unstable_now=function(){return o.now()-i}}var u=[],s=[],c=1,f=null,d=3,p=!1,h=!1,g=!1,m="function"==typeof setTimeout?setTimeout:null,A="function"==typeof clearTimeout?clearTimeout:null,w="undefined"!=typeof setImmediate?setImmediate:null;function y(e){for(var l=t(s);null!==l;){if(null===l.callback)r(s);else{if(!(l.startTime<=e))break;r(s),l.sortIndex=l.expirationTime,n(u,l)}l=t(s)}}function v(e){if(g=!1,y(e),!h)if(null!==t(u))h=!0,P(b);else{var n=t(s);null!==n&&Q(v,n.startTime-e)}}function b(n,l){h=!1,g&&(g=!1,A(C),C=-1),p=!0;var a=d;try{for(y(l),f=t(u);null!==f&&(!(f.expirationTime>l)||n&&!B());){var o=f.callback;if("function"==typeof o){f.callback=null,d=f.priorityLevel;var i=o(f.expirationTime<=l);l=e.unstable_now(),"function"==typeof i?f.callback=i:f===t(u)&&r(u),y(l)}else r(u);f=t(u)}if(null!==f)var c=!0;else{var m=t(s);null!==m&&Q(v,m.startTime-l),c=!1}return c}finally{f=null,d=a,p=!1}}"undefined"!=typeof navigator&&void 0!==navigator.scheduling&&void 0!==navigator.scheduling.isInputPending&&navigator.scheduling.isInputPending.bind(navigator.scheduling);var E,I=!1,S=null,C=-1,k=5,x=-1;function B(){return!(e.unstable_now()-xe||125o?(r.sortIndex=a,n(s,r),null===t(u)&&r===t(s)&&(g?(A(C),C=-1):g=!0,Q(v,a-o))):(r.sortIndex=i,n(u,r),h||p||(h=!0,P(b))),r},e.unstable_shouldYield=B,e.unstable_wrapCallback=function(e){var n=d;return function(){var t=d;d=n;try{return e.apply(this,arguments)}finally{d=t}}}}(p)),p)),d.exports} +!function(e){"function"==typeof define&&define.amd?define(e):e()}((function(){"use strict";function e(e){return e&&e.__esModule&&Object.prototype.hasOwnProperty.call(e,"default")?e.default:e}var n,t,r={exports:{}},l={};function a(){if(n)return l;n=1;var e=Symbol.for("react.element"),t=Symbol.for("react.portal"),r=Symbol.for("react.fragment"),a=Symbol.for("react.strict_mode"),o=Symbol.for("react.profiler"),i=Symbol.for("react.provider"),u=Symbol.for("react.context"),s=Symbol.for("react.forward_ref"),c=Symbol.for("react.suspense"),f=Symbol.for("react.memo"),d=Symbol.for("react.lazy"),p=Symbol.iterator;var h={isMounted:function(){return!1},enqueueForceUpdate:function(){},enqueueReplaceState:function(){},enqueueSetState:function(){}},g=Object.assign,m={};function A(e,n,t){this.props=e,this.context=n,this.refs=m,this.updater=t||h}function w(){}function y(e,n,t){this.props=e,this.context=n,this.refs=m,this.updater=t||h}A.prototype.isReactComponent={},A.prototype.setState=function(e,n){if("object"!=typeof e&&"function"!=typeof e&&null!=e)throw Error("setState(...): takes an object of state variables to update or a function which returns an object of state variables.");this.updater.enqueueSetState(this,e,n,"setState")},A.prototype.forceUpdate=function(e){this.updater.enqueueForceUpdate(this,e,"forceUpdate")},w.prototype=A.prototype;var v=y.prototype=new w;v.constructor=y,g(v,A.prototype),v.isPureReactComponent=!0;var b=Array.isArray,E=Object.prototype.hasOwnProperty,I={current:null},S={key:!0,ref:!0,__self:!0,__source:!0};function C(n,t,r){var l,a={},o=null,i=null;if(null!=t)for(l in void 0!==t.ref&&(i=t.ref),void 0!==t.key&&(o=""+t.key),t)E.call(t,l)&&!S.hasOwnProperty(l)&&(a[l]=t[l]);var u=arguments.length-2;if(1===u)a.children=r;else if(1>>1,a=e[r];if(!(0>>1;rl(u,t))sl(c,u)?(e[r]=c,e[s]=t,r=s):(e[r]=u,e[i]=t,r=i);else{if(!(sl(c,t)))break e;e[r]=c,e[s]=t,r=s}}}return n}function l(e,n){var t=e.sortIndex-n.sortIndex;return 0!==t?t:e.id-n.id}if("object"==typeof performance&&"function"==typeof performance.now){var a=performance;e.unstable_now=function(){return a.now()}}else{var o=Date,i=o.now();e.unstable_now=function(){return o.now()-i}}var u=[],s=[],c=1,f=null,d=3,p=!1,h=!1,g=!1,m="function"==typeof setTimeout?setTimeout:null,A="function"==typeof clearTimeout?clearTimeout:null,w="undefined"!=typeof setImmediate?setImmediate:null;function y(e){for(var l=t(s);null!==l;){if(null===l.callback)r(s);else{if(!(l.startTime<=e))break;r(s),l.sortIndex=l.expirationTime,n(u,l)}l=t(s)}}function v(e){if(g=!1,y(e),!h)if(null!==t(u))h=!0,T(b);else{var n=t(s);null!==n&&Q(v,n.startTime-e)}}function b(n,l){h=!1,g&&(g=!1,A(C),C=-1),p=!0;var a=d;try{for(y(l),f=t(u);null!==f&&(!(f.expirationTime>l)||n&&!B());){var o=f.callback;if("function"==typeof o){f.callback=null,d=f.priorityLevel;var i=o(f.expirationTime<=l);l=e.unstable_now(),"function"==typeof i?f.callback=i:f===t(u)&&r(u),y(l)}else r(u);f=t(u)}if(null!==f)var c=!0;else{var m=t(s);null!==m&&Q(v,m.startTime-l),c=!1}return c}finally{f=null,d=a,p=!1}}"undefined"!=typeof navigator&&void 0!==navigator.scheduling&&void 0!==navigator.scheduling.isInputPending&&navigator.scheduling.isInputPending.bind(navigator.scheduling);var E,I=!1,S=null,C=-1,k=5,x=-1;function B(){return!(e.unstable_now()-xe||125o?(r.sortIndex=a,n(s,r),null===t(u)&&r===t(s)&&(g?(A(C),C=-1):g=!0,Q(v,a-o))):(r.sortIndex=i,n(u,r),h||p||(h=!0,T(b))),r},e.unstable_shouldYield=B,e.unstable_wrapCallback=function(e){var n=d;return function(){var t=d;d=n;try{return e.apply(this,arguments)}finally{d=t}}}}(w)),w)),A.exports} /** * @license React * react-dom.production.min.js @@ -7,5 +7,5 @@ * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. - */!function e(){if("undefined"!=typeof __REACT_DEVTOOLS_GLOBAL_HOOK__&&"function"==typeof __REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE)try{__REACT_DEVTOOLS_GLOBAL_HOOK__.checkDCE(e)}catch(e){console.error(e)}}(),c.exports=function(){if(o)return f;o=1;var e=i,n=h();function t(e){for(var n="https://reactjs.org/docs/error-decoder.html?invariant="+e,t=1;t