forked from sheetjs/docs.sheetjs.com
		
	stream
This commit is contained in:
		
							parent
							
								
									4c9a9ada4f
								
							
						
					
					
						commit
						a77f991b86
					
				
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
								
								
								
								
								
									
									
								
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@ -1,3 +1,4 @@
 | 
			
		||||
.*.sw*
 | 
			
		||||
*.bak
 | 
			
		||||
package-lock.json
 | 
			
		||||
pnpm-lock.yaml
 | 
			
		||||
 | 
			
		||||
@ -76,6 +76,7 @@ WK3
 | 
			
		||||
WK4
 | 
			
		||||
WKS
 | 
			
		||||
WK_
 | 
			
		||||
WPS
 | 
			
		||||
WQ
 | 
			
		||||
WQ1
 | 
			
		||||
WQ2
 | 
			
		||||
@ -97,6 +98,7 @@ macrosheets
 | 
			
		||||
tooltip
 | 
			
		||||
tooltips
 | 
			
		||||
标文通
 | 
			
		||||
电子表格
 | 
			
		||||
 | 
			
		||||
# Other terms
 | 
			
		||||
1.x
 | 
			
		||||
@ -117,6 +119,7 @@ BOM
 | 
			
		||||
Base64
 | 
			
		||||
Base64-encoded
 | 
			
		||||
Big5
 | 
			
		||||
BitBucket
 | 
			
		||||
Booleans
 | 
			
		||||
Browserify
 | 
			
		||||
Bundlers
 | 
			
		||||
@ -151,6 +154,8 @@ ExpressJS
 | 
			
		||||
ExtendScript
 | 
			
		||||
Fastify
 | 
			
		||||
FileReader
 | 
			
		||||
FileReaderSync
 | 
			
		||||
FileSaver
 | 
			
		||||
GBK
 | 
			
		||||
GatsbyJS
 | 
			
		||||
GitLab
 | 
			
		||||
 | 
			
		||||
@ -30,7 +30,7 @@ For production use, it is highly encouraged to download and host the script.
 | 
			
		||||
:::note Browser Compatibility
 | 
			
		||||
 | 
			
		||||
ESM is supported in Web Workers in the Chromium family of browsers (including
 | 
			
		||||
Chrome and Edge) as well as in Webkit-based browsers (including Safari).
 | 
			
		||||
Chrome and Edge) as well as in browsers powered by WebKit (including Safari).
 | 
			
		||||
 | 
			
		||||
For support in legacy browsers like Firefox, `importScripts` should be used.
 | 
			
		||||
 | 
			
		||||
@ -102,7 +102,7 @@ self.addEventListener('message', async(e) => {
 | 
			
		||||
    const ab = await res.arrayBuffer();
 | 
			
		||||
 | 
			
		||||
    /* Parse file */
 | 
			
		||||
    const wb = XLSX.read(ab);
 | 
			
		||||
    const wb = XLSX.read(ab, {dense: true});
 | 
			
		||||
    const ws = wb.Sheets[wb.SheetNames[0]];
 | 
			
		||||
 | 
			
		||||
    /* Generate HTML */
 | 
			
		||||
@ -162,7 +162,7 @@ self.addEventListener('message', async(e) => {
 | 
			
		||||
SheetJS,in,Web,Workers
 | 
			
		||||
வணக்கம்,สวัสดี,你好,가지마
 | 
			
		||||
1,2,3,4\`;
 | 
			
		||||
    const wb = XLSX.read(csv, { type: "string" });
 | 
			
		||||
    const wb = XLSX.read(csv, { type: "string", dense: true });
 | 
			
		||||
 | 
			
		||||
    /* Write XLSB data (Uint8Array) */
 | 
			
		||||
    const u8 = XLSX.write(wb, { bookType: "xlsb", type: "buffer" });
 | 
			
		||||
@ -234,7 +234,7 @@ self.addEventListener('message', (e) => {
 | 
			
		||||
    const ab = new FileReaderSync().readAsArrayBuffer(e.data.file);
 | 
			
		||||
 | 
			
		||||
    /* Parse file */
 | 
			
		||||
    const wb = XLSX.read(ab);
 | 
			
		||||
    const wb = XLSX.read(ab, {dense: true});
 | 
			
		||||
    const ws = wb.Sheets[wb.SheetNames[0]];
 | 
			
		||||
 | 
			
		||||
    /* Generate HTML */
 | 
			
		||||
@ -264,4 +264,125 @@ self.addEventListener('message', (e) => {
 | 
			
		||||
    <div dangerouslySetInnerHTML={{ __html }}/>
 | 
			
		||||
  </> );
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Streaming Write
 | 
			
		||||
 | 
			
		||||
A more general discussion, including row-oriented processing demos, is included
 | 
			
		||||
in the ["Large Datasets"](/docs/demos/stream#browser) demo.
 | 
			
		||||
 | 
			
		||||
#### File System Access API
 | 
			
		||||
 | 
			
		||||
:::note
 | 
			
		||||
 | 
			
		||||
At the time of writing, the File System Access API is only available in Chromium
 | 
			
		||||
and Chromium-based browsers like Chrome and Edge.
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
The following live demo fetches and parses a file in a Web Worker.  The script:
 | 
			
		||||
 | 
			
		||||
- prompts user to save file (`window.showSaveFilePicker` in the main thread)
 | 
			
		||||
- passes the URL and the file object to the Web Worker
 | 
			
		||||
- loads the SheetJS library in the Web Worker
 | 
			
		||||
- fetches the requested URL and parses the workbook from the Worker
 | 
			
		||||
- creates a Writable Stream from the file object.
 | 
			
		||||
- uses `XLSX.stream.to_csv` to generate CSV rows of the first worksheet
 | 
			
		||||
  + on each row, the data is written to the file stream
 | 
			
		||||
  + every 100th row, a progress message is sent back to the main thread
 | 
			
		||||
  + at the end, a completion message is sent back to the main thread
 | 
			
		||||
 | 
			
		||||
The demo has a URL input box.  Feel free to change the URL.  For example,
 | 
			
		||||
 | 
			
		||||
`https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls`
 | 
			
		||||
is an XLS file over 50 MB
 | 
			
		||||
 | 
			
		||||
`https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx`
 | 
			
		||||
is an XLSX file with 300000 rows (approximately 20 MB)
 | 
			
		||||
 | 
			
		||||
```jsx live
 | 
			
		||||
function SheetJSFetchCSVStreamFile() {
 | 
			
		||||
  const [state, setState]  = React.useState("");
 | 
			
		||||
  const [cnt, setCnt] = React.useState(0);
 | 
			
		||||
  const [url, setUrl] = React.useState("https://oss.sheetjs.com/test_files/large_strings.xlsx");
 | 
			
		||||
 | 
			
		||||
  return ( <>
 | 
			
		||||
    <b>URL: </b><input type="text" value={url} onChange={(e) => setUrl(e.target.value)} size="80"/>
 | 
			
		||||
    <button onClick={async() => {
 | 
			
		||||
      /* this mantra embeds the worker source in the function */
 | 
			
		||||
      const worker = new Worker(URL.createObjectURL(new Blob([`\
 | 
			
		||||
/* load standalone script from CDN */
 | 
			
		||||
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
 | 
			
		||||
 | 
			
		||||
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
 | 
			
		||||
  XLSX.stream.set_readable(() => ({
 | 
			
		||||
    __done: false,
 | 
			
		||||
    // this function will be assigned by the SheetJS stream methods
 | 
			
		||||
    _read: function() { this.__done = true; },
 | 
			
		||||
    // this function is called by the stream methods
 | 
			
		||||
    push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
 | 
			
		||||
    resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
 | 
			
		||||
  }));
 | 
			
		||||
  return XLSX.stream.to_csv(ws, opts);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* this callback will run once the main context sends a message */
 | 
			
		||||
self.addEventListener('message', async(e) => {
 | 
			
		||||
  try {
 | 
			
		||||
    postMessage({state: "fetching"});
 | 
			
		||||
    /* Fetch file */
 | 
			
		||||
    const res = await fetch(e.data.url);
 | 
			
		||||
    const ab = await res.arrayBuffer();
 | 
			
		||||
 | 
			
		||||
    /* Parse file */
 | 
			
		||||
    let len = ab.byteLength;
 | 
			
		||||
    if(len < 1024) len += " bytes"; else { len /= 1024;
 | 
			
		||||
      if(len < 1024) len += " KB"; else { len /= 1024; len += " MB"; }
 | 
			
		||||
    }
 | 
			
		||||
    postMessage({state: "parsing"});
 | 
			
		||||
    const wb = XLSX.read(ab, {dense: true});
 | 
			
		||||
    const ws = wb.Sheets[wb.SheetNames[0]];
 | 
			
		||||
 | 
			
		||||
    /* Generate CSV rows */
 | 
			
		||||
    postMessage({state: "begin"});
 | 
			
		||||
    const wstream = await e.data.wFile.createWritable();
 | 
			
		||||
    let rows = 0;
 | 
			
		||||
    const strm = sheet_to_csv_cb(ws, async(csv) => {
 | 
			
		||||
      if(csv != null) {
 | 
			
		||||
        await wstream.write(csv);
 | 
			
		||||
        if(!(++rows % 100)) postMessage({ state: "processing", rows });
 | 
			
		||||
      } else {
 | 
			
		||||
        await wstream.close();
 | 
			
		||||
        postMessage({state: "done", rows });
 | 
			
		||||
      }
 | 
			
		||||
    });
 | 
			
		||||
    strm.resume();
 | 
			
		||||
  } catch(e) {
 | 
			
		||||
    /* Pass the error message back */
 | 
			
		||||
    postMessage({error: String(e.message || e) });
 | 
			
		||||
  }
 | 
			
		||||
}, false);
 | 
			
		||||
      `])));
 | 
			
		||||
      /* when the worker sends back data, add it to the DOM */
 | 
			
		||||
      worker.onmessage = function(e) {
 | 
			
		||||
        if(e.data.error) return setHTML(e.data.error);
 | 
			
		||||
        else if(e.data.state) {
 | 
			
		||||
          setState(e.data.state);
 | 
			
		||||
          if(e.data.rows) setCnt(e.data.rows);
 | 
			
		||||
        }
 | 
			
		||||
      };
 | 
			
		||||
      setCnt(0); setState("");
 | 
			
		||||
 | 
			
		||||
      /* Show picker and get handle to file */
 | 
			
		||||
      const wFile = await window.showSaveFilePicker({
 | 
			
		||||
        suggestedName: "SheetJSStream.csv",
 | 
			
		||||
        types: [ { description: 'csv', accept: { 'text/csv': ['.csv'] } } ]
 | 
			
		||||
      });
 | 
			
		||||
 | 
			
		||||
      /* post a message to the worker with the URL to fetch */
 | 
			
		||||
      if(wFile) worker.postMessage({url, wFile});
 | 
			
		||||
    }}><b>Click to Start</b></button>
 | 
			
		||||
    <pre>State: <b>{state}</b><br/>Number of rows: <b>{cnt}</b></pre>
 | 
			
		||||
  </> );
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										354
									
								
								docz/docs/03-demos/08-stream.md
									
									
									
									
									
										Normal file
									
								
							
							
								
								
								
								
								
									
									
								
							
						
						
									
										354
									
								
								docz/docs/03-demos/08-stream.md
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,354 @@
 | 
			
		||||
---
 | 
			
		||||
title: Large Datasets
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
For maximal compatibility, the library reads entire files at once and generates
 | 
			
		||||
files at once. Browsers and other JS engines enforce tight memory limits.  In
 | 
			
		||||
these cases, the library offers strategies to optimize for memory or space by
 | 
			
		||||
using platform-specific APIs.
 | 
			
		||||
 | 
			
		||||
## Dense Mode
 | 
			
		||||
 | 
			
		||||
The `dense` option (supported in `read`, `readFile` and `aoa_to_sheet`) creates
 | 
			
		||||
worksheet objects that use arrays of arrays under the hood:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
var dense_wb = XLSX.read(ab, {dense: true});
 | 
			
		||||
 | 
			
		||||
var dense_sheet = XLSX.utils.aoa_to_sheet(aoa, {dense: true}); // `dense` must be passed explicitly
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
<details><summary><b>Historical Note</b> (click to show)</summary>
 | 
			
		||||
 | 
			
		||||
The earliest versions of the library aimed for IE6+ compatibility.  In early
 | 
			
		||||
testing, both in Chrome 26 and in IE6, the most efficient worksheet storage for
 | 
			
		||||
small sheets was a large object whose keys were cell addresses.
 | 
			
		||||
 | 
			
		||||
Over time, V8 (the engine behind Chrome and NodeJS) evolved in a way that made
 | 
			
		||||
the array of arrays approach more efficient but reduced the performance of the
 | 
			
		||||
large object approach.
 | 
			
		||||
 | 
			
		||||
In the interest of preserving backwards compatibility, the library opts to make
 | 
			
		||||
the array of arrays approach available behind a special `dense` option.
 | 
			
		||||
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
The various API functions will seamlessly handle dense and sparse worksheets.
 | 
			
		||||
 | 
			
		||||
## Streaming Write
 | 
			
		||||
 | 
			
		||||
The streaming write functions are available in the `XLSX.stream` object.  They
 | 
			
		||||
take the same arguments as the normal write functions:
 | 
			
		||||
 | 
			
		||||
- `XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`.
 | 
			
		||||
- `XLSX.stream.to_html` is the streaming version of `XLSX.utils.sheet_to_html`.
 | 
			
		||||
- `XLSX.stream.to_json` is the streaming version of `XLSX.utils.sheet_to_json`.
 | 
			
		||||
 | 
			
		||||
"Stream" refers to the NodeJS push streams API.
 | 
			
		||||
 | 
			
		||||
<details><summary><b>Historical Note</b> (click to show)</summary>
 | 
			
		||||
 | 
			
		||||
NodeJS push streams were introduced in 2012.
 | 
			
		||||
 | 
			
		||||
The first streaming write function, `to_csv`, was introduced in April 2017.  It
 | 
			
		||||
used and still uses the same NodeJS streaming API.
 | 
			
		||||
 | 
			
		||||
Years later, browser vendors are settling on a different stream API.
 | 
			
		||||
 | 
			
		||||
For maximal compatibility, the library uses NodeJS push streams.
 | 
			
		||||
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
### NodeJS
 | 
			
		||||
 | 
			
		||||
:::note
 | 
			
		||||
 | 
			
		||||
In a CommonJS context, NodeJS Streams and `fs` immediately work with SheetJS:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
const XLSX = require("xlsx"); // "just works"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
In NodeJS ESM, the dependency must be loaded manually:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
import * as XLSX from 'xlsx';
 | 
			
		||||
import { Readable } from 'stream';
 | 
			
		||||
 | 
			
		||||
XLSX.stream.set_readable(Readable); // manually load stream helpers
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Additionally, for file-related operations in NodeJS ESM, `fs` must be loaded:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
import * as XLSX from 'xlsx';
 | 
			
		||||
import * as fs from 'fs';
 | 
			
		||||
 | 
			
		||||
XLSX.set_fs(fs); // manually load fs helpers
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
**It is strongly encouraged to use CommonJS in NodeJS whenever possible.**
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
This example reads a workbook file passed as an argument to the script, pulls the
 | 
			
		||||
first worksheet, converts to CSV and writes to `out.csv`:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
var XLSX = require("xlsx"), fs = require("fs"); // `fs` supplies createWriteStream
 | 
			
		||||
var workbook = XLSX.readFile(process.argv[2]);
 | 
			
		||||
var worksheet = workbook.Sheets[workbook.SheetNames[0]];
 | 
			
		||||
// highlight-next-line
 | 
			
		||||
var stream = XLSX.stream.to_csv(worksheet);
 | 
			
		||||
 | 
			
		||||
var output_file_name = "out.csv";
 | 
			
		||||
// highlight-next-line
 | 
			
		||||
stream.pipe(fs.createWriteStream(output_file_name));
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
`stream.to_json` uses Object-mode streams. A `Transform` stream can be used to
 | 
			
		||||
generate a normal stream for streaming to a file or the screen:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
var XLSX = require("xlsx"), Transform = require("stream").Transform; // Transform converts objects to text
 | 
			
		||||
var workbook = XLSX.readFile(process.argv[2], {dense: true});
 | 
			
		||||
var worksheet = workbook.Sheets[workbook.SheetNames[0]];
 | 
			
		||||
/* to_json returns an object-mode stream */
 | 
			
		||||
// highlight-next-line
 | 
			
		||||
var stream = XLSX.stream.to_json(worksheet, {raw:true});
 | 
			
		||||
 | 
			
		||||
/* this Transform stream converts JS objects to text and prints to screen */
 | 
			
		||||
var conv = new Transform({writableObjectMode:true});
 | 
			
		||||
conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); };
 | 
			
		||||
conv.pipe(process.stdout);
 | 
			
		||||
 | 
			
		||||
// highlight-next-line
 | 
			
		||||
stream.pipe(conv);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Browser
 | 
			
		||||
 | 
			
		||||
<details><summary><b>Live Demo</b> (click to show)</summary>
 | 
			
		||||
 | 
			
		||||
The following live demo fetches and parses a file in a Web Worker.  The `to_csv`
 | 
			
		||||
streaming function is used to generate CSV rows and pass back to the main thread
 | 
			
		||||
for further processing.
 | 
			
		||||
 | 
			
		||||
:::note
 | 
			
		||||
 | 
			
		||||
For Chromium browsers, the File System Access API provides a modern worker-only
 | 
			
		||||
approach. [The Web Workers demo](/docs/demos/worker#streaming-write) includes a
 | 
			
		||||
live example of CSV streaming write.
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
The demo has a URL input box.  Feel free to change the URL.  For example,
 | 
			
		||||
 | 
			
		||||
`https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls`
 | 
			
		||||
is an XLS file over 50 MB
 | 
			
		||||
 | 
			
		||||
`https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx`
 | 
			
		||||
is an XLSX file with 300000 rows (approximately 20 MB)
 | 
			
		||||
 | 
			
		||||
```jsx live
 | 
			
		||||
function SheetJSFetchCSVStreamWorker() {
 | 
			
		||||
  const [__html, setHTML] = React.useState("");
 | 
			
		||||
  const [state, setState]  = React.useState("");
 | 
			
		||||
  const [cnt, setCnt] = React.useState(0);
 | 
			
		||||
  const [url, setUrl] = React.useState("https://oss.sheetjs.com/test_files/large_strings.xlsx");
 | 
			
		||||
 | 
			
		||||
  return ( <>
 | 
			
		||||
    <b>URL: </b><input type="text" value={url} onChange={(e) => setUrl(e.target.value)} size="80"/>
 | 
			
		||||
    <button onClick={() => {
 | 
			
		||||
      /* this mantra embeds the worker source in the function */
 | 
			
		||||
      const worker = new Worker(URL.createObjectURL(new Blob([`\
 | 
			
		||||
/* load standalone script from CDN */
 | 
			
		||||
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
 | 
			
		||||
 | 
			
		||||
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
 | 
			
		||||
  XLSX.stream.set_readable(() => ({
 | 
			
		||||
    __done: false,
 | 
			
		||||
    // this function will be assigned by the SheetJS stream methods
 | 
			
		||||
    _read: function() { this.__done = true; },
 | 
			
		||||
    // this function is called by the stream methods
 | 
			
		||||
    push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
 | 
			
		||||
    resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
 | 
			
		||||
  }));
 | 
			
		||||
  return XLSX.stream.to_csv(ws, opts);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* this callback will run once the main context sends a message */
 | 
			
		||||
self.addEventListener('message', async(e) => {
 | 
			
		||||
  try {
 | 
			
		||||
    postMessage({state: "fetching " + e.data.url});
 | 
			
		||||
    /* Fetch file */
 | 
			
		||||
    const res = await fetch(e.data.url);
 | 
			
		||||
    const ab = await res.arrayBuffer();
 | 
			
		||||
 | 
			
		||||
    /* Parse file */
 | 
			
		||||
    let len = ab.byteLength;
 | 
			
		||||
    if(len < 1024) len += " bytes"; else { len /= 1024;
 | 
			
		||||
      if(len < 1024) len += " KB"; else { len /= 1024; len += " MB"; }
 | 
			
		||||
    }
 | 
			
		||||
    postMessage({state: "parsing " + len});
 | 
			
		||||
    const wb = XLSX.read(ab, {dense: true});
 | 
			
		||||
    const ws = wb.Sheets[wb.SheetNames[0]];
 | 
			
		||||
 | 
			
		||||
    /* Generate CSV rows */
 | 
			
		||||
    postMessage({state: "csv"});
 | 
			
		||||
    const strm = sheet_to_csv_cb(ws, (csv) => {
 | 
			
		||||
      if(csv != null) postMessage({csv});
 | 
			
		||||
      else postMessage({state: "done"});
 | 
			
		||||
    });
 | 
			
		||||
    strm.resume();
 | 
			
		||||
  } catch(e) {
 | 
			
		||||
    /* Pass the error message back */
 | 
			
		||||
    postMessage({error: String(e.message || e) });
 | 
			
		||||
  }
 | 
			
		||||
}, false);
 | 
			
		||||
      `])));
 | 
			
		||||
      /* when the worker sends back data, add it to the DOM */
 | 
			
		||||
      worker.onmessage = function(e) {
 | 
			
		||||
        if(e.data.error) return setHTML(e.data.error);
 | 
			
		||||
        else if(e.data.state) return setState(e.data.state);
 | 
			
		||||
        setHTML(e.data.csv);
 | 
			
		||||
        setCnt(cnt => cnt+1);
 | 
			
		||||
      };
 | 
			
		||||
      setCnt(0); setState("");
 | 
			
		||||
      /* post a message to the worker with the URL to fetch */
 | 
			
		||||
      worker.postMessage({url});
 | 
			
		||||
    }}><b>Click to Start</b></button>
 | 
			
		||||
    <pre>State: <b>{state}</b><br/>Number of rows: <b>{cnt}</b></pre>
 | 
			
		||||
    <pre dangerouslySetInnerHTML={{ __html }}/>
 | 
			
		||||
  </> );
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
NodeJS streaming APIs are not available in the browser.  The following function
 | 
			
		||||
supplies a pseudo stream object compatible with the `to_csv` function:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
 | 
			
		||||
  XLSX.stream.set_readable(() => ({
 | 
			
		||||
    __done: false,
 | 
			
		||||
    // this function will be assigned by the SheetJS stream methods
 | 
			
		||||
    _read: function() { this.__done = true; },
 | 
			
		||||
    // this function is called by the stream methods
 | 
			
		||||
    push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
 | 
			
		||||
    resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
 | 
			
		||||
  }));
 | 
			
		||||
  return XLSX.stream.to_csv(ws, opts);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// assuming `workbook` is a workbook, stream the first sheet
 | 
			
		||||
const ws = workbook.Sheets[workbook.SheetNames[0]];
 | 
			
		||||
const strm = sheet_to_csv_cb(ws, (csv)=>{ if(csv != null) console.log(csv); });
 | 
			
		||||
strm.resume();
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
#### Web Workers
 | 
			
		||||
 | 
			
		||||
For processing large files in the browser, it is strongly encouraged to use Web
 | 
			
		||||
Workers. The [Worker demo](/docs/demos/worker#streaming-write) includes examples
 | 
			
		||||
using the File System Access API.
 | 
			
		||||
 | 
			
		||||
Typically, the file and stream processing occurs in the Web Worker.  CSV rows
 | 
			
		||||
can be sent back to the main thread in the callback:
 | 
			
		||||
 | 
			
		||||
```js title="worker.js"
 | 
			
		||||
/* load standalone script from CDN */
 | 
			
		||||
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
 | 
			
		||||
 | 
			
		||||
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
 | 
			
		||||
  XLSX.stream.set_readable(() => ({
 | 
			
		||||
    __done: false,
 | 
			
		||||
    // this function will be assigned by the SheetJS stream methods
 | 
			
		||||
    _read: function() { this.__done = true; },
 | 
			
		||||
    // this function is called by the stream methods
 | 
			
		||||
    push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
 | 
			
		||||
    resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
 | 
			
		||||
  }));
 | 
			
		||||
  return XLSX.stream.to_csv(ws, opts);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* this callback will run once the main context sends a message */
 | 
			
		||||
self.addEventListener('message', async(e) => {
 | 
			
		||||
  try {
 | 
			
		||||
    postMessage({state: "fetching " + e.data.url});
 | 
			
		||||
    /* Fetch file */
 | 
			
		||||
    const res = await fetch(e.data.url);
 | 
			
		||||
    const ab = await res.arrayBuffer();
 | 
			
		||||
 | 
			
		||||
    /* Parse file */
 | 
			
		||||
    postMessage({state: "parsing"});
 | 
			
		||||
    const wb = XLSX.read(ab, {dense: true});
 | 
			
		||||
    const ws = wb.Sheets[wb.SheetNames[0]];
 | 
			
		||||
 | 
			
		||||
    /* Generate CSV rows */
 | 
			
		||||
    postMessage({state: "csv"});
 | 
			
		||||
    const strm = sheet_to_csv_cb(ws, (csv) => {
 | 
			
		||||
      if(csv != null) postMessage({csv});
 | 
			
		||||
      else postMessage({state: "done"});
 | 
			
		||||
    });
 | 
			
		||||
    strm.resume();
 | 
			
		||||
  } catch(e) {
 | 
			
		||||
    /* Pass the error message back */
 | 
			
		||||
    postMessage({error: String(e.message || e) });
 | 
			
		||||
  }
 | 
			
		||||
}, false);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The main thread will receive messages with CSV rows for further processing:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
worker.onmessage = function(e) {
 | 
			
		||||
  if(e.data.error) { console.error(e.data.error); /* show an error message */ }
 | 
			
		||||
  else if(e.data.state) { console.info(e.data.state); /* current state */ }
 | 
			
		||||
  else {
 | 
			
		||||
    /* e.data.csv is the row generated by the stream */
 | 
			
		||||
    console.log(e.data.csv);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Deno
 | 
			
		||||
 | 
			
		||||
Deno does not support NodeJS streams in normal execution, so a wrapper is used.
 | 
			
		||||
This example fetches <https://sheetjs.com/pres.numbers> and prints CSV rows:
 | 
			
		||||
 | 
			
		||||
```ts title="sheet2csv.ts"
 | 
			
		||||
// @deno-types="https://cdn.sheetjs.com/xlsx-latest/package/types/index.d.ts"
 | 
			
		||||
import { stream, Sheet2CSVOpts, WorkSheet } from 'https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs';
 | 
			
		||||
 | 
			
		||||
interface Resumable { resume:()=>void; };
 | 
			
		||||
/* Generate row strings from a worksheet */
 | 
			
		||||
function sheet_to_csv_cb(ws: WorkSheet, cb:(d:string|null)=>void, opts: Sheet2CSVOpts = {}, batch = 1000): Resumable {
 | 
			
		||||
  stream.set_readable(() => ({
 | 
			
		||||
    __done: false,
 | 
			
		||||
    // this function will be assigned by the SheetJS stream methods
 | 
			
		||||
    _read: function() { this.__done = true; },
 | 
			
		||||
    // this function is called by the stream methods
 | 
			
		||||
    push: function(d: any) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
 | 
			
		||||
    resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
 | 
			
		||||
  }));
 | 
			
		||||
  return stream.to_csv(ws, opts) as Resumable;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Callback invoked on each row (string) and at the end (null) */
 | 
			
		||||
const csv_cb = (d:string|null) => {
 | 
			
		||||
  if(d == null) return;
 | 
			
		||||
  /* The strings include line endings, so raw write ops should be used */
 | 
			
		||||
  Deno.stdout.write(new TextEncoder().encode(d));
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* Fetch https://sheetjs.com/pres.numbers, parse, and get first worksheet */
 | 
			
		||||
import { read } from 'https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs';
 | 
			
		||||
const ab = await (await fetch("https://sheetjs.com/pres.numbers")).arrayBuffer();
 | 
			
		||||
const wb = read(ab, { dense: true });
 | 
			
		||||
const ws = wb.Sheets[wb.SheetNames[0]];
 | 
			
		||||
 | 
			
		||||
/* Create and start CSV stream */
 | 
			
		||||
sheet_to_csv_cb(ws, csv_cb).resume();
 | 
			
		||||
```
 | 
			
		||||
@ -1061,8 +1061,8 @@ Access http://localhost:8080 in your web browser.
 | 
			
		||||
 | 
			
		||||
:::note
 | 
			
		||||
 | 
			
		||||
The [Vite section of the Content demo](/docs/demos/content#vitejs) covers SheetJS-powered
 | 
			
		||||
asset loaders, suitable for static sites pulling data from fixed spreadsheets.
 | 
			
		||||
The [Vite section of the Content demo](/docs/demos/content#vitejs) covers asset
 | 
			
		||||
loaders. They are ideal for static sites pulling data from sheets at build time.
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -76,8 +76,7 @@ self.addEventListener('message', (e) => {
 | 
			
		||||
 | 
			
		||||
<details><summary><b>IE10 Binary Strings</b> (click to show)</summary>
 | 
			
		||||
 | 
			
		||||
In IE10, binary strings are more performant than `ArrayBuffer`. `XLSX.read`
 | 
			
		||||
supports binary strings with `type: "binary"`:
 | 
			
		||||
`XLSX.read` supports binary strings with `type: "binary"`:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
// usage: file_bs_to_wb(file, function(wb) { /* wb is a workbook object */ });
 | 
			
		||||
 | 
			
		||||
@ -840,103 +840,6 @@ Readable Stream.
 | 
			
		||||
- `XLSX.stream.to_html` is the streaming version of `XLSX.utils.sheet_to_html`.
 | 
			
		||||
- `XLSX.stream.to_json` is the streaming version of `XLSX.utils.sheet_to_json`.
 | 
			
		||||
 | 
			
		||||
<Tabs>
 | 
			
		||||
  <TabItem value="nodejs" label="NodeJS">
 | 
			
		||||
 | 
			
		||||
:::note
 | 
			
		||||
 | 
			
		||||
In a CommonJS context, NodeJS Streams and `fs` immediately work with SheetJS:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
const XLSX = require("xlsx"); // "just works"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
In NodeJS ESM, the dependency must be loaded manually:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
import * as XLSX from 'xlsx';
 | 
			
		||||
import { Readable } from 'stream';
 | 
			
		||||
 | 
			
		||||
XLSX.stream.set_readable(Readable); // manually load stream helpers
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Additionally, for file-related operations in NodeJS ESM, `fs` must be loaded:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
import * as XLSX from 'xlsx';
 | 
			
		||||
import * as fs from 'fs';
 | 
			
		||||
 | 
			
		||||
XLSX.set_fs(fs); // manually load fs helpers
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
**It is strongly encouraged to use CommonJS in NodeJS whenever possible.**
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
This example reads a worksheet passed as an argument to the script, pulls the
 | 
			
		||||
first worksheet, converts to CSV and writes to `out.csv`:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
const workbook = XLSX.readFile(process.argv[2]);
 | 
			
		||||
const worksheet = workbook.Sheets[workbook.SheetNames[0]];
 | 
			
		||||
// highlight-next-line
 | 
			
		||||
const stream = XLSX.stream.to_csv(worksheet);
 | 
			
		||||
 | 
			
		||||
const output_file_name = "out.csv";
 | 
			
		||||
// highlight-next-line
 | 
			
		||||
stream.pipe(fs.createWriteStream(output_file_name));
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
`stream.to_json` uses Object-mode streams. A `Transform` stream can be used to
 | 
			
		||||
generate a normal stream for streaming to a file or the screen:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
/* to_json returns an object-mode stream */
 | 
			
		||||
// highlight-next-line
 | 
			
		||||
var stream = XLSX.stream.to_json(worksheet, {raw:true});
 | 
			
		||||
 | 
			
		||||
/* this Transform stream converts JS objects to text and prints to screen */
 | 
			
		||||
var conv = new Transform({writableObjectMode:true});
 | 
			
		||||
conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); };
 | 
			
		||||
conv.pipe(process.stdout);
 | 
			
		||||
 | 
			
		||||
// highlight-next-line
 | 
			
		||||
stream.pipe(conv);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
  </TabItem>
 | 
			
		||||
  <TabItem value="deno" label="Deno">
 | 
			
		||||
 | 
			
		||||
Deno does not support NodeJS streams in normal execution, so a wrapper is used.
 | 
			
		||||
This demo converts a worksheet to CSV and prints each row to the screen:
 | 
			
		||||
 | 
			
		||||
```ts
 | 
			
		||||
// @deno-types="https://cdn.sheetjs.com/xlsx-latest/package/types/index.d.ts"
 | 
			
		||||
import {utils, stream, set_cptable} from 'https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs';
 | 
			
		||||
 | 
			
		||||
/* `Readable` will be compatible with how SheetJS uses `stream.Readable` */
 | 
			
		||||
function NodeReadableCB(cb:(d:any)=>void) {
 | 
			
		||||
  var rd = {
 | 
			
		||||
    __done: false,
 | 
			
		||||
    _read: function() {},
 | 
			
		||||
    push: function(d: any) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
 | 
			
		||||
    resume: function pump() {for(var i = 0; i < 10000 && !this.__done; ++i) rd._read(); if(!rd.__done) setTimeout(pump, 0); }
 | 
			
		||||
  };
 | 
			
		||||
  return rd;
 | 
			
		||||
}
 | 
			
		||||
function NodeReadable(rd: any) { return function() { return rd; }; }
 | 
			
		||||
/* The callback gets each CSV row.  It will be `null` when the stream is drained */
 | 
			
		||||
const rt = NodeReadableCB((d: any) => { if(d != null) console.log(d); });
 | 
			
		||||
const Readable = NodeReadable(rt);
 | 
			
		||||
stream.set_readable(Readable);
 | 
			
		||||
 | 
			
		||||
/* wire up and start the stream */
 | 
			
		||||
const rd = stream.to_csv(worksheet);
 | 
			
		||||
rd.resume();
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
  </TabItem>
 | 
			
		||||
</Tabs>
 | 
			
		||||
 | 
			
		||||
Examples are included in ["Large Datasets"](/docs/demos/stream#streaming-write).
 | 
			
		||||
 | 
			
		||||
<https://sheetaki.now.sh/> pipes write streams to NodeJS responses.
 | 
			
		||||
 | 
			
		||||
@ -599,7 +599,7 @@ Z.TEST
 | 
			
		||||
 | 
			
		||||
In some cases, seemingly valid formulae may be rejected by spreadsheet software.
 | 
			
		||||
 | 
			
		||||
`EVALUATE` unprefixed function is supported in WPS Office formulae.  It is not
 | 
			
		||||
valid in a cell formula in Excel. It can be used in an Excel defined name when
 | 
			
		||||
exporting to XLSM format but not XLSX. This is a limitation of Excel.  Since WPS
 | 
			
		||||
Office accepts files with `EVALUATE`, the writer does not warn or throw errors.
 | 
			
		||||
`EVALUATE` is a supported function in WPS Office.  It is not valid in a cell
 | 
			
		||||
formula in Excel. It can be used in an Excel defined name when exporting to XLSM
 | 
			
		||||
format but not XLSX. This is a limitation of Excel.  Since WPS Office accepts
 | 
			
		||||
files with `EVALUATE`, the writer does not warn or throw errors.
 | 
			
		||||
@ -70,7 +70,7 @@ The read functions accept an options argument:
 | 
			
		||||
  errors on single worksheets, allowing you to read from the worksheets that do
 | 
			
		||||
  parse properly. Setting `WTF:true` forces those errors to be thrown.
 | 
			
		||||
- By default, "sparse" mode worksheets are generated. Individual cells are
 | 
			
		||||
  accessed by indexing the worksheet object with an A1-style address.  "dense"
 | 
			
		||||
  accessed by indexing the worksheet object with an A1-Style address.  "dense"
 | 
			
		||||
  worksheets store cells in an array of arrays at `sheet["!data"]`.
 | 
			
		||||
 | 
			
		||||
### Input Type
 | 
			
		||||
 | 
			
		||||
@ -37,8 +37,8 @@ manifest with error messages such as `Invalid string length`.
 | 
			
		||||
 | 
			
		||||
There are memory bottlenecks associated with string addresses. A number of bugs
 | 
			
		||||
have been reported to the V8 and Chromium projects on this subject. While those
 | 
			
		||||
bugs are being resolved, for sheets containing >100K rows, dense mode worksheets
 | 
			
		||||
should be used.
 | 
			
		||||
bugs are being resolved, for sheets containing hundreds of thousands of rows,
 | 
			
		||||
dense mode worksheets should be used.
 | 
			
		||||
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -15,11 +15,11 @@
 | 
			
		||||
  },
 | 
			
		||||
  "dependencies": {
 | 
			
		||||
    "@cmfcmf/docusaurus-search-local": "0.11.0",
 | 
			
		||||
    "@docusaurus/core": "2.1.0",
 | 
			
		||||
    "@docusaurus/plugin-client-redirects": "2.1.0",
 | 
			
		||||
    "@docusaurus/preset-classic": "2.1.0",
 | 
			
		||||
    "@docusaurus/theme-common": "2.1.0",
 | 
			
		||||
    "@docusaurus/theme-live-codeblock": "2.1.0",
 | 
			
		||||
    "@docusaurus/core": "2.2.0",
 | 
			
		||||
    "@docusaurus/plugin-client-redirects": "2.2.0",
 | 
			
		||||
    "@docusaurus/preset-classic": "2.2.0",
 | 
			
		||||
    "@docusaurus/theme-common": "2.2.0",
 | 
			
		||||
    "@docusaurus/theme-live-codeblock": "2.2.0",
 | 
			
		||||
    "@mdx-js/react": "1.6.22",
 | 
			
		||||
    "clsx": "1.2.1",
 | 
			
		||||
    "prism-react-renderer": "1.3.5",
 | 
			
		||||
@ -28,7 +28,7 @@
 | 
			
		||||
    "xlsx": "https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz"
 | 
			
		||||
  },
 | 
			
		||||
  "devDependencies": {
 | 
			
		||||
    "@docusaurus/module-type-aliases": "2.1.0"
 | 
			
		||||
    "@docusaurus/module-type-aliases": "2.2.0"
 | 
			
		||||
  },
 | 
			
		||||
  "browserslist": {
 | 
			
		||||
    "production": [
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user