forked from sheetjs/docs.sheetjs.com
		
	pst
This commit is contained in:
		
							parent
							
								
									35d76f9a62
								
							
						
					
					
						commit
						66c787222e
					
				@ -156,7 +156,7 @@ export class AppComponent {
 | 
			
		||||
 | 
			
		||||
:::note
 | 
			
		||||
 | 
			
		||||
This demo was last run on 2023-07-24 using Angular CLI `16.1.5`
 | 
			
		||||
This demo was last run on 2023-10-22 using Angular CLI `16.2.7`
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
@ -169,7 +169,7 @@ npx @angular/cli analytics disable -g
 | 
			
		||||
1) Create a new project:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
npx @angular/cli new --minimal --defaults --no-interactive sheetjs-angular
 | 
			
		||||
npx @angular/cli@16.2.7 new --minimal --defaults --no-interactive sheetjs-angular
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
2) Install the SheetJS dependency and start the dev server:
 | 
			
		||||
@ -184,10 +184,11 @@ npm start`}
 | 
			
		||||
 | 
			
		||||
3) Open a web browser and access the displayed URL (`http://localhost:4200`)
 | 
			
		||||
 | 
			
		||||
4) Replace `src/app/app.component.ts` with the code snippet.
 | 
			
		||||
4) Replace `src/app/app.component.ts` with the previous code snippet.
 | 
			
		||||
 | 
			
		||||
The page will refresh and show a table with an Export button.  Click the button
 | 
			
		||||
and the page will attempt to download `SheetJSAngularAoO.xlsx`.
 | 
			
		||||
The page will refresh and show a table with an Export button. Click the button
 | 
			
		||||
and the page will attempt to download `SheetJSAngularAoO.xlsx`. Open the file
 | 
			
		||||
with a spreadsheet editor.
 | 
			
		||||
 | 
			
		||||
5) Stop the dev server and build the site:
 | 
			
		||||
 | 
			
		||||
@ -263,7 +264,7 @@ export class AppComponent {
 | 
			
		||||
 | 
			
		||||
:::note
 | 
			
		||||
 | 
			
		||||
This demo was last run on 2023-07-24 using Angular CLI `16.1.5`
 | 
			
		||||
This demo was last run on 2023-10-22 using Angular CLI `16.2.7`
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
@ -276,7 +277,7 @@ npx @angular/cli analytics disable -g
 | 
			
		||||
1) Create a new project:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
npx @angular/cli new --minimal --defaults --no-interactive sheetjs-angular
 | 
			
		||||
npx @angular/cli@16.2.7 new --minimal --defaults --no-interactive sheetjs-angular
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
2) Install the SheetJS dependency and start the dev server:
 | 
			
		||||
@ -291,10 +292,11 @@ npm start`}
 | 
			
		||||
 | 
			
		||||
3) Open a web browser and access the displayed URL (`http://localhost:4200`)
 | 
			
		||||
 | 
			
		||||
4) Replace `src/app/app.component.ts` with the code snippet.
 | 
			
		||||
4) Replace `src/app/app.component.ts` with the previous code snippet.
 | 
			
		||||
 | 
			
		||||
The page will refresh and show a table with an Export button.  Click the button
 | 
			
		||||
and the page will attempt to download `SheetJSAngularHTML.xlsx`.
 | 
			
		||||
The page will refresh and show a table with an Export button. Click the button
 | 
			
		||||
and the page will attempt to download `SheetJSAngularHTML.xlsx`. Open the file
 | 
			
		||||
with a spreadsheet editor.
 | 
			
		||||
 | 
			
		||||
5) Stop the dev server and build the site:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -40,7 +40,7 @@ import { read, utils, writeFileXLSX } from 'xlsx';
 | 
			
		||||
:::warning Parcel Bug
 | 
			
		||||
 | 
			
		||||
Errors of the form `Could not statically evaluate fs call` stem from a Parcel
 | 
			
		||||
bug. Upgrade to Parcel version 1.5.0 or later.
 | 
			
		||||
bug[^1]. Upgrade to Parcel version 1.5.0 or later.
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
@ -164,3 +164,5 @@ npx http-server dist
 | 
			
		||||
 | 
			
		||||
Access the displayed URL (typically `http://localhost:8080/`) in a web browser.
 | 
			
		||||
Click on "Click here to export" to generate a file.
 | 
			
		||||
 | 
			
		||||
[^1]: See [Issue 523 in the Parcel issue tracker](https://github.com/parcel-bundler/parcel/pull/523#issuecomment-357486164)
 | 
			
		||||
@ -2,7 +2,7 @@
 | 
			
		||||
title: Sheets in ExpressJS
 | 
			
		||||
sidebar_label: ExpressJS
 | 
			
		||||
pagination_prev: demos/net/network
 | 
			
		||||
pagination_next: demos/net/email
 | 
			
		||||
pagination_next: demos/net/email/index
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
import current from '/version.js';
 | 
			
		||||
 | 
			
		||||
@ -2,7 +2,7 @@
 | 
			
		||||
title: Sheets in Drash
 | 
			
		||||
sidebar_label: Drash
 | 
			
		||||
pagination_prev: demos/net/network
 | 
			
		||||
pagination_next: demos/net/email
 | 
			
		||||
pagination_next: demos/net/email/index
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
import current from '/version.js';
 | 
			
		||||
 | 
			
		||||
@ -2,7 +2,7 @@
 | 
			
		||||
title: Sheets in Elysia
 | 
			
		||||
sidebar_label: ElysiaJS
 | 
			
		||||
pagination_prev: demos/net/network
 | 
			
		||||
pagination_next: demos/net/email
 | 
			
		||||
pagination_next: demos/net/email/index
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
import current from '/version.js';
 | 
			
		||||
 | 
			
		||||
@ -2,7 +2,7 @@
 | 
			
		||||
title: Sheets in NestJS
 | 
			
		||||
sidebar_label: NestJS
 | 
			
		||||
pagination_prev: demos/net/network
 | 
			
		||||
pagination_next: demos/net/email
 | 
			
		||||
pagination_next: demos/net/email/index
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
import current from '/version.js';
 | 
			
		||||
 | 
			
		||||
@ -2,7 +2,7 @@
 | 
			
		||||
title: Sheets in FastifyJS
 | 
			
		||||
sidebar_label: FastifyJS
 | 
			
		||||
pagination_prev: demos/net/network
 | 
			
		||||
pagination_next: demos/net/email
 | 
			
		||||
pagination_next: demos/net/email/index
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
import current from '/version.js';
 | 
			
		||||
 | 
			
		||||
@ -1,7 +1,7 @@
 | 
			
		||||
---
 | 
			
		||||
title: HTTP Server Processing
 | 
			
		||||
pagination_prev: demos/net/network
 | 
			
		||||
pagination_next: demos/net/email
 | 
			
		||||
pagination_next: demos/net/email/index
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
import current from '/version.js';
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										388
									
								
								docz/docs/03-demos/03-net/04-email/11-pst.md
									
									
									
									
									
										Normal file
									
								
							
							
								
								
								
								
								
									
									
								
							
						
						
									
										388
									
								
								docz/docs/03-demos/03-net/04-email/11-pst.md
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,388 @@
 | 
			
		||||
---
 | 
			
		||||
title: Sheets in PST Mailboxes
 | 
			
		||||
sidebar_label: PST Mailboxes
 | 
			
		||||
pagination_prev: demos/net/server/index
 | 
			
		||||
pagination_next: demos/net/headless
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
import current from '/version.js';
 | 
			
		||||
import Tabs from '@theme/Tabs';
 | 
			
		||||
import TabItem from '@theme/TabItem';
 | 
			
		||||
import CodeBlock from '@theme/CodeBlock';
 | 
			
		||||
 | 
			
		||||
<head>
 | 
			
		||||
  <script src="/pst/pstextractor.js"></script>
 | 
			
		||||
</head>
 | 
			
		||||
 | 
			
		||||
PST (Personal Storage Table) is a common file format for storing messages.
 | 
			
		||||
Electronic discovery commonly involves extracting data from attached
 | 
			
		||||
spreadsheets in e-mail messages stored in PST archives.
 | 
			
		||||
 | 
			
		||||
`pst-extractor`[^1] is a NodeJS module designed for extracting objects from PST
 | 
			
		||||
files. It has been used to extract spreadsheets from the Enron Corpus[^2] and
 | 
			
		||||
other large mailboxes.
 | 
			
		||||
 | 
			
		||||
[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing
 | 
			
		||||
data from spreadsheets.
 | 
			
		||||
 | 
			
		||||
This demo uses `pst-extractor` and SheetJS to read spreadsheets. We'll explore
 | 
			
		||||
how to load SheetJS in a NodeJS script or website, extract spreadsheet files,
 | 
			
		||||
and generate HTML and CSV views of the underlying data.
 | 
			
		||||
 | 
			
		||||
The ["Live Demo"](#live-demo) reads PST files. Individual spreadsheets within
 | 
			
		||||
the file can be downloaded or previewed in the browser.
 | 
			
		||||
 | 
			
		||||
:::note
 | 
			
		||||
 | 
			
		||||
This demo was last tested on 2023 October 22 against `pst-extractor` 1.9.0
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
## Overview
 | 
			
		||||
 | 
			
		||||
The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be
 | 
			
		||||
imported from scripts that use `pst-extractor`.
 | 
			
		||||
 | 
			
		||||
### Parsing PST Files
 | 
			
		||||
 | 
			
		||||
The `pst-extractor` module exposes a `PSTFile` class. The constructor requires a
 | 
			
		||||
proper NodeJS buffer.
 | 
			
		||||
 | 
			
		||||
The following snippet reads and parses `enron.pst` from the local filesystem.
 | 
			
		||||
`fs.readFileSync`[^3] accepts a filename and returns a Buffer:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
const fs = require("fs"), PSTExtractor = require("pst-extractor");
 | 
			
		||||
const file = fs.readFileSync("enron.pst");
 | 
			
		||||
const pst = new (PSTExtractor.PSTFile)(file);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Walking the Tree
 | 
			
		||||
 | 
			
		||||
`pst-extractor` presents a tree-like structure to inspect the contents of the
 | 
			
		||||
PST file. It is recommended to use recursive functions to walk the tree.
 | 
			
		||||
 | 
			
		||||
The following tree walker will collect all XLSX and XLS attachments:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
/**
 * Walk a PST folder tree and collect spreadsheet attachments.
 *
 * Subfolders are visited first, then every message in the folder is scanned.
 * Attachments whose filename ends in `.xls`, `.xlsx`, `.xlsm` or `.xlsb`
 * (case-insensitive) are appended to `arr`.
 *
 * @param {object} f   folder object (e.g. the result of `pst.getRootFolder()`)
 * @param {Array}  arr array that receives the matching attachment objects
 */
function walk(f,arr) {
  if(f.hasSubfolders) for(const sf of f.getSubFolders()) walk(sf,arr);
  if(f.contentCount <= 0) return;
  /* `getNextChild` returns null once every message has been visited */
  for(let e = f.getNextChild(); e != null; e = f.getNextChild()) {
    for(let i = 0; i < e.numberOfAttachments; ++i) {
      const a = e.getAttachment(i);
      /* spreadsheet test by filename; the dot is escaped so that only a real
         extension matches (an unescaped `.` matches any character) */
      if(/\.xls[xmb]?$/i.test(a.filename)) arr.push(a);
    }
  }
}
 | 
			
		||||
 | 
			
		||||
/* generate a list of attachments */
 | 
			
		||||
const files = [];
 | 
			
		||||
walk(pst.getRootFolder(), files);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Generating Buffers
 | 
			
		||||
 | 
			
		||||
The `PSTAttachment` class holds attachment metadata. To avoid loading everything
 | 
			
		||||
in memory, the raw data is exposed as a custom stream object. Since the SheetJS
 | 
			
		||||
`read` function requires data in a `Buffer` or `Uint8Array`, a helper function
 | 
			
		||||
is used to collect the data:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
/**
 * Read the complete contents of an attachment into a NodeJS Buffer.
 *
 * @param {object} file attachment object exposing a `fileInputStream`
 * @returns {Buffer} buffer filled by the stream's `readCompletely` method
 */
function collect(file) {
  const { fileInputStream } = file;
  /* the buffer is sized from the stream's `_length.low` field */
  const out = Buffer.alloc(fileInputStream._length.low);
  fileInputStream.readCompletely(out);
  return out;
}
 | 
			
		||||
 | 
			
		||||
/* collect data from the first attachment */
 | 
			
		||||
const buf0 = collect(files[0]);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Processing Attachments
 | 
			
		||||
 | 
			
		||||
Given a NodeJS Buffer, the SheetJS `read` method[^4] parses the data and returns
 | 
			
		||||
a workbook object[^5]. Individual worksheets can be extracted from the workbook
 | 
			
		||||
and converted to CSV[^6] or HTML[^7].
 | 
			
		||||
 | 
			
		||||
The following example prints the contents of each worksheet in CSV form:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
const XLSX = require("xlsx");
 | 
			
		||||
 | 
			
		||||
/* parse workbook and print CSV contents of each sheet */
const wb = XLSX.read(buf0);
wb.SheetNames.forEach(n => {
  const ws = wb.Sheets[n];
  const csv = XLSX.utils.sheet_to_csv(ws);
  /* `buf0` was collected from `files[0]`, so report that attachment's name */
  console.log(`#### ${files[0].filename} ! ${n}`);
  console.log(csv);
});
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Browser Caveats
 | 
			
		||||
 | 
			
		||||
The [SheetJS Standalone scripts](/docs/getting-started/installation/standalone)
 | 
			
		||||
can be loaded through a `SCRIPT` tag.
 | 
			
		||||
 | 
			
		||||
This demo uses [a special `pst-extractor` build](#browser-build) for the web.
 | 
			
		||||
 | 
			
		||||
Compared to the NodeJS build, browser scripts require special Buffer wrappers.
 | 
			
		||||
For example, the following function will fail since the library does not support
 | 
			
		||||
`ArrayBuffer` objects:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
/* counterexample: the raw ArrayBuffer is handed to the PSTFile constructor */
async function error_fetch_and_parse_pst(url) {
  const res = await fetch(url);
  const ab = await res.arrayBuffer();
  const { PSTFile } = PSTExtractor;
  // this will throw an error
  return new PSTFile(ab);
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The browser build exposes the `Buffer` object in the `PSTExtractor` global:
 | 
			
		||||
 | 
			
		||||
```js
 | 
			
		||||
/* fetch a PST file and parse it; the ArrayBuffer is wrapped in a Buffer first */
async function correct_fetch_and_parse_pst(url) {
  const ab = await (await fetch(url)).arrayBuffer();
// highlight-next-line
  const buf = PSTExtractor.Buffer.from(ab);
  return new (PSTExtractor.PSTFile)(buf);
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Browser Build
 | 
			
		||||
 | 
			
		||||
The `pst-extractor` library is designed for NodeJS. Parts of the library expect
 | 
			
		||||
a NodeJS `Buffer`, which does not exist in the browser. A fake `Buffer` can be
 | 
			
		||||
added and exposed in a script.
 | 
			
		||||
 | 
			
		||||
[`pstextractor.js`](pathname:///pst/pstextractor.js) is loaded in the demo page.
 | 
			
		||||
 | 
			
		||||
<details><summary><b>Build instructions</b> (click to show)</summary>
 | 
			
		||||
 | 
			
		||||
1) Initialize a new NodeJS project and install the dependency:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
mkdir pstextract
 | 
			
		||||
cd pstextract
 | 
			
		||||
npm init -y
 | 
			
		||||
npm i --save pst-extractor@1.9.0
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
2) Save the following to `shim.js`:
 | 
			
		||||
 | 
			
		||||
```js title="shim.js"
 | 
			
		||||
const PSTExtractor = require("pst-extractor");
 | 
			
		||||
module.exports = PSTExtractor;
 | 
			
		||||
module.exports.Buffer = Buffer;
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
3) Build the script:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
npx browserify@17.0.0 -s PSTExtractor -o pstextractor.js shim.js
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
## Demos
 | 
			
		||||
 | 
			
		||||
### NodeJS
 | 
			
		||||
 | 
			
		||||
This demo will fetch a [test PST](pathname:///pst/enron.pst) and extract all
 | 
			
		||||
embedded spreadsheets. The script can be adapted to read local PST files or pull
 | 
			
		||||
PST files from a different URL.
 | 
			
		||||
 | 
			
		||||
0) Initialize a new project:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
mkdir sheetjs-pst
 | 
			
		||||
cd sheetjs-pst
 | 
			
		||||
npm init -y
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
1) Install the SheetJS NodeJS module and `pst-extractor`:
 | 
			
		||||
 | 
			
		||||
<Tabs groupId="pm">
 | 
			
		||||
  <TabItem value="npm" label="npm">
 | 
			
		||||
<CodeBlock language="bash">{`\
 | 
			
		||||
npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pst-extractor`}
 | 
			
		||||
</CodeBlock>
 | 
			
		||||
  </TabItem>
 | 
			
		||||
  <TabItem value="pnpm" label="pnpm">
 | 
			
		||||
<CodeBlock language="bash">{`\
 | 
			
		||||
pnpm install https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pst-extractor`}
 | 
			
		||||
</CodeBlock>
 | 
			
		||||
  </TabItem>
 | 
			
		||||
  <TabItem value="yarn" label="Yarn" default>
 | 
			
		||||
<CodeBlock language="bash">{`\
 | 
			
		||||
yarn add https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pst-extractor`}
 | 
			
		||||
</CodeBlock>
 | 
			
		||||
  </TabItem>
 | 
			
		||||
</Tabs>
 | 
			
		||||
 | 
			
		||||
2) Download [`SheetJSPST.js`](pathname:///pst/SheetJSPST.js) into the project folder:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
curl -LO https://docs.sheetjs.com/pst/SheetJSPST.js
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
3) Run the script:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
node SheetJSPST.js
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The process will fetch [the test PST](pathname:///pst/enron.pst) and extract
 | 
			
		||||
the embedded spreadsheets. The terminal will display info on the exported files.
 | 
			
		||||
 | 
			
		||||
:::note pass
 | 
			
		||||
 | 
			
		||||
Lines starting with `saving file` show how attachments correspond to files. The
 | 
			
		||||
following line states that the first attachment (index `0`) was originally named
 | 
			
		||||
`RedRockA.xls` and was saved to `file0.xls` on the file system:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
saving file 0 |RedRockA.xls| to file0.xls
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Lines starting with `####` show the attachment file name and the worksheet name.
 | 
			
		||||
The following line explains that there is a worksheet named `"Oct 26, 2001"` in
 | 
			
		||||
the file `RedRockA.xls`:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
#### RedRockA.xls ! Oct 26, 2001
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Every other line is a CSV row from the named worksheet. For example, the first
 | 
			
		||||
four lines of worksheet `"Oct 26, 2001"` in `RedRockA.xls` are shown below:
 | 
			
		||||
 | 
			
		||||
```text
 | 
			
		||||
#### RedRockA.xls ! Oct 26, 2001
 | 
			
		||||
// highlight-start
 | 
			
		||||
RED ROCK EXPANSION PROJECT,,,,,,,,,,,,,,,,,,
 | 
			
		||||
,,,,,,,,,,,,,,,,,,
 | 
			
		||||
,,,, , , ,,,,,,,,,,,,
 | 
			
		||||
SHIPPER,CONTRACT #,Term,MMBtu/d,RECEIPT POINT,DELIVERY POINT,MMBtu/d,,,,,,,,,,,,
 | 
			
		||||
// highlight-end
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
### Live Demo
 | 
			
		||||
 | 
			
		||||
This demo reads PST mailboxes. Due to browser limitations, PST files larger than
 | 
			
		||||
100 MB may crash the browser.
 | 
			
		||||
 | 
			
		||||
After parsing the PST file, the "Attachments" table will list attached XLSX and
 | 
			
		||||
XLS spreadsheets in the file. The "preview" link will display an HTML table with
 | 
			
		||||
the data in the spreadsheet. The "download" link will download the attachment.
 | 
			
		||||
 | 
			
		||||
The [test file](pathname:///pst/enron.pst) was based on the EDRM clean extract
 | 
			
		||||
from the "Enron Corpus" and includes a few XLS attachments.
 | 
			
		||||
 | 
			
		||||
:::caution pass
 | 
			
		||||
 | 
			
		||||
If the live demo shows a message
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
Please reload the page
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
please refresh the page.  This is a known bug in the documentation generator.
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
```jsx live
 | 
			
		||||
function SheetJSPreviewPSTSheets() {
 | 
			
		||||
  const [ files, setFiles ] = React.useState([]);
 | 
			
		||||
  const [ __html, setHTML ] = React.useState("");
 | 
			
		||||
 | 
			
		||||
  /* recursively walk PST and collect attachments */
 | 
			
		||||
  const walk = (f,arr) => {
 | 
			
		||||
    if(f.hasSubfolders) for(let sf of f.getSubFolders()) walk(sf,arr);
 | 
			
		||||
    if(f.contentCount <= 0) return;
 | 
			
		||||
    for(let e = f.getNextChild(); e != null; e = f.getNextChild()) {
 | 
			
		||||
      for(let i = 0; i < e.numberOfAttachments; ++i) {
 | 
			
		||||
        var a = e.getAttachment(i);
 | 
			
		||||
        /* XLS spreadsheet test by filename */
 | 
			
		||||
        if(/.xls[xmb]?$/.test(a.filename)) arr.push(a);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /* collect data from the attachment into a "Buffer" */
 | 
			
		||||
  const collect = (j) => {
 | 
			
		||||
    const strm = files[j].fileInputStream;
 | 
			
		||||
    const data = new PSTExtractor.Buffer(strm._length.low);
 | 
			
		||||
    strm.readCompletely(data);
 | 
			
		||||
    return data;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /* view selected attachment */
 | 
			
		||||
  const view = (j) => {
 | 
			
		||||
    const data = collect(j);
 | 
			
		||||
 | 
			
		||||
    /* parse */
 | 
			
		||||
    const wb = XLSX.read(data);
 | 
			
		||||
 | 
			
		||||
    /* convert first sheet to HTML */
 | 
			
		||||
    const ws = wb.Sheets[wb.SheetNames[0]];
 | 
			
		||||
    setHTML(XLSX.utils.sheet_to_html(ws));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /* process array buffer */
 | 
			
		||||
  const process_ab = (ab) => {
 | 
			
		||||
    const pst = new (PSTExtractor.PSTFile)(new PSTExtractor.Buffer(ab));
 | 
			
		||||
    const data = [];
 | 
			
		||||
    walk(pst.getRootFolder(), data);
 | 
			
		||||
    setFiles(data);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  /* on click, fetch and process file */
 | 
			
		||||
  const doit = async() => {
 | 
			
		||||
    const ab = await (await fetch("/pst/enron.pst")).arrayBuffer();
 | 
			
		||||
    process_ab(ab);
 | 
			
		||||
  };
 | 
			
		||||
  const chg = async(e) => process_ab(await e.target.files[0].arrayBuffer());
 | 
			
		||||
 | 
			
		||||
  /* download selected attachment */
 | 
			
		||||
  const dl = (j) => {
 | 
			
		||||
    const a = document.createElement("a");
 | 
			
		||||
    a.download = files[j].filename;
 | 
			
		||||
    a.href = URL.createObjectURL(new Blob([collect(j)]));
 | 
			
		||||
    document.body.appendChild(a);
 | 
			
		||||
    a.click();
 | 
			
		||||
    document.body.removeChild(a);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if(typeof PSTExtractor == "undefined") return <b>Please reload the page</b>;
 | 
			
		||||
  return ( <>
 | 
			
		||||
    <p>Use the file input to select a file, or click "Use a Sample PST"</p>
 | 
			
		||||
    <input type="file" accept=".pst" onChange={chg}/>
 | 
			
		||||
    <button onClick={doit}>Use a Sample PST!</button><br/><br/>
 | 
			
		||||
    <table><thead><th colspan="3">Attachments</th></thead>
 | 
			
		||||
      <tbody>{files.map((f,j) => (
 | 
			
		||||
        <tr key={j}><th>{f.filename}</th>
 | 
			
		||||
          <td><a onClick={()=>view(j)}>(preview)</a></td>
 | 
			
		||||
          <td><a onClick={()=>dl(j)}>(download)</a></td>
 | 
			
		||||
        </tr>
 | 
			
		||||
      ))}</tbody>
 | 
			
		||||
    </table>
 | 
			
		||||
    <b>Preview of first worksheet</b><br/>
 | 
			
		||||
    <div dangerouslySetInnerHTML={{__html}}></div>
 | 
			
		||||
  </> );
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
[^1]: The project has no official website. The official [repository](https://github.com/epfromer/pst-extractor) is hosted on GitHub.
 | 
			
		||||
[^2]: Extracted spreadsheets are [available on GitHub](https://github.com/SheetJS/enron_xls)
 | 
			
		||||
[^3]: See [`fs.readFileSync`](https://nodejs.org/api/fs.html#fsreadfilesyncpath-options) in the NodeJS documentation
 | 
			
		||||
[^4]: See [`read` in "Reading Files"](/docs/api/parse-options)
 | 
			
		||||
[^5]: See ["Workbook Object"](/docs/csf/book)
 | 
			
		||||
[^6]: See [`sheet_to_csv` in "CSV and Text"](/docs/api/utilities/csv#delimiter-separated-output)
 | 
			
		||||
[^7]: See [`sheet_to_html` in "Utilities"](/docs/api/utilities/html#html-table-output)
 | 
			
		||||
							
								
								
									
										4
									
								
								docz/docs/03-demos/03-net/04-email/_category_.json
									
									
									
									
									
										Normal file
									
								
							
							
								
								
								
								
								
									
									
								
							
						
						
									
										4
									
								
								docz/docs/03-demos/03-net/04-email/_category_.json
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,4 @@
 | 
			
		||||
{
 | 
			
		||||
  "label": "Electronic Mail",
 | 
			
		||||
  "position": 4
 | 
			
		||||
}
 | 
			
		||||
@ -1,16 +1,12 @@
 | 
			
		||||
---
 | 
			
		||||
title: Electronic Mail
 | 
			
		||||
pagination_prev: demos/net/server/index
 | 
			
		||||
pagination_next: demos/net/headless
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
import current from '/version.js';
 | 
			
		||||
import CodeBlock from '@theme/CodeBlock';
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
<head>
 | 
			
		||||
  <script src="/pst/pstextractor.js"></script>
 | 
			
		||||
</head>
 | 
			
		||||
 | 
			
		||||
Electronic mail ("email" or "e-mail") is an essential part of modern business
 | 
			
		||||
workflows. Spreadsheets are commonly passed around and processed.
 | 
			
		||||
 | 
			
		||||
@ -367,98 +363,4 @@ proprietary mail and email account file formats.
 | 
			
		||||
 | 
			
		||||
### PST
 | 
			
		||||
 | 
			
		||||
`PST` is a common file format. The `pst-extractor` library is designed for
 | 
			
		||||
extracting messages and attachments from `PST` files in NodeJS and the browser.
 | 
			
		||||
 | 
			
		||||
This demo uses [a special build](pathname:///pst/pstextractor.js) for the web.
 | 
			
		||||
 | 
			
		||||
<details><summary><b>Build details</b> (click to show)</summary>
 | 
			
		||||
 | 
			
		||||
1) Initialize a new NodeJS project and install the dependency:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
mkdir pstextract
 | 
			
		||||
cd pstextract
 | 
			
		||||
npm init -y
 | 
			
		||||
npm i --save pst-extractor@1.9.0
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
2) Save the following to `shim.js`:
 | 
			
		||||
 | 
			
		||||
```js title="shim.js"
 | 
			
		||||
const PSTExtractor = require("pst-extractor");
 | 
			
		||||
module.exports = PSTExtractor;
 | 
			
		||||
module.exports.Buffer = Buffer;
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
3) Build the script:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
npx browserify@17.0.0 -s PSTExtractor -o pstextractor.js shim.js
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
</details>
 | 
			
		||||
 | 
			
		||||
The [test file](pathname:///pst/enron.pst) was based on the EDRM clean extract
 | 
			
		||||
from the "Enron Corpus" and includes a few XLS attachments.
 | 
			
		||||
 | 
			
		||||
```jsx live
 | 
			
		||||
function SheetJSPreviewPSTSheets() {
 | 
			
		||||
  const [ files, setFiles ] = React.useState([]);
 | 
			
		||||
  const [ __html, setHTML ] = React.useState("");
 | 
			
		||||
 | 
			
		||||
  /* recursively walk PST and collect attachments */
 | 
			
		||||
  const walk = (f,arr) => {
 | 
			
		||||
    if(f.hasSubfolders) for(let sf of f.getSubFolders()) walk(sf,arr);
 | 
			
		||||
    if(f.contentCount > 0) for(let e = f.getNextChild(); e != null; e = f.getNextChild()) {
 | 
			
		||||
      for(var i = 0; i < e.numberOfAttachments; ++i) {
 | 
			
		||||
        var a = e.getAttachment(i);
 | 
			
		||||
        /* XLS spreadsheet test by filename */
 | 
			
		||||
        if(a.filename.endsWith(".xls")) arr.push(a);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /* view selected attachment */
 | 
			
		||||
  const view = (j) => {
 | 
			
		||||
    /* collect data into a "Buffer" */
 | 
			
		||||
    const strm = files[j].fileInputStream;
 | 
			
		||||
    const data = new PSTExtractor.Buffer(strm._length.low);
 | 
			
		||||
    strm.readCompletely(data);
 | 
			
		||||
 | 
			
		||||
    /* parse */
 | 
			
		||||
    const wb = XLSX.read(data);
 | 
			
		||||
 | 
			
		||||
    /* convert first sheet to HTML */
 | 
			
		||||
    const ws = wb.Sheets[wb.SheetNames[0]];
 | 
			
		||||
    setHTML(XLSX.utils.sheet_to_html(ws));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /* process array buffer */
 | 
			
		||||
  const process_ab = (ab) => {
 | 
			
		||||
    const pst = new (PSTExtractor.PSTFile)(new PSTExtractor.Buffer(ab));
 | 
			
		||||
    const data = [];
 | 
			
		||||
    walk(pst.getRootFolder(), data);
 | 
			
		||||
    setFiles(data);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  /* on click, fetch and process file */
 | 
			
		||||
  const doit = async() => {
 | 
			
		||||
    const ab = await (await fetch("/pst/enron.pst")).arrayBuffer();
 | 
			
		||||
    process_ab(ab);
 | 
			
		||||
  };
 | 
			
		||||
  const chg = async(e) => process_ab(await e.target.files[0].arrayBuffer());
 | 
			
		||||
 | 
			
		||||
  return ( <>
 | 
			
		||||
    <p>Use the file input to select a file, or click "Use a Sample PST"</p>
 | 
			
		||||
    <button onClick={doit}>Use a Sample PST!</button><br/><br/>
 | 
			
		||||
    <input type="file" accept=".pst" onChange={chg}/><br/>
 | 
			
		||||
    <b>Attachments</b>
 | 
			
		||||
    <ul>{files.map((f,j) => (
 | 
			
		||||
      <li key={j}><a onClick={()=>view(j)}>{f.filename} (click to view)</a></li>
 | 
			
		||||
    ))}</ul>
 | 
			
		||||
    <b>Table View</b><br/>
 | 
			
		||||
    <div dangerouslySetInnerHTML={{__html}}></div>
 | 
			
		||||
  </> );
 | 
			
		||||
}
 | 
			
		||||
**[The exposition has been moved to a separate page.](/docs/demos/net/email/pst)**
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
---
 | 
			
		||||
title: Browser Automation
 | 
			
		||||
pagination_prev: demos/net/email/index
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
import current from '/version.js';
 | 
			
		||||
 | 
			
		||||
@ -1,9 +1,9 @@
 | 
			
		||||
---
 | 
			
		||||
title: Hyperlinks and Tooltips
 | 
			
		||||
sidebar_label: Hyperlinks
 | 
			
		||||
sidebar_position: 3
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
# Hyperlinks
 | 
			
		||||
 | 
			
		||||
<details>
 | 
			
		||||
  <summary><b>File Format Support</b> (click to show)</summary>
 | 
			
		||||
 | 
			
		||||
@ -254,6 +254,20 @@ XLSX documents.  A workaround was added in library version 0.18.12.
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
## Tooltips
 | 
			
		||||
 | 
			
		||||
Tooltips are attached to hyperlink information. There is no way to specify a
 | 
			
		||||
tooltip without assigning a cell link.
 | 
			
		||||
 | 
			
		||||
:::warning pass
 | 
			
		||||
 | 
			
		||||
**Excel has an undocumented tooltip length limit of 255 characters.**
 | 
			
		||||
 | 
			
		||||
Writing longer tooltips is currently permitted by the library but the generated
 | 
			
		||||
files will not open in Excel.
 | 
			
		||||
 | 
			
		||||
:::
 | 
			
		||||
 | 
			
		||||
## HTML
 | 
			
		||||
 | 
			
		||||
The HTML DOM parser[^1] will process `<a>` links in the table.
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										51
									
								
								docz/static/pst/SheetJSPST.js
									
									
									
									
									
										Normal file
									
								
							
							
								
								
								
								
								
									
									
								
							
						
						
									
										51
									
								
								docz/static/pst/SheetJSPST.js
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,51 @@
 | 
			
		||||
const fs = require("fs");
 | 
			
		||||
const PSTExtractor = require("pst-extractor");
 | 
			
		||||
const XLSX = require("xlsx");
 | 
			
		||||
 | 
			
		||||
/**
 * Walk a PST folder tree and collect spreadsheet attachments.
 *
 * Subfolders are visited first, then every message in the folder is scanned.
 * Attachments whose filename ends in `.xls`, `.xlsx`, `.xlsm` or `.xlsb`
 * (case-insensitive) are appended to `arr`.
 *
 * @param {object} f   folder object (e.g. the result of `pst.getRootFolder()`)
 * @param {Array}  arr array that receives the matching attachment objects
 */
function walk(f,arr) {
  if(f.hasSubfolders) for(const sf of f.getSubFolders()) walk(sf,arr);
  if(f.contentCount <= 0) return;
  /* `getNextChild` returns null once every message has been visited */
  for(let e = f.getNextChild(); e != null; e = f.getNextChild()) {
    for(let i = 0; i < e.numberOfAttachments; ++i) {
      const a = e.getAttachment(i);
      /* spreadsheet test by filename; the dot is escaped so that only a real
         extension matches (an unescaped `.` matches any character) */
      if(/\.xls[xmb]?$/i.test(a.filename)) arr.push(a);
    }
  }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Read the complete contents of an attachment into a NodeJS Buffer.
 *
 * @param {object} file attachment object exposing a `fileInputStream`
 * @returns {Buffer} buffer filled by the stream's `readCompletely` method
 */
function collect(file) {
  const { fileInputStream } = file;
  /* the buffer is sized from the stream's `_length.low` field */
  const out = Buffer.alloc(fileInputStream._length.low);
  fileInputStream.readCompletely(out);
  return out;
}
 | 
			
		||||
 | 
			
		||||
/* fetch the test PST, save every spreadsheet attachment and print each
   worksheet as CSV; failures are reported and set a non-zero exit code */
(async() => {
  /* fetch https://docs.sheetjs.com/pst/enron.pst */
  const url = "https://docs.sheetjs.com/pst/enron.pst";
  const res = await fetch(url);
  /* an error page must not be handed to the PST parser */
  if(!res.ok) throw new Error(`failed to fetch ${url}: HTTP ${res.status}`);
  const ab = await res.arrayBuffer();
  const pst = new (PSTExtractor.PSTFile)(Buffer.from(ab));

  /* generate a list of attachments */
  const files = [];
  walk(pst.getRootFolder(), files);

  files.forEach((file, idx) => {
    /* extract and save workbook to file */
    const ext = file.filename.slice(file.filename.lastIndexOf(".") + 1);
    console.log(`saving file ${idx} |${file.filename}| to file${idx}.${ext}`);
    const buf = collect(file);
    fs.writeFileSync(`file${idx}.${ext}`, buf);

    /* parse workbook and print CSV contents of each sheet */
    const wb = XLSX.read(buf);
    wb.SheetNames.forEach(n => {
      const ws = wb.Sheets[n];
      const csv = XLSX.utils.sheet_to_csv(ws);
      console.log(`#### ${file.filename} ! ${n}`);
      console.log(csv);
    });
  });
})().catch(err => {
  /* avoid an unhandled promise rejection */
  console.error(err);
  process.exitCode = 1;
});
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user