2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								---
							 | 
						
					
						
							
								
									
										
										
										
											2023-01-22 04:23:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								title: Browser Automation
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								---
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-27 09:12:19 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import current from '/version.js';
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import Tabs from '@theme/Tabs';
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import TabItem from '@theme/TabItem';
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import CodeBlock from '@theme/CodeBlock';
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Headless automation involves controlling "headless browsers" to access websites
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								and submit or download data.  It is also possible to automate browsers using
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								custom browser extensions.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-10-30 05:45:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								The [SheetJS standalone script](/docs/getting-started/installation/standalone) can be added to
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								any website by inserting a `SCRIPT` tag.  Headless browsers usually provide
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								utility functions for running custom snippets in the browser and passing data
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								back to the automation script.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								## Use Case
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This demo focuses on exporting table data to a workbook.  Headless browsers do
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								not generally support passing objects between the browser context and the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								automation script, so the file data must be generated in the browser context
							 | 
						
					
						
							
								
									
										
										
										
											2023-01-22 04:23:58 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								and sent back to the automation script for saving in the file system.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								```mermaid
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								sequenceDiagram
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  autonumber off
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  actor U as User
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  participant C as Controller
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  participant B as Browser
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  U->>C: run script
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  rect rgba(255, 0, 0, 0.25)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    C->>B: launch browser
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    C->>B: load URL
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  rect rgba(0, 127, 0, 0.25)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    C->>B: add SheetJS script
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  rect rgba(255, 0, 0, 0.25)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    C->>B: ask for file
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Note over B: scrape tables
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    Note over B: generate workbook
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    B->>C: file bytes
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  rect rgba(0, 127, 0, 0.25)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    C->>U: save file
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  end
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								```
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<details open><summary><b>Key Steps</b> (click to hide)</summary>
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								1) Launch the headless browser and load the target site.
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								2) Add the standalone SheetJS build to the page in a `SCRIPT` tag.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								3) Add a script to the page (in the browser context) that will:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								- Make a workbook object from the first table using `XLSX.utils.table_to_book`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								- Generate the bytes for an XLSB file using `XLSX.write`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								- Send the bytes back to the automation script
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								4) When the automation context receives data, save to a file
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								</details>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This demo exports data from <https://sheetjs.com/demos/table>.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-16 18:54:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:::note pass
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								It is also possible to parse files from the browser context, but parsing from
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								the automation context is more efficient and strongly recommended.
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								## Puppeteer
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Puppeteer enables headless Chromium automation for NodeJS.  Releases ship with
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								an installer script that prepares a compatible browser version.
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<Tabs>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  <TabItem value="nodejs" label="NodeJS">
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Binary strings are the favored data type.  They can be safely passed from the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								browser context to the automation script.  NodeJS provides an API to write
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								binary strings to file (`fs.writeFileSync` using encoding `binary`).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								The key steps are commented below:
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<CodeBlock language="js" title="SheetJSPuppeteer.js">{`\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								const fs = require("fs");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								const puppeteer = require('puppeteer');
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								(async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* (1) Load the target page */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const browser = await puppeteer.launch();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const page = await browser.newPage();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  page.on("console", msg => console.log("PAGE LOG:", msg.text()));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  await page.setViewport({width: 1920, height: 1080});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  await page.goto('https://sheetjs.com/demos/table');
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* (2) Load the standalone SheetJS build from the CDN */
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  await page.addScriptTag({ url: 'https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js' });
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* (3) Run the snippet in browser and return data */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const bin = await page.evaluate(() => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* NOTE: this function will be evaluated in the browser context.
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								       \`page\`, \`fs\` and \`puppeteer\` are not available.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								       \`XLSX\` will be available thanks to step 2 */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* find first table */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    var table = document.body.getElementsByTagName('table')[0];
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* call table_to_book on first table */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    var wb = XLSX.utils.table_to_book(table);
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* generate XLSB and return binary string */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return XLSX.write(wb, {type: "binary", bookType: "xlsb"});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  });
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* (4) write data to file */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  fs.writeFileSync("SheetJSPuppeteer.xlsb", bin, { encoding: "binary" });
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  await browser.close();
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								})();`}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								</CodeBlock>
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								**Demo**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::note
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This demo was last tested on 2023 April 29 against Puppeteer 19.11.1.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								1) Install SheetJS and Puppeteer:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								<CodeBlock language="bash">{`\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz puppeteer@19.11.1`}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								</CodeBlock>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								2) Save the `SheetJSPuppeteer.js` code snippet to `SheetJSPuppeteer.js`.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								3) Run `node SheetJSPuppeteer.js`.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								When the script finishes, the file `SheetJSPuppeteer.xlsb` will be created.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This file can be opened with Excel.
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  </TabItem>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  <TabItem value="deno" label="Deno">
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-16 18:54:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:::caution pass
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Deno Puppeteer is a fork. It is not officially supported by the Puppeteer team.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Base64 strings are the favored data type.  They can be safely passed from the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								browser context to the automation script.  Deno can decode the Base64 strings
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								and write the decoded `Uint8Array` data to file with `Deno.writeFileSync`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								The key steps are commented below:
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<CodeBlock language="ts" title="SheetJSPuppeteer.ts">{`\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import puppeteer from "https://deno.land/x/puppeteer@16.2.0/mod.ts";
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import { decode } from "https://deno.land/std/encoding/base64.ts"
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								/* (1) Load the target page */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								const browser = await puppeteer.launch();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								const page = await browser.newPage();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								page.on("console", msg => console.log("PAGE LOG:", msg.text()));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								await page.setViewport({width: 1920, height: 1080});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								await page.goto('https://sheetjs.com/demos/table');
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								/* (2) Load the standalone SheetJS build from the CDN */
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								await page.addScriptTag({ url: 'https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js' });
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								/* (3) Run the snippet in browser and return data */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								const b64 = await page.evaluate(() => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* NOTE: this function will be evaluated in the browser context.
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								     \`page\`, \`fs\` and \`puppeteer\` are not available.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     \`XLSX\` will be available thanks to step 2 */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  /* find first table */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  var table = document.body.getElementsByTagName('table')[0];
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  /* call table_to_book on first table */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  var wb = XLSX.utils.table_to_book(table);
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  /* generate XLSB and return binary string */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  return XLSX.write(wb, {type: "base64", bookType: "xlsb"});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								/* (4) write data to file */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Deno.writeFileSync("SheetJSPuppeteer.xlsb", decode(b64));
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								await browser.close();`}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								</CodeBlock>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								**Demo**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::note
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								This demo was last tested on 2023 April 29 against deno-puppeteer 16.2.0.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								1) Install deno-puppeteer:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								```bash
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								env PUPPETEER_PRODUCT=chrome deno run -A --unstable https://deno.land/x/puppeteer@16.2.0/install.ts
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								```
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								2) Save the `SheetJSPuppeteer.ts` code snippet to `SheetJSPuppeteer.ts`.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								3) Run `deno run -A --unstable SheetJSPuppeteer.ts`.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								When the script finishes, the file `SheetJSPuppeteer.xlsb` will be created.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This file can be opened with Excel.
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-25 08:22:28 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  </TabItem>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								</Tabs>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								## Playwright
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Playwright presents a unified scripting framework for Chromium, WebKit, and
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								other browsers.  It draws inspiration from Puppeteer.  In fact, the example
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								code is almost identical!
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								Differences from the Puppeteer example are highlighted below:
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<CodeBlock language="js" title="SheetJSPlaywright.js">{`\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								const fs = require("fs");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								// highlight-next-line
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								const { webkit } = require('playwright'); // import desired browser
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								(async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* (1) Load the target page */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  // highlight-next-line
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const browser = await webkit.launch(); // launch desired browser
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const page = await browser.newPage();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  page.on("console", msg => console.log("PAGE LOG:", msg.text()));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  // highlight-next-line
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  await page.setViewportSize({width: 1920, height: 1080}); // different name :(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  await page.goto('https://sheetjs.com/demos/table');
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* (2) Load the standalone SheetJS build from the CDN */
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  await page.addScriptTag({ url: 'https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js' });
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* (3) Run the snippet in browser and return data */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const bin = await page.evaluate(() => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* NOTE: this function will be evaluated in the browser context.
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								       \`page\`, \`fs\` and the browser engine are not available.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								       \`XLSX\` will be available thanks to step 2 */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* find first table */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    var table = document.body.getElementsByTagName('table')[0];
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* call table_to_book on first table */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    var wb = XLSX.utils.table_to_book(table);
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* generate XLSB and return binary string */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return XLSX.write(wb, {type: "binary", bookType: "xlsb"});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  });
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* (4) write data to file */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  fs.writeFileSync("SheetJSPlaywright.xlsb", bin, { encoding: "binary" });
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  await browser.close();
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								})();`}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								</CodeBlock>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								**Demo**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::note
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This demo was last tested on 2023 April 29 against Playwright 1.33.0.
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:::
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								1) Install SheetJS and Playwright:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								<CodeBlock language="bash">{`\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz playwright@1.33.0`}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								</CodeBlock>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								2) Save the `SheetJSPlaywright.js` code snippet to `SheetJSPlaywright.js`.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								3) Run `node SheetJSPlaywright.js`.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								When the script finishes, the file `SheetJSPlaywright.xlsb` will be created.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This file can be opened with Excel.
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								## PhantomJS
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-11-07 10:41:00 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								PhantomJS is a headless web browser powered by WebKit.
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-16 18:54:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:::warning pass
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This information is provided for legacy deployments.  PhantomJS development has
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								been suspended and there are known vulnerabilities, so new projects should use
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								alternatives.  For WebKit automation, new projects should use Playwright.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								Binary strings are the favored data type.  They can be safely passed from the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								browser context to the automation script.  PhantomJS provides an API to write
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								binary strings to file (`fs.write` using mode `wb`).
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<details><summary><b>Integration Details and Demo</b> (click to show)</summary>
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								The steps are marked in the comments:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								<CodeBlock language="js" title="SheetJSPhantom.js">{`\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								var page = require('webpage').create();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								page.onConsoleMessage = function(msg) { console.log(msg); };
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								/* (1) Load the target page */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								page.open('https://sheetjs.com/demos/table', function() {
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  /* (2) Load the standalone SheetJS build from the CDN */
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  page.includeJs("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js", function() {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* (3) Run the snippet in browser and return data */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    var bin = page.evaluateJavaScript([ "function(){",
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      /* find first table */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      "var table = document.body.getElementsByTagName('table')[0];",
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      /* call table_to_book on first table */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      "var wb = XLSX.utils.table_to_book(table);",
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      /* generate XLSB file and return binary string */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      "return XLSX.write(wb, {type: 'binary', bookType: 'xlsb'});",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "}" ].join(""));
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /* (4) write data to file */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    require("fs").write("SheetJSPhantomJS.xlsb", bin, "wb");
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								\n\
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    phantom.exit();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  });
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								});`}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								</CodeBlock>
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-16 18:54:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								:::caution pass
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-07 04:05:14 +00:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								PhantomJS is very finicky and will hang if there are script errors.  It is
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								strongly recommended to add verbose logging and to lint scripts before use.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								**Demo**
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::note
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-16 18:54:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								This demo was last tested on 2023 August 16 against PhantomJS 2.1.1
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								:::
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-16 18:54:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								1) Download and unzip the PhantomJS release from the official website[^1].
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-29 11:21:37 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								2) Save the `SheetJSPhantom.js` code snippet to `SheetJSPhantom.js`.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								3) Run the command.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								In macOS:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								```bash
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								./phantomjs-2.1.1-macosx/bin/phantomjs SheetJSPhantom.js
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								```
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								When the script finishes, the file `SheetJSPhantomJS.xlsb` will be created.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								This file can be opened with Excel.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-16 18:54:32 +00:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								</details>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								[^1]: Downloads available at <https://phantomjs.org/download.html>
							 |