forked from sheetjs/docs.sheetjs.com
		
	
		
			
	
	
		
			266 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
		
		
			
		
	
	
			266 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
|  | --- | ||
|  | sidebar_position: 26 | ||
|  | title: Amazon Web Services | ||
|  | --- | ||
|  | 
 | ||
|  | AWS is a Cloud Services platform which includes traditional virtual machine | ||
|  | support, "Serverless Functions", cloud storage and much more. | ||
|  | 
 | ||
|  | :::caution | ||
|  | 
 | ||
|  | AWS iterates quickly and there is no guarantee that the referenced services | ||
|  | will be available in the future. | ||
|  | 
 | ||
|  | ::: | ||
|  | 
 | ||
|  | This demo focuses on two key offerings: cloud storage ("S3") and the | ||
|  | "Serverless Function" platform ("Lambda"). | ||
|  | 
 | ||
|  | :::note | ||
|  | 
 | ||
|  | This was tested on 2022 August 21. | ||
|  | 
 | ||
|  | ::: | ||
|  | 
 | ||
|  | ## AWS Lambda Functions
 | ||
|  | 
 | ||
|  | This demo uses the "Function URL" feature (automatic API Gateway management). | ||
|  | Older deployments required special "Binary Media Types" configuration to | ||
|  | handle formats like XLSX.  At the time of testing, this was not required. | ||
|  | 
 | ||
|  | ### Reading Data
 | ||
|  | 
 | ||
|  | In the Lambda handler method, the `event.body` attribute is a Base64-encoded | ||
|  | string.  The `busboy` body parser can accept a decoded body. | ||
|  | 
 | ||
|  | <details><summary><b>Code Sample</b> (click to show)</summary> | ||
|  | 
 | ||
|  | ```js | ||
|  | const XLSX = require('xlsx'); | ||
|  | var Busboy = require('busboy'); | ||
|  | 
 | ||
|  | exports.handler = function(event, context, callback) { | ||
|  |   /* set up busboy */ | ||
|  |   var ctype = event.headers['Content-Type']||event.headers['content-type']; | ||
|  |   var bb = Busboy({headers:{'content-type':ctype}}); | ||
|  | 
 | ||
|  |   /* busboy is evented; accumulate the fields and files manually */ | ||
|  |   var fields = {}, files = {}; | ||
|  |   bb.on('error', function(err) { callback(null, { body: err.message }); }); | ||
|  |   bb.on('field', function(fieldname, val) {fields[fieldname] = val }); | ||
|  |   // highlight-start | ||
|  |   bb.on('file', function(fieldname, file, filename) { | ||
|  |     /* concatenate the individual data buffers */ | ||
|  |     var buffers = []; | ||
|  |     file.on('data', function(data) { buffers.push(data); }); | ||
|  |     file.on('end', function() { files[fieldname] = [Buffer.concat(buffers), filename]; }); | ||
|  |   }); | ||
|  |   // highlight-end | ||
|  | 
 | ||
|  |   /* on the finish event, all of the fields and files are ready */ | ||
|  |   bb.on('finish', function() { | ||
|  |     /* grab the first file */ | ||
|  |     var f = files["upload"]; | ||
|  |     if(!f) return callback(new Error("Must submit a file for processing!")); | ||
|  | 
 | ||
|  |     /* f[0] is a buffer */ | ||
|  |     // highlight-next-line | ||
|  |     var wb = XLSX.read(f[0]); | ||
|  | 
 | ||
|  |     /* grab first worksheet and convert to CSV */ | ||
|  |     var ws = wb.Sheets[wb.SheetNames[0]]; | ||
|  |     callback(null, { statusCode: 200, body: XLSX.utils.sheet_to_csv(ws) }); | ||
|  |   }); | ||
|  | 
 | ||
|  |   /* start the processing */ | ||
|  |   // highlight-next-line | ||
|  |   bb.end(Buffer.from(event.body, "base64")); | ||
|  | }; | ||
|  | ``` | ||
|  | 
 | ||
|  | </details> | ||
|  | 
 | ||
|  | ### Writing Data
 | ||
|  | 
 | ||
|  | To transmit binary data safely, the `base64` type should be used.  Setting the | ||
|  | `isBase64Encoded` property of the Lambda callback response forces a binary download: | ||
|  | 
 | ||
|  | <details><summary><b>Code Sample</b> (click to show)</summary> | ||
|  | 
 | ||
|  | ```js | ||
|  | var XLSX = require('xlsx'); | ||
|  | 
 | ||
|  | exports.handler = function(event, context, callback) { | ||
|  |   /* make workbook */ | ||
|  |   var wb = XLSX.read("S,h,e,e,t,J,S\n5,4,3,3,7,9,5", {type: "binary"}); | ||
|  |   /* write to XLSX file in base64 encoding */ | ||
|  |   // highlight-next-line | ||
|  |   var body = XLSX.write(wb, {type:"base64", bookType: "xlsx"}); | ||
|  |   /* mark as attached file */ | ||
|  |   var headers = { "Content-Disposition": 'attachment; filename="SheetJSLambda.xlsx"'}; | ||
|  |   /* Send back data */ | ||
|  |   callback(null, { | ||
|  |     statusCode: 200, | ||
|  |     // highlight-next-line | ||
|  |     isBase64Encoded: true, | ||
|  |     body: body, | ||
|  |     headers: headers | ||
|  |   }); | ||
|  | }; | ||
|  | ``` | ||
|  | 
 | ||
|  | </details> | ||
|  | 
 | ||
|  | ### Demo
 | ||
|  | 
 | ||
|  | <details><summary><b>Complete Example</b> (click to show)</summary> | ||
|  | 
 | ||
|  | 0) Review the quick start for JavaScript on AWS | ||
|  | 
 | ||
|  | 1) Create a new folder and download [`index.js`](pathname:///aws/index.js): | ||
|  | 
 | ||
|  | ```bash | ||
|  | mkdir SheetJSLambda | ||
|  | cd SheetJSLambda | ||
|  | curl -LO https://docs.sheetjs.com/aws/index.js | ||
|  | ``` | ||
|  | 
 | ||
|  | 2) Install dependencies to the current directory: | ||
|  | 
 | ||
|  | ```bash | ||
|  | mkdir node_modules | ||
|  | npm install https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz busboy | ||
|  | ``` | ||
|  | 
 | ||
|  | 3) Create a .zip package of the contents of the folder: | ||
|  | 
 | ||
|  | ```bash | ||
|  | yes | zip -c ../SheetJSLambda.zip -r . | ||
|  | ``` | ||
|  | 
 | ||
|  | 4) In the web interface for AWS Lambda, create a new Function with the options: | ||
|  | 
 | ||
|  | - Select "Author from scratch" (default choice when last verified) | ||
|  | - "Function Name": SheetJSLambda | ||
|  | - "Runtime": "Node.js" (select the version in the "Latest supported" block) | ||
|  | - Advanced Settings: | ||
|  |  + check "Enable function URL" | ||
|  |  + Auth type: NONE | ||
|  |  + Check "Configure CORS" | ||
|  | 
 | ||
|  | 5) In the interface, click "Upload from" and select ".zip file".  Click the | ||
|  | "Upload" button in the modal, select SheetJSLambda.zip, and click "Save". | ||
|  | 
 | ||
|  | At the time of writing, the ZIP is small enough that the Lambda code editor | ||
|  | will load the package. | ||
|  | 
 | ||
|  | 6) Enable external access to the function. | ||
|  | 
 | ||
|  | Under Configuration > Function URL, click "Edit" and ensure that Auth type is | ||
|  | set to NONE.  If it is not, select NONE and hit Save. | ||
|  | 
 | ||
|  | Under Configuration > Permissions, scroll down to "Resource-based policy". | ||
|  | If no policy statements are defined, select "Add Permission" with the options: | ||
|  | 
 | ||
|  | - Select "Function URL" at the top | ||
|  | - Auth type: NONE | ||
|  | - Ensure that Statement ID is set to `FunctionURLAllowPublicAccess` | ||
|  | - Ensure that Principal is set to `*` | ||
|  | - Ensure that Action is set to `lambda:InvokeFunctionUrl` | ||
|  | 
 | ||
|  | Click "Save" and a new Policy statement should be created. | ||
|  | 
 | ||
|  | 7) Find the Function URL (it is in the "Function Overview" section). | ||
|  | 
 | ||
|  | Try to access that URL in a web browser and the site will try to download | ||
|  | `SheetJSLambda.xlsx`.  Save and open the file to confirm it is valid. | ||
|  | 
 | ||
|  | To test parsing, download <https://sheetjs.com/pres.numbers> and run | ||
|  | 
 | ||
|  | ```bash | ||
|  | curl -X POST -F "upload=@pres.numbers" FUNCTION_URL | ||
|  | ``` | ||
|  | 
 | ||
|  | The result should be a CSV output of the first sheet. | ||
|  | 
 | ||
|  | </details> | ||
|  | 
 | ||
|  | ## S3 Storage
 | ||
|  | 
 | ||
|  | The main module for S3 and all AWS services is `aws-sdk`. | ||
|  | 
 | ||
|  | ### Reading Data
 | ||
|  | 
 | ||
|  | The `S3#getObject` method returns an object with a `createReadStream` method. | ||
|  | Buffers can be concatenated and passed to `XLSX.read`: | ||
|  | 
 | ||
|  | <details><summary><b>Code Sample</b> (click to show)</summary> | ||
|  | 
 | ||
|  | ```js title="SheetJSReadFromS3.js" | ||
|  | var XLSX = require("xlsx"); | ||
|  | var AWS = require('aws-sdk'); | ||
|  | 
 | ||
|  | /* replace these constants */ | ||
|  | var accessKeyId = "<REPLACE WITH ACCESS KEY ID>"; | ||
|  | var secretAccessKey = "<REPLACE WITH SECRET ACCESS KEY>"; | ||
|  | var Bucket = "<REPLACE WITH BUCKET NAME>"; | ||
|  | var Key = "<REPLACE WITH KEY>"; | ||
|  | 
 | ||
|  | /* Get stream */ | ||
|  | var s3 = new AWS.S3({ | ||
|  |   apiVersion: '2006-03-01', | ||
|  |   credentials: { | ||
|  |     accessKeyId: accessKeyId, | ||
|  |     secretAccessKey: secretAccessKey | ||
|  |   } | ||
|  | }); | ||
|  | var f = s3.getObject({ Bucket: Bucket, Key: Key }).createReadStream(); | ||
|  | 
 | ||
|  | /* collect data */ | ||
|  | var bufs = []; | ||
|  | f.on('data', function(data) { bufs.push(data); }); | ||
|  | f.on('end', function() { | ||
|  |   /* concatenate and parse */ | ||
|  |   var wb = XLSX.read(Buffer.concat(bufs)); | ||
|  |   console.log(XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]])); | ||
|  | }); | ||
|  | ``` | ||
|  | 
 | ||
|  | </details> | ||
|  | 
 | ||
|  | ### Writing Data
 | ||
|  | 
 | ||
|  | `S3#upload` directly accepts a Buffer: | ||
|  | 
 | ||
|  | <details><summary><b>Code Sample</b> (click to show)</summary> | ||
|  | 
 | ||
|  | ```js title="SheetJSWriteToS3.js" | ||
|  | var XLSX = require("xlsx"); | ||
|  | var AWS = require('aws-sdk'); | ||
|  | 
 | ||
|  | /* replace these constants */ | ||
|  | var accessKeyId = "<REPLACE WITH ACCESS KEY ID>"; | ||
|  | var secretAccessKey = "<REPLACE WITH SECRET ACCESS KEY>"; | ||
|  | var Bucket = "<REPLACE WITH BUCKET NAME>"; | ||
|  | var Key = "<REPLACE WITH KEY>"; | ||
|  | 
 | ||
|  | /* Create a simple workbook and write XLSX to buffer */ | ||
|  | var ws = XLSX.utils.aoa_to_sheet(["SheetJS".split(""), [5,4,3,3,7,9,5]]); | ||
|  | var wb = XLSX.utils.book_new(); XLSX.utils.book_append_sheet(wb, ws, "Sheet1"); | ||
|  | var Body = XLSX.write(wb, {type: "buffer", bookType: "xlsx"}); | ||
|  | 
 | ||
|  | /* upload buffer */ | ||
|  | var s3 = new AWS.S3({ | ||
|  |   apiVersion: '2006-03-01', | ||
|  |   credentials: { | ||
|  |     accessKeyId: accessKeyId, | ||
|  |     secretAccessKey: secretAccessKey | ||
|  |   } | ||
|  | }); | ||
|  | s3.upload({ Bucket: Bucket, Key: Key, Body: Body }, function(err, data) { | ||
|  |   if(err) throw err; | ||
|  |   console.log("Uploaded to " + data.Location); | ||
|  | }); | ||
|  | ``` | ||
|  | 
 | ||
|  | </details> |