forked from sheetjs/sheetjs
		
	
		
			
	
	
		
			132 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
		
		
			
		
	
	
			132 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
|  | # Typed Arrays and Math
 | ||
|  | 
 | ||
|  | ECMAScript version 6 introduced Typed Arrays, array-like objects designed for | ||
|  | low-level optimizations and predictable operations.  They are supported in most | ||
|  | modern browsers and form the basis of various APIs, including Node.js Buffers, | ||
|  | WebGL buffers, WebAssembly, and tensors in linear algebra and math libraries. | ||
|  | 
 | ||
|  | This demo covers conversions between worksheets and Typed Arrays.  It also tries | ||
|  | to cover common numerical libraries that work with data arrays. | ||
|  | 
 | ||
|  | Excel supports a subset of the IEEE754 double-precision floating-point numbers, | ||
|  | but many libraries only support `Float32` single-precision values. `Math.fround` | ||
|  | rounds `Number` values to the nearest single-precision floating point value. | ||
|  | 
 | ||
|  | ## Working with Data in Typed Arrays
 | ||
|  | 
 | ||
|  | Typed arrays are not true Array objects.  Array-of-arrays utility functions | ||
|  | like `aoa_to_sheet` will not handle arrays of Typed Arrays. | ||
|  | 
 | ||
|  | #### Exporting Typed Arrays to a Worksheet
 | ||
|  | 
 | ||
|  | A single typed array can be converted to a pure JS array with `Array.from`: | ||
|  | 
 | ||
|  | ```js | ||
|  | var column = Array.from(dataset_typedarray); | ||
|  | ``` | ||
|  | 
 | ||
|  | `aoa_to_sheet` expects a row-major array of arrays.  To export multiple data | ||
|  | sets, "transpose" the data: | ||
|  | 
 | ||
|  | ```js | ||
|  | /* assuming data is an array of typed arrays */ | ||
|  | var aoa = []; | ||
|  | for(var i = 0; i < data.length; ++i) { | ||
|  |   for(var j = 0; j < data[i].length; ++j) { | ||
|  |     if(!aoa[j]) aoa[j] = []; | ||
|  |     aoa[j][i] = data[i][j]; | ||
|  |   } | ||
|  | } | ||
|  | /* aoa can be directly converted to a worksheet object */ | ||
|  | var ws = XLSX.utils.aoa_to_sheet(aoa); | ||
|  | ``` | ||
|  | 
 | ||
|  | #### Importing Data from a Spreadsheet
 | ||
|  | 
 | ||
|  | `sheet_to_json` with the option `header:1` will generate a row-major array of | ||
|  | arrays that can be transposed.  However, it is more efficient to walk the sheet | ||
|  | manually: | ||
|  | 
 | ||
|  | ```js | ||
|  | /* find worksheet range */ | ||
|  | var range = XLSX.utils.decode_range(ws['!ref']); | ||
|  | var out = [] | ||
|  | /* walk the columns */ | ||
|  | for(var C = range.s.c; C <= range.e.c; ++C) { | ||
|  |   /* create the typed array */ | ||
|  |   var ta = new Float32Array(range.e.r - range.s.r + 1); | ||
|  |   /* walk the rows */ | ||
|  |   for(var R = range.s.r; R <= range.e.r; ++R) { | ||
|  |     /* find the cell, skip it if the cell isn't numeric or boolean */ | ||
|  |     var cell = ws[XLSX.utils.encode_cell({r:R, c:C})]; | ||
|  |     if(!cell || cell.t != 'n' && cell.t != 'b') continue; | ||
|  |     /* assign to the typed array */ | ||
|  |     ta[R - range.s.r] = cell.v; | ||
|  |   } | ||
|  |   out.push(ta); | ||
|  | } | ||
|  | ``` | ||
|  | 
 | ||
|  | If the data set has a header row, the loop can be adjusted to skip that row. | ||
|  | 
 | ||
|  | 
 | ||
|  | ## Demos
 | ||
|  | 
 | ||
|  | Each example focuses on single-variable linear regression.  Sample worksheets | ||
|  | will start with a label row.  The first column is the x-value and the second | ||
|  | column is the y-value.  A sample spreadsheet can be generated randomly: | ||
|  | 
 | ||
|  | ```js | ||
|  | var aoo = []; | ||
|  | for(var i = 0; i < 100; ++i) aoo.push({x:i, y:2 * i + Math.random()}); | ||
|  | var ws = XLSX.utils.json_to_sheet(aoo); | ||
|  | var wb = XLSX.utils.book_new(); XLSX.utils.book_append_sheet(wb, ws, "Sheet1"); | ||
|  | XLSX.writeFile(wb, "linreg.xlsx"); | ||
|  | ``` | ||
|  | 
 | ||
|  | Some libraries provide utility functions that work with plain arrays of numbers. | ||
|  | When possible, they should be preferred over manual conversion. | ||
|  | 
 | ||
|  | Reshaping raw float arrays and exporting to a worksheet is straightforward: | ||
|  | 
 | ||
|  | ```js | ||
|  | function array_to_sheet(farray, shape, headers) { | ||
|  |   /* generate new AOA from the float array */ | ||
|  |   var aoa = []; | ||
|  |   for(var j = 0; j < shape[0]; ++j) { | ||
|  |     aoa[j] = []; | ||
|  |     for(var i = 0; i < shape[1]; ++i) aoa[j][i] = farray[j * shape[1] + i]; | ||
|  |   } | ||
|  | 
 | ||
|  |   /* add headers and generate worksheet */ | ||
|  |   if(headers) aoa.unshift(headers); | ||
|  |   return XLSX.utils.aoa_to_sheet(aoa); | ||
|  | } | ||
|  | ``` | ||
|  | 
 | ||
|  | #### TensorFlow
 | ||
|  | 
 | ||
|  | [TensorFlow](https://js.tensorflow.org/) `tensor` objects can be created from | ||
|  | arrays of arrays: | ||
|  | 
 | ||
|  | ```js | ||
|  | var tensor = tf.tensor2d(aoa).transpose(); | ||
|  | var col1 = tensor.slice([0,0], [1,tensor.shape[1]]).flatten(); | ||
|  | var col2 = tensor.slice([1,0], [1,tensor.shape[1]]).flatten(); | ||
|  | ``` | ||
|  | 
 | ||
|  | `stack` should be used to create the 2-d tensor for export: | ||
|  | 
 | ||
|  | ```js | ||
|  | var tensor = tf.stack([col1, col2]).transpose(); | ||
|  | var shape = tensor.shape; | ||
|  | var farray = tensor.dataSync(); | ||
|  | var ws = array_to_sheet(farray, shape, ["header1", "header2"]); | ||
|  | ``` | ||
|  | 
 | ||
|  | The demo generates a sample dataset and uses a simple linear predictor with | ||
|  | least-squares scoring to calculate regression coefficients.  The tensors are | ||
|  | exported to a new file. | ||
|  | 
 | ||
|  | [SheetJS js-xlsx on GitHub](https://github.com/SheetJS/js-xlsx) |