forked from sheetjs/docs.sheetjs.com
		
	
		
			
	
	
		
			52 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
		
		
			
		
	
	
			52 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| 
								 | 
							
								const fs = require("fs");
							 | 
						||
| 
								 | 
							
								const PSTExtractor = require("pst-extractor");
							 | 
						||
| 
								 | 
							
								const XLSX = require("xlsx");
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Walk a PST folder tree and add every spreadsheet attachment to the specified array.
								 *
								 * Subfolders are visited first (recursively), then the messages of `f` itself.
								 * An attachment is kept when its filename ends in `.xls`, `.xlsx`, `.xlsm` or `.xlsb`.
								 *
								 * @param {object} f   folder object; read via `hasSubfolders`, `getSubFolders()`,
								 *                     `contentCount` and `getNextChild()`
								 * @param {Array}  arr output array; matching attachment objects are pushed onto it
								 * @returns {void}
								 */
								function walk(f, arr) {
								  if (f.hasSubfolders) {
								    for (const sf of f.getSubFolders()) walk(sf, arr);
								  }
								  if (f.contentCount <= 0) return;
								  /* getNextChild() acts as a cursor: keep pulling until it yields null / undefined */
								  for (let e = f.getNextChild(); e != null; e = f.getNextChild()) {
								    for (let i = 0; i < e.numberOfAttachments; ++i) {
								      const a = e.getAttachment(i);
								      /* XLS spreadsheet test by filename (dot escaped so "fooxls" is not mistaken for "foo.xls") */
								      if (/\.xls[xmb]?$/.test(a.filename)) arr.push(a);
								    }
								  }
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/* read the whole attachment stream into a freshly allocated "Buffer" and return it */
								function collect(file) {
								  const { fileInputStream: input } = file;
								  /* the buffer is sized from the `low` field of the stream's `_length` */
								  const size = input._length.low;
								  const out = Buffer.alloc(size);
								  input.readCompletely(out);
								  return out;
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/*
								 * Entry point: download the sample PST, save every spreadsheet attachment to
								 * `file<idx>.<ext>` in the current directory and print each worksheet as CSV.
								 */
								(async () => {
								  /* fetch https://docs.sheetjs.com/pst/enron.pst */
								  const url = "https://docs.sheetjs.com/pst/enron.pst";
								  const res = await fetch(url);
								  /* fail early on HTTP errors instead of handing an error page to the PST parser */
								  if (!res.ok) throw new Error(`failed to fetch ${url}: HTTP ${res.status}`);
								  const ab = await res.arrayBuffer();
								  const pst = new PSTExtractor.PSTFile(Buffer.from(ab));

								  /* generate a list of attachments */
								  const files = [];
								  walk(pst.getRootFolder(), files);

								  files.forEach((file, idx) => {
								    /* extract and save workbook to file */
								    const ext = file.filename.slice(file.filename.lastIndexOf(".") + 1);
								    console.log(`saving file ${idx} |${file.filename}| to file${idx}.${ext}`);
								    const buf = collect(file);
								    fs.writeFileSync(`file${idx}.${ext}`, buf);

								    /* parse workbook and print CSV contents of each sheet */
								    const wb = XLSX.read(buf);
								    wb.SheetNames.forEach((n) => {
								      const ws = wb.Sheets[n];
								      const csv = XLSX.utils.sheet_to_csv(ws);
								      console.log(`#### ${file.filename} ! ${n}`);
								      console.log(csv);
								    });
								  });
								})().catch((err) => {
								  /* do not leave the promise floating: report the failure and exit non-zero */
								  console.error(err);
								  process.exitCode = 1;
								});
							 |