forked from sheetjs/docs.sheetjs.com
		
	
		
			
	
	
		
			52 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
		
		
			
		
	
	
			52 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
|  | const fs = require("fs"); | ||
|  | const PSTExtractor = require("pst-extractor"); | ||
|  | const XLSX = require("xlsx"); | ||
|  | 
 | ||
/**
 * Recursively walk a PST folder and append every spreadsheet attachment
 * (.xls, .xlsx, .xlsm, .xlsb) found in it to `arr`.
 *
 * Subfolders are visited before the items of the folder itself.
 *
 * @param {object} f - folder object exposing `hasSubfolders`, `getSubFolders()`,
 *   `contentCount` and `getNextChild()`
 * @param {Array} arr - output array; matching attachment objects are pushed onto it
 * @returns {void}
 */
function walk(f, arr) {
  if (f.hasSubfolders) {
    for (const sf of f.getSubFolders()) walk(sf, arr);
  }
  if (f.contentCount <= 0) return;
  /* the loop stops at the first nullish value returned by getNextChild() */
  for (let e = f.getNextChild(); e != null; e = f.getNextChild()) {
    for (let i = 0; i < e.numberOfAttachments; ++i) {
      const a = e.getAttachment(i);
      /* spreadsheet test by filename: the dot is escaped so that names like
         "notes_xls" are rejected, and the match ignores case so "REPORT.XLS"
         is accepted */
      if (/\.xls[xmb]?$/i.test(a.filename)) arr.push(a);
    }
  }
}
|  | 
 | ||
/**
 * Read the contents of an attachment into a newly allocated Buffer.
 *
 * @param {object} file - attachment object whose `fileInputStream` exposes
 *   `_length.low` and `readCompletely(buffer)`
 * @returns {Buffer} buffer of `_length.low` bytes after `readCompletely` has
 *   been called on it
 */
function collect(file) {
  const { fileInputStream: stream } = file;
  /* NOTE(review): `_length.low` is presumably the low 32 bits of a 64-bit
     length value -- confirm against the pst-extractor stream implementation */
  const bytes = Buffer.alloc(stream._length.low);
  stream.readCompletely(bytes);
  return bytes;
}
|  | 
 | ||
/* Download the sample PST, save every collected spreadsheet attachment to
   disk and print each worksheet as CSV */
(async () => {
  /* fetch https://docs.sheetjs.com/pst/enron.pst */
  const res = await fetch("https://docs.sheetjs.com/pst/enron.pst");
  /* fetch() resolves for 4xx/5xx responses too; stop here instead of handing
     an error page to the PST parser */
  if (!res.ok) throw new Error(`unable to fetch enron.pst: HTTP ${res.status} ${res.statusText}`);
  const ab = await res.arrayBuffer();
  const pst = new (PSTExtractor.PSTFile)(Buffer.from(ab));

  /* generate a list of attachments */
  const files = [];
  walk(pst.getRootFolder(), files);

  files.forEach((file, idx) => {
    /* extract and save workbook to file; the extension is the text after the
       last "." of the attachment filename */
    const ext = file.filename.slice(file.filename.lastIndexOf(".") + 1);
    console.log(`saving file ${idx} |${file.filename}| to file${idx}.${ext}`);
    const buf = collect(file);
    fs.writeFileSync(`file${idx}.${ext}`, buf);

    /* parse workbook and print CSV contents of each sheet */
    const wb = XLSX.read(buf);
    wb.SheetNames.forEach((n) => {
      const ws = wb.Sheets[n];
      const csv = XLSX.utils.sheet_to_csv(ws);
      console.log(`#### ${file.filename} ! ${n}`);
      console.log(csv);
    });
  });
})().catch((err) => {
  /* do not leave the promise floating: report the failure and signal it
     through the exit status */
  console.error(err);
  process.exitCode = 1;
});