2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								---
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								title: Loader Tutorial
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								pagination_prev: getting-started/installation/index
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								pagination_next: getting-started/roadmap
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								sidebar_position: 6
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								---
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import current from '/version.js';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import Tabs from '@theme/Tabs';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import TabItem from '@theme/TabItem';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import CodeBlock from '@theme/CodeBlock';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Many existing systems and platforms include support for loading data from CSV
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								files. Many users prefer to work in spreadsheet software and multi-sheet file
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								formats including XLSX. SheetJS libraries help bridge the gap by translating
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								complex workbooks to simple CSV data.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The goal of this example is to load spreadsheet data into a vector store and use
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								a large language model to generate queries based on English language input. The
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								existing tooling supports CSV but does not support real spreadsheets.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								In ["SheetJS Conversion"](#sheetjs-conversion), we will use SheetJS libraries to
							 
						 
					
						
							
								
									
										
										
										
											2024-12-22 04:47:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								generate CSV files for the LangChainJS CSV loader. These conversions can be run
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								in a preprocessing step without disrupting existing CSV workflows.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-20 07:30:34 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								In ["SheetJS Loader"](#sheetjs-loader), we will use SheetJS libraries in a
							 
						 
					
						
							
								
									
										
										
										
											2024-06-23 05:30:41 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								custom `LoadOfSheet` data loader to directly generate documents and metadata.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-20 07:30:34 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								["SheetJS Loader Demo"](#sheetjs-loader-demo) is a complete demo that uses the
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								SheetJS Loader to answer questions based on data from an XLS workbook.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::note Tested Deployments
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								This demo was tested in the following configurations:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-12-21 03:32:22 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| Platform                                                          | Architecture | Date       |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								|:------------------------------------------------------------------|:-------------|:-----------|
							 
						 
					
						
							
								
									
										
										
										
											2025-05-18 20:09:19 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| NVIDIA RTX 5090 (32 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM)       | `win11-x64`   | 2025-05-17 |
							 
						 
					
						
							
								
									
										
										
										
											2025-04-18 04:04:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| NVIDIA RTX 4090 (24 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM)       | `win11-x64`   | 2025-04-17 |
							 
						 
					
						
							
								
									
										
										
										
											2025-02-10 07:13:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| NVIDIA RTX 4090 (24 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM)       | `linux-x64`   | 2025-01-28 |
							 
						 
					
						
							
								
									
										
										
										
											2025-01-15 18:43:08 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| AMD RX 7900 XTX (24 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM)       | `win11-x64`   | 2025-01-12 |
							 
						 
					
						
							
								
									
										
										
										
											2025-02-10 07:13:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| AMD RX 7900 XTX (24 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM)       | `linux-x64`   | 2025-01-29 |
							 
						 
					
						
							
								
									
										
										
										
											2025-01-26 16:46:11 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| Intel Arc B580 (12 GB VRAM) + Ryzen Z1 Extreme (24 GB RAM)        | `win11-x64`   | 2025-01-24 |
							 
						 
					
						
							
								
									
										
										
										
											2025-02-10 07:13:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| Intel Arc B580 (12 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM)        | `linux-x64`   | 2025-02-08 |
							 
						 
					
						
							
								
									
										
										
										
											2025-03-27 02:49:13 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| Apple M4 Max 16-Core CPU + 40-Core GPU (48 GB unified memory)     | `darwin-arm`  | 2025-03-06 |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| Apple M2 Max 12-Core CPU + 30-Core GPU (32 GB unified memory)     | `darwin-arm`  | 2025-03-25 |
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-07-01 03:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								SheetJS users have verified this demo in other configurations:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								<details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <summary><b>Other tested configurations</b> (click to show)</summary>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-03-30 06:31:40 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								| Platform                                                             | Architecture | Demo        |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								|:---------------------------------------------------------------------|:-------------|:------------|
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA L40 (48 GB VRAM) + i9-13900K (32 GB RAM)                      | `linux-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 4080 SUPER (16 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM)    | `win11-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 4070 Ti SUPER (16 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM) | `win11-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 4070 Ti (12 GB VRAM) + Ryzen 7 5800x (64 GB RAM)          | `win11-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 4060 (8 GB VRAM) + Ryzen 7 5700g (32 GB RAM)              | `win11-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 3090 (24 GB VRAM) + Ryzen 9 3900XT (128 GB RAM)           | `win11-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 3080 (12 GB VRAM) + Ryzen 7 5800X (32 GB RAM)             | `win11-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 3070 (8 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM)           | `win11-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 3060 (12 GB VRAM) + i5-11400 (32 GB RAM)                  | `win10-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 2080 (12 GB VRAM) + i7-9700K (16 GB RAM)                  | `win10-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 2070 (8 GB VRAM) + Ryzen 7 3700x (80 GB RAM)              | `linux-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA RTX 2060 (6 GB VRAM) + Ryzen 5 3600 (32 GB RAM)               | `win10-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA GTX 1080 (8 GB VRAM) + Ryzen 7 5800x (64 GB RAM)              | `win10-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| NVIDIA GTX 1070 (8 GB VRAM) + Ryzen 7 7700x (32 GB RAM)              | `win11-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| AMD RX 6800 XT (16 GB VRAM) + Ryzen Z1 Extreme (16 GB RAM)           | `win11-x64`   | LangChainJS |
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								| Apple M4 10-Core CPU + 10-Core GPU (24 GB unified memory)            | `darwin-arm`  | LangChainJS |
							 
						 
					
						
							
								
									
										
										
										
											2024-07-01 03:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-03-30 06:31:40 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								Special thanks to the following users for testing with multiple configurations:
							 
						 
					
						
							
								
									
										
										
										
											2024-07-01 03:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-12 19:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								- [Asadbek Karimov](https://asadk.dev/)
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								- [Rasmus Tengstedt](https://tengstedt.dev/)
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								## CSV Loader
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-07-01 03:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								:::note pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-12-22 04:47:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								This explanation was verified against LangChainJS 0.2.
							 
						 
					
						
							
								
									
										
										
										
											2024-07-01 03:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								Document loaders generate data objects ("documents") and associated metadata
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from data sources.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-12-22 04:47:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								LangChainJS offers a `CSVLoader` [^1] component for loading CSV data from a file:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Generating Documents from a CSV file"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { CSVLoader } from "@langchain/community/document_loaders/fs/csv";
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const loader = new CSVLoader("pres.csv");
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const docs = await loader.load();
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								console.log(docs);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The CSV loader uses the first row to determine column headers and generates one
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								document per data row. For example, the following CSV holds Presidential data:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```csv
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Name,Index
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Bill Clinton,42
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								GeorgeW Bush,43
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Barack Obama,44
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Donald Trump,45
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Joseph Biden,46
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Each data row is translated to a document whose content is a list of attributes
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								and values. For example, the third data row is shown below:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								<table>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <thead><tr><th>CSV Row</th><th>Document Content</th></tr></thead>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <tbody><tr><td>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Name,Index
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Barack Obama,44
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  </td><td>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Name: Barack Obama
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Index: 44
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  </td></tr></tbody>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</table>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-12-22 04:47:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								The LangChainJS CSV loader will include source metadata in the document:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Document generated by the CSV loader"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Document {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  pageContent: 'Name: Barack Obama\nIndex: 44',
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  metadata: { source: 'pres.csv', line: 3 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								## SheetJS Conversion
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be
							 
						 
					
						
							
								
									
										
										
										
											2024-12-22 04:47:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								imported in NodeJS scripts that use LangChainJS and other JavaScript libraries.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-20 07:30:34 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								A simple pre-processing step can convert workbooks to CSV files that can be
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								processed by the existing CSV tooling:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```mermaid
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								flowchart LR
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  file[(Workbook\nXLSX/XLS)]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  subgraph SheetJS Structures
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    wb(((SheetJS\nWorkbook)))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ws((SheetJS\nWorksheet))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  end
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  csv(CSV\nstring)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  docs[[Documents\nArray]]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  file --> |readFile\n\n| wb
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  wb --> |wb.Sheets\nselect sheet| ws
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  ws --> |sheet_to_csv\n\n| csv
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  csv --> |CSVLoader\n\n| docs
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  linkStyle 0,1,2 color:blue,stroke:blue;
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								**Parsing files from the filesystem**
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The SheetJS `readFile`  method[^2] can read workbook files. The method accepts a
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								path and returns a workbook object that conforms to the SheetJS data model[^3].
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Load SheetJS Libraries */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { readFile, set_fs } from 'xlsx';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Load 'fs' for readFile support */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import * as fs from 'fs';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set_fs(fs);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Parse `pres.xlsx`  */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// highlight-next-line
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const wb = readFile("pres.xlsx");
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**Inspecting SheetJS workbook and worksheet objects**
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Workbook objects represent multi-sheet workbook files. They store individual
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								worksheet objects and other metadata.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								Relevant to this discussion, the workbook object uses the following keys[^7]:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								- `SheetNames` is an array of worksheet names.
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `Sheets`  is an object whose keys are sheet names and whose values are sheet objects. 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								`SheetNames[0]`  is the first worksheet name, so the following snippet will pull 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								the first worksheet from the workbook:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const first_ws = wb.Sheets[wb.SheetNames[0]];
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**Exporting SheetJS worksheets to CSV**
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								Each worksheet in the workbook can be written to CSV text using the SheetJS
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								`sheet_to_csv` [^4] method. The method accepts a SheetJS worksheet object and 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								returns a string.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const csv = utils.sheet_to_csv(first_ws);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**Complete Script**
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								For example, the following NodeJS script reads `pres.xlsx`  and displays CSV rows
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								from the first worksheet:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Print CSV data from the first worksheet"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Load SheetJS Libraries */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { readFile, set_fs, utils } from 'xlsx';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Load 'fs' for readFile support */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import * as fs from 'fs';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set_fs(fs);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Parse `pres.xlsx`  */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const wb = readFile("pres.xlsx");
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Print CSV rows from first worksheet */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const first_ws = wb.Sheets[wb.SheetNames[0]];
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const csv = utils.sheet_to_csv(first_ws);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								console.log(csv);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-12 19:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								### Similar Workflows {#similar-workflows}
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-20 07:30:34 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								:::note pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								A number of demos cover spiritually similar workflows:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  [Stata ](/docs/demos/extensions/stata ), [MATLAB ](/docs/demos/extensions/matlab ) 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								and [Maple ](/docs/demos/extensions/maple/ ) support XLSX data import. The SheetJS
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								integrations generate clean XLSX workbooks from user-supplied spreadsheets.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  [TensorFlow.js ](/docs/demos/math/tensorflow ), [Pandas ](/docs/demos/math/pandas ) 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								and [Mathematica ](/docs/demos/extensions/mathematica ) support CSV data import.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The SheetJS integrations generate clean CSVs and use built-in CSV processors.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-20 07:30:34 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  The ["Command-Line Tools" ](/docs/demos/cli/ ) demo covers techniques for making 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								standalone command-line tools for file conversion.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								### Single Worksheet
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								For a single worksheet, a SheetJS pre-processing step can write the CSV rows to
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								a file, and the `CSVLoader` can load the newly written file.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								<details open>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <summary><b>Code example</b> (click to hide)</summary>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Pulling data from the first worksheet of a workbook"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { CSVLoader } from "@langchain/community/document_loaders/fs/csv";
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { readFile, set_fs, utils } from 'xlsx';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Load 'fs' for readFile support */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import * as fs from 'fs';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set_fs(fs);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Parse `pres.xlsx` */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const wb = readFile("pres.xlsx");
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Generate CSV and write to `pres.xlsx.csv`  */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const first_ws = wb.Sheets[wb.SheetNames[0]];
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const csv = utils.sheet_to_csv(first_ws);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								fs.writeFileSync("pres.xlsx.csv", csv);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Create documents with CSVLoader */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const loader = new CSVLoader("pres.xlsx.csv");
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const docs = await loader.load();
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								console.log(docs);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ...
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								### Workbook
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								A workbook is a collection of worksheets. Each worksheet can be exported to a
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								separate CSV. If the CSVs are written to a subfolder, a `DirectoryLoader` [^5]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								can process the files in one step.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								<details open>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <summary><b>Code example</b> (click to hide)</summary>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								In this example, the script creates a subfolder named `csv` . Each worksheet in
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								the workbook will be processed and the generated CSV will be stored to numbered
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								files. The first worksheet will be stored to `csv/0.csv` .
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Pulling data from each worksheet of a workbook"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { CSVLoader } from "@langchain/community/document_loaders/fs/csv";
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { readFile, set_fs, utils } from 'xlsx';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Load 'fs' for readFile support */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import * as fs from 'fs';
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set_fs(fs);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Parse `pres.xlsx` */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const wb = readFile("pres.xlsx");
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Create a folder `csv`  */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								try { fs.mkdirSync("csv"); } catch(e) {}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Generate CSV data for each worksheet */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								wb.SheetNames.forEach((name, idx) => {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  const ws = wb.Sheets[name];
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  const csv = utils.sheet_to_csv(ws);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  fs.writeFileSync(`csv/${idx}.csv`, csv);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								});
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Create documents with DirectoryLoader */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const loader = new DirectoryLoader("csv", {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  ".csv": (path) => new CSVLoader(path)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								});
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const docs = await loader.load();
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								console.log(docs);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ...
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								## SheetJS Loader
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-12-22 04:47:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								The LangChainJS `CSVLoader`  does not add any Document metadata and does not
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								generate any attributes. A custom loader can work around limitations in the CSV
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								tooling and potentially include metadata that has no CSV equivalent.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```mermaid
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								flowchart LR
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  file[(Workbook\nXLSX/XLS)]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  subgraph SheetJS Structures
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    wb(((SheetJS\nWorkbook)))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    ws((SheetJS\nWorksheet))
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  end
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  aoo[(Array of\nObjects)]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  docs[[Documents\nArray]]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  file --> |readFile\n\n| wb
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  wb --> |wb.Sheets\nEach worksheet| ws
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  ws --> |sheet_to_json\n\n| aoo
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  aoo --> |new Document\nEach Row| docs
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  linkStyle 0,1,2 color:blue,stroke:blue;
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The demo [`LoadOfSheet` loader ](pathname:///loadofsheet/loadofsheet.mjs ) will
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								generate one Document per data row across all worksheets. It will also attempt
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								to build metadata and attributes for use in self-querying retrievers.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-20 07:30:34 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								```js title="Sample usage"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* create a loader for `data.xlsb` */
								const loader = new LoadOfSheet("./data.xlsb");
								
								/* generate documents (one per data row across all worksheets) */
								const docs = await loader.load();
								
								/* synthesized attributes for the SelfQueryRetriever (assigned while parsing) */
								const attributes = loader.attributes;
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								<details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <summary><b>Sample SheetJS Loader</b> (click to show)</summary>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								This example loader pulls data from each worksheet. It assumes each worksheet
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								includes one header row and a number of data rows.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="loadofsheet.mjs"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { Document } from "@langchain/core/documents";
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { BufferLoader } from "langchain/document_loaders/fs/buffer";
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { read, utils } from "xlsx";
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  Document loader that uses SheetJS to load documents.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  Each worksheet is parsed into an array of row objects using the SheetJS
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  `sheet_to_json`  method and projected to a `Document` . Metadata includes
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 *  original sheet name, row data, and row index
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								 */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								export default class LoadOfSheet extends BufferLoader {
								  /**
								   * Attribute descriptions synthesized by the most recent `parse` call.
								   *
								   * Starts empty and is rebuilt from scratch every time `parse` runs.
								   *
								   * @type {import("langchain/chains/query_constructor").AttributeInfo[]}
								   */
								  attributes = [];

								  /**
								   * Document loader that uses SheetJS to load documents.
								   *
								   * @param {string|Blob} filePathOrBlob Source Data
								   */
								  constructor(filePathOrBlob) {
								    super(filePathOrBlob);
								    this.attributes = [];
								  }

								  /**
								   * Parse document
								   *
								   * Every worksheet is converted to row objects with `sheet_to_json` and
								   * each row object becomes one entry of the returned array. As a side
								   * effect, `this.attributes` is replaced with the two fixed attributes
								   * (`worksheet`, `rowNum`) followed by one attribute per column seen in
								   * each worksheet.
								   *
								   * NOTE: column labels in multiple sheets are not disambiguated!
								   * A label present in two worksheets produces two attribute entries.
								   *
								   * @param {Buffer} raw Raw data Buffer
								   * @param {Document["metadata"]} metadata Document metadata
								   * @returns {Promise<Document[]>} Array of Documents
								   */
								  async parse(raw, metadata) {
								    /** @type {Document[]} */
								    const result = [];

								    this.attributes = [
								      { name: "worksheet", description: "Sheet or Worksheet Name", type: "string" },
								      { name: "rowNum", description: "Row index", type: "number" }
								    ];

								    const wb = read(raw, {type: "buffer", WTF: 1});
								    for(const name of wb.SheetNames) {
								      const ws = wb.Sheets[name];
								      /* skip a missing sheet; a bare `return` here would discard every
								         row collected so far and resolve to `undefined` */
								      if(!ws) continue;

								      /* column label -> set of value types observed in this worksheet.
								         A Map is used because labels come from the file and may clash
								         with Object.prototype names such as "constructor" */
								      /** @type {Map<string, Set<string>>} */
								      const fields = new Map();

								      const aoo = utils.sheet_to_json(ws);
								      aoo.forEach((row, idx) => {
								        const entries = Object.entries(row);
								        const listing = entries.map(([k, v]) => `- ${k}: ${v}`).join("\n");

								        result.push({
								          pageContent: `Row ${idx + 1} has the following content: \n${listing}\n`,
								          /* later spreads win: caller metadata and then the row columns
								             override `worksheet` / `rowNum` when the keys collide */
								          metadata: {
								            worksheet: name,
								            /* `__rowNum__` is attached to each row object by
								               `sheet_to_json` (see the SheetJS documentation) */
								            rowNum: row["__rowNum__"],
								            ...metadata,
								            ...row
								          }
								        });

								        for(const [k, v] of entries) {
								          if(v == null) continue;
								          let types = fields.get(k);
								          if(!types) fields.set(k, (types = new Set()));
								          types.add(v instanceof Date ? "date" : typeof v);
								        }
								      });

								      for(const [k, types] of fields) {
								        this.attributes.push({
								          name: k, description: k, type: [...types].join(" or ")
								        });
								      }
								    }

								    return result;
								  }
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								### From Text to Binary
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Many libraries and platforms offer generic "text" loaders that process files
							 
						 
					
						
							
								
									
										
										
										
											2024-12-22 04:47:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								assuming the UTF-8 encoding. This corrupts many spreadsheet formats including
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								XLSX, XLSB, XLSM and XLS.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::note pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								This issue affects many JavaScript tools. Various demos cover workarounds:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								- [ViteJS plugins](/docs/demos/static/vitejs#plugins) receive the relative path
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								to the workbook file and can read the file directly.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								- [Webpack plugins](/docs/demos/static/webpack#sheetjs-loader) support a special
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								`raw`  option that instructs the bundler to pass raw binary data. 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								- [NuxtJS parsers and transformers](/docs/demos/static/nuxtjs) can deduce the
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								path to the workbook file from internal identifiers.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-12-22 04:47:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								The `CSVLoader`  extends a special `TextLoader`  that forces UTF-8 text parsing.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								There is a separate `BufferLoader`  class, used by the PDF loader, that passes
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								the raw data using NodeJS `Buffer`  objects.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								<table>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <thead><tr><th>Binary</th><th>Text</th></tr></thead>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <tbody><tr><td>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```ts title="pdf.ts (structure)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Structure only: a binary loader built on `BufferLoader`. */
								export class PDFLoader extends BufferLoader {
								  // ...
								  /* `parse` receives the raw file contents as a NodeJS `Buffer` together
								     with the document metadata, and resolves to an array of Documents */
								  public async parse(
								    raw: Buffer,
								    metadata: Document["metadata"]
								  ): Promise< Document [ ] >  {
								    // ...
								  }
								  // ...
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  </td><td>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```ts title="csv.ts (structure)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Structure only: a text loader built on `TextLoader`. */
								export class CSVLoader extends TextLoader {
								  // ...
								  /* `parse` receives the file contents as a string (already decoded as
								     text), not as raw bytes, and resolves to an array of strings */
								  protected async parse(
								    raw: string
								
								  ): Promise< string [ ] >  {
								    // ...
								  }
								  // ...
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  </td></tr></tbody>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</table>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								### NodeJS Buffers
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The SheetJS `read`  method supports NodeJS Buffer objects directly[^6]:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Parsing a workbook in a BufferLoader"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { BufferLoader } from "langchain/document_loaders/fs/buffer";
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								import { read, utils } from "xlsx";
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Structure only: a `BufferLoader` subclass whose `parse` hands the raw
								   Buffer to the SheetJS `read` method. */
								export default class LoadOfSheet extends BufferLoader {
								  // ...
								  async parse(raw, metadata) {
								    // `raw` is passed to `read` with the "buffer" type option
								    // highlight-next-line
								    const wb = read(raw, {type: "buffer"});
								    // At this point, `wb`  is a SheetJS workbook object
								    // ...
								  }
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The `read`  method returns a SheetJS workbook object[^7].
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								### Generating Content
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The SheetJS `sheet_to_json`  method[^8] returns an array of data objects whose
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								keys are drawn from the first row of the worksheet.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								<table>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <thead><tr><th>Spreadsheet</th><th>Array of Objects</th></tr></thead>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <tbody><tr><td>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</td><td>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  { Name: "Bill Clinton", Index: 42 },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  { Name: "GeorgeW Bush", Index: 43 },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  { Name: "Barack Obama", Index: 44 },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  { Name: "Donald Trump", Index: 45 },
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  { Name: "Joseph Biden", Index: 46 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</td></tr></tbody></table>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The original `CSVLoader` wrote one line for each key-value pair. This text can be
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								generated by looping over the keys and values of the data row object. The
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								`Object.entries`  helper function simplifies the conversion: 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**
								 * Render a data row object as text, one `key: value` line per own
								 * enumerable property, in property order.
								 *
								 * @param {object} row Data row object (e.g. from `sheet_to_json`)
								 * @returns {string} Lines joined with "\n"; empty string for an empty row
								 */
								function make_csvloader_doc_from_row_object(row) {
								  const lines = [];
								  for(const [label, value] of Object.entries(row)) {
								    lines.push(`${label}: ${value}`);
								  }
								  return lines.join("\n");
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								### Generating Documents
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The loader must generate row objects for each worksheet in the workbook.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								In the SheetJS data model, the workbook object has two relevant fields:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `SheetNames`  is an array of sheet names 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `Sheets`  is an object whose keys are sheet names and values are sheet objects. 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								A `for..of`  loop can iterate across the worksheets:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Looping over a workbook (skeleton)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    /* read the workbook from the raw data */
								    const wb = read(raw, {type: "buffer", WTF:1});
								    /* `SheetNames` lists the sheet names; `Sheets` maps each name to its sheet object */
								    for(const name of wb.SheetNames) {
								      const ws = wb.Sheets[name];
								      const aoo = utils.sheet_to_json(ws);
								      // at this point, `aoo` is an array of objects
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								This simplified `parse`  function uses the snippet from the previous section:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="BufferLoader parse function (skeleton)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  async parse(raw, metadata) {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    /* array to hold generated documents */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    const result = [];
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    /* read workbook */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    const wb = read(raw, {type: "buffer", WTF:1});
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    /* loop over worksheets */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    for(let name of wb.SheetNames) {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      const ws = wb.Sheets[name];
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      const aoa = utils.sheet_to_json(ws);
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      /* loop over data rows */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      aoa.forEach((row, idx) => {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        /* generate a new document and add to the result array */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        result.push({
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								          pageContent: Object.entries(row).map(([k,v]) => `${k}: ${v}` ).join("\n")
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        });
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								      });
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    return result;
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								### Metadata and Attributes
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								It is strongly recommended to generate additional metadata and attributes for
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								self-query retrieval applications.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								<details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <summary><b>Implementation Details</b> (click to show)</summary>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**Metadata**
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Metadata is attached to each document object. The following example appends the
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								raw row data to the document metadata:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Document with metadata (snippet)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        /* generate a new document and add to the result array */
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        result.push({
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								          pageContent: Object.entries(row).map(([k,v]) => `${k}: ${v}` ).join("\n"),
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								          metadata: {
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            worksheet: name, // name of the worksheet
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            rowNum: idx, // data row index
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								            ...row // raw row data
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								          }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								        });
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								**Attributes**
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Each attribute object specifies three properties:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `name`  corresponds to the field in the document metadata 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `description`  is a description of the field 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  `type`  is a description of the data type. 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								While looping through data rows, a simple type check can keep track of the data
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								type for each column:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Tracking column types (sketch)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    for(let name of wb.SheetNames) {
								      /* column name -> object whose keys are the value types seen so far */
								      const fields = {};
								      // ...

								      aoo.forEach((row, idx) => {
								        result.push({/* ... */});
								        /* Inspect every property of the row */
								        for(const [key, value] of Object.entries(row)) {
								          /* null and undefined values are not recorded */
								          if(value == null) continue;
								          /* Date objects are recorded as "date", everything else by its `typeof` */
								          const kind = value instanceof Date ? "date" : typeof value;
								          if(!fields[key]) fields[key] = {};
								          fields[key][kind] = true;
								        }
								      });
								      // ...
								    }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Attributes can be generated after the rows of each worksheet are processed. Storing attributes
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								in a loader property will make it accessible to scripts that use the loader.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="Adding Attributes to a Loader (sketch)"
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Loader sketch: `parse` records one attribute per metadata field in `attributes` */
								export default class LoadOfSheet extends BufferLoader {
								  /* attribute objects ({ name, description, type }); reset at the start of `parse` */
								  // highlight-next-line
								  attributes = [];
								  // ...

								  async parse(raw, metadata) {
								    // Add the worksheet name and row index attributes
								    // highlight-start
								    this.attributes = [
								      { name: "worksheet", description: "Sheet or Worksheet Name", type: "string" },
								      { name: "rowNum", description: "Row index", type: "number" }
								    ];
								    // highlight-end
								    const wb = read(raw, {type: "buffer", WTF:1});
								    for(let name of wb.SheetNames) {
								      /* column name -> object whose keys are the value types seen in this worksheet */
								      // highlight-next-line
								      const fields = {};
								      // ...
								      const aoo = utils.sheet_to_json(ws);
								      aoo.forEach((row, idx) => {
								        result.push({/* ... */});
								        /* Check each property */
								        Object.entries(row).forEach(([k,v]) => {
								          /* Update fields entry to reflect the new data point (null and undefined are skipped) */
								          if(v != null) (fields[k] || (fields[k] = {}))[v instanceof Date ? "date" : typeof v] = true;
								        });
								      });
								      /* Add one attribute per metadata field */
								      // highlight-start
								      Object.entries(fields).forEach(([k,v]) => this.attributes.push({
								        name: k, description: k,
								        /* { number: true, string: true } -> "number or string" */
								        type: Object.keys(v).join(" or ")
								      }));
								      // highlight-end
								    }
								    // ...
								  }
								}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								## SheetJS Loader Demo
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The demo performs the query "Which rows have over 40 miles per gallon?" against
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								a [sample cars dataset](pathname:///cd.xls) and displays the results.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::caution pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								This demo was tested using the ChatQA-1.5 model[^9] in Ollama.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-26 16:46:11 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								The tested model used up to 10GB VRAM. It is strongly recommended to run the
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								demo on a GPU with at least 12GB VRAM or a newer Apple Silicon Mac with at least
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								32GB unified memory.
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								0) Install pre-requisites:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								- [NodeJS LTS (version 20+)](https://nodejs.org/)
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								- [Ollama](https://ollama.com/download)
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::note pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Ollama should be installed on the same platform as NodeJS. If NodeJS is run
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								within WSL, Ollama should also be installed within WSL.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-26 16:46:11 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								:::danger pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Intel ARC GPUs require the Intel Extension for PyTorch (IPEX) and a special
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								version of Ollama that ships with the associated LLM Library (IPEX-LLM).
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								<details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <summary><b>ARC Instructions</b> (click to show)</summary>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								These instructions are based on the official Intel recommendations.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								A) If Ollama for Windows was installed, close the program by right-clicking on
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								the tray icon and selecting "Quit Ollama".
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								B) Install Miniforge3[^10], selecting "Just Me" when prompted.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								C) Launch a normal Command Prompt and create a Conda environment:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								cd %USERPROFILE%\Documents
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								mkdir ollama-intel
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								cd ollama-intel
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set PATH=%PATH%;%USERPROFILE%\miniforge3\condabin
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								conda create -n llm-cpp python=3.11
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								D) Activate the environment in the session and install dependencies:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								conda activate llm-cpp
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								pip install --pre --upgrade ipex-llm[cpp]
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Close the window after the installation.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								E) Launch a new Administrator Command Prompt and set up Ollama:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								cd %USERPROFILE%\Documents\ollama-intel
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set PATH=%PATH%;%USERPROFILE%\miniforge3\condabin
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								conda activate llm-cpp
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								init-ollama.bat
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Close the window.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								F) Launch a normal Command Prompt window and run Ollama:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								cd %USERPROFILE%\Documents\ollama-intel
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set PATH=%PATH%;%USERPROFILE%\miniforge3\condabin
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								conda activate llm-cpp
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set OLLAMA_NUM_GPU=999
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set no_proxy=localhost,127.0.0.1
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set ZES_ENABLE_SYSMAN=1
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set SYCL_CACHE_PERSISTENT=1
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								set SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								ollama serve
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								This window should be kept open throughout the demo.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								After installing dependencies, start a new terminal session.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								1) Create a new project:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								mkdir sheetjs-loader
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								cd sheetjs-loader
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								npm init -y
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								2) Download the demo scripts:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								- [`loadofsheet.mjs`](pathname:///loadofsheet/loadofsheet.mjs)
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								- [`query.mjs`](pathname:///loadofsheet/query.mjs)
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								curl -LO https://docs.sheetjs.com/loadofsheet/query.mjs
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								curl -LO https://docs.sheetjs.com/loadofsheet/loadofsheet.mjs
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-07-01 03:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								:::note pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								In PowerShell, the command may fail with a parameter error:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Invoke-WebRequest : A parameter cannot be found that matches parameter name 'LO'.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								`curl.exe`  must be invoked directly: 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								curl.exe -LO https://docs.sheetjs.com/loadofsheet/query.mjs
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								curl.exe -LO https://docs.sheetjs.com/loadofsheet/loadofsheet.mjs
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								3) Install the SheetJS NodeJS module:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								<CodeBlock language="bash">{`\
						 
					
						
							
								
									
										
										
										
											2024-07-12 19:39:46 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								npm i --save https://sheet.lol/balls/xlsx-${current}.tgz`}
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								</CodeBlock>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								4) Install dependencies:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
									
										
										
										
											2025-04-18 04:04:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								npm i --save @langchain/core@0.3.44 langchain@0.3.21 @langchain/ollama@0.2.0 peggy@3.0.2
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-07-14 07:17:31 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								:::note pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-04 11:45:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								In some test runs, there were error messages relating to dependency and peer
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								dependency versions. The `--force`  flag will suppress version mismatch errors:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
									
										
										
										
											2025-04-18 04:04:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								npm i --save @langchain/core@0.3.44 langchain@0.3.21 @langchain/ollama@0.2.0 peggy@3.0.2 --force
							 
						 
					
						
							
								
									
										
										
										
											2024-11-04 11:45:57 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
									
										
										
										
											2024-07-14 07:17:31 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								5) Download the [cars dataset](pathname:///cd.xls):
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								curl -LO https://docs.sheetjs.com/cd.xls
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-07-01 03:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								:::note pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								In PowerShell, the command may fail with a parameter error:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Invoke-WebRequest : A parameter cannot be found that matches parameter name 'LO'.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								`curl.exe`  must be invoked directly: 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								curl.exe -LO https://docs.sheetjs.com/cd.xls
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								6) Install the `llama3-chatqa:8b-v1.5-q8_0`  model using Ollama:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								ollama pull llama3-chatqa:8b-v1.5-q8_0
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-26 16:46:11 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								<details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								  <summary><b>Additional steps for Intel GPUs</b> (click to show)</summary>
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								A different embedding model must be used on Intel GPUs:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								A) Install the `nomic-embed-text:latest`  model through Ollama:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								ollama pull nomic-embed-text:latest
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								B) Edit `query.mjs`  to use the embedding model:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```js title="query.mjs (edit highlighted line)"
							 
						 
					
						
							
								
									
										
										
										
											2025-03-27 02:49:13 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								const llm = new ChatOllama({ baseUrl: "http://127.0.0.1:11434", model });
							 
						 
					
						
							
								
									
										
										
										
											2025-01-26 16:46:11 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								// highlight-next-line
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const embeddings = new OllamaEmbeddings({ baseUrl: "http://127.0.0.1:11434", model: "nomic-embed-text:latest"});
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								</details>
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-12 05:40:33 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								7) Run the demo script:
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```bash
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								node query.mjs
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The demo performs the query "Which rows have over 40 miles per gallon?". It will
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								print the following nine results:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-07-01 03:59:01 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								```js title="Expected output (order of lines may differ)"
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								{ Name: 'volkswagen rabbit custom diesel', MPG: 43.1 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{ Name: 'vw rabbit c (diesel)', MPG: 44.3 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{ Name: 'renault lecar deluxe', MPG: 40.9 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{ Name: 'honda civic 1500 gl', MPG: 44.6 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{ Name: 'datsun 210', MPG: 40.8 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{ Name: 'vw pickup', MPG: 44 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{ Name: 'mazda glc', MPG: 46.6 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{ Name: 'vw dasher (diesel)', MPG: 43.4 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								{ Name: 'vw rabbit', MPG: 41.5 }
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								```
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-07-12 19:39:46 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								:::caution pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Some SheetJS users with older GPUs have reported errors.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								If the command fails, please try running the script a second time.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								To find the expected results:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  Open the `cd.xls`  spreadsheet in Excel 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								- Select Home > Sort & Filter > Filter in the Ribbon
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  Select the filter option for column B (`Miles_per_Gallon`) 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								-  In the popup, select "Greater Than" in the Filter dropdown and type 40 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The filtered results should match the following screenshot:
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-07-16 01:40:51 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								:::tip pass
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								The [SheetJS model](/docs/csf) exposes [formulae](/docs/csf/features/formulae)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								and other features.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[SheetJS Pro](https://sheetjs.com/pro) builds expose cell styling, images,
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								charts, tables, and other features.
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								:::
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-19 11:22:00 +00:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								[^1]: See ["How to load CSV data"](https://js.langchain.com/v0.2/docs/how_to/document_loader_csv) in the LangChain documentation
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[^2]: See [`readFile` in "Reading Files" ](/docs/api/parse-options )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[^3]: See ["SheetJS Data Model" ](/docs/csf/ )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[^4]: See [`sheet_to_csv` in "CSV and Text" ](/docs/api/utilities/csv#delimiter-separated-output )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[^5]: See ["Folders with multiple files"](https://js.langchain.com/v0.2/docs/integrations/document_loaders/file_loaders/directory/) in the LangChain documentation
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[^6]: See ["Supported Output Formats" in "Writing Files"](/docs/api/write-options#supported-output-formats)
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[^7]: See ["Workbook Object" ](/docs/csf/book )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[^8]: See [`sheet_to_json` in "Utilities" ](/docs/api/utilities/array#array-output )
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								[^9]: See [the official ChatQA website ](https://chatqa-project.github.io/ ) for the ChatQA paper and other model details.
							 
						 
					
						
							
								
									
										
										
										
											2025-01-26 16:46:11 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								[^10]: Select ["Windows" `x86_64`](https://conda-forge.org/download/) on the Installation page.