forked from sheetjs/docs.sheetjs.com
		
	
		
			
	
	
		
			308 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
		
		
			
		
	
	
			308 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Markdown
		
	
	
	
	
	
|  | --- | ||
|  | title: SQL Connectors | ||
|  | pagination_prev: demos/grid | ||
|  | pagination_next: demos/worker | ||
|  | sidebar_custom_props: | ||
|  |   sql: true | ||
|  | --- | ||
|  | 
 | ||
|  | import Tabs from '@theme/Tabs'; | ||
|  | import TabItem from '@theme/TabItem'; | ||
|  | 
 | ||
|  | ### Generating Tables
 | ||
|  | 
 | ||
|  | This example will fetch <https://sheetjs.com/data/cd.xls>, scan the columns of the | ||
|  | first worksheet to determine data types, and generate 6 PostgreSQL statements. | ||
|  | 
 | ||
|  | <details><summary><b>Explanation</b> (click to show)</summary> | ||
|  | 
 | ||
The relevant `generate_sql` function takes a worksheet object and a table name:
|  | 
 | ||
|  | ```js | ||
// define mapping between determined types and PostgreSQL types
const PG = { "n": "float8", "s": "text", "b": "boolean" };

/**
 * Generate PostgreSQL statements that recreate a worksheet as a table.
 *
 * @param {object} ws     worksheet object passed to `XLSX.utils.sheet_to_json`
 * @param {string} wsname name of the table to create
 * @returns {string[]} one CREATE TABLE statement followed by one INSERT
 *                     statement for every row with at least one value
 */
function generate_sql(ws, wsname) {
  // PostgreSQL identifiers are wrapped in double quotes (backticks are a
  // syntax error); an embedded double quote is escaped by doubling it
  const ident = (name) => `"${String(name).replaceAll('"', '""')}"`;
  // string literals are wrapped in single quotes; embedded quotes are doubled
  const literal = (value) => `'${String(value).replaceAll("'", "''")}'`;

  // generate an array of objects from the data
  const aoo = XLSX.utils.sheet_to_json(ws);

  // types maps column headers to types, while hdr holds headers in order.
  // A Map is used so headers such as "constructor" or "toString" do not
  // collide with properties inherited from Object.prototype
  const types = new Map();
  const hdr = [];

  // loop across each [key, value] pair of each row object
  for (const row of aoo) {
    for (const [k, v] of Object.entries(row)) {
      // If this is first time seeing key, mark unknown and append header array
      if (!types.has(k)) { types.set(k, "?"); hdr.push(k); }

      // skip null and undefined
      if (v == null) continue;

      // check and resolve type
      switch (typeof v) {
        case "string": // strings are the broadest type
          types.set(k, "s"); break;
        case "number": // if column is not string, number is the broadest type
          if (types.get(k) !== "s") types.set(k, "n"); break;
        case "boolean": // only mark boolean if column is unknown or boolean
          if ("?b".includes(types.get(k))) types.set(k, "b"); break;
        default: types.set(k, "s"); break; // default to string type
      }
    }
  }

  // columns that never held a value stay "?" and fall back to text
  const columns = hdr.map((h) => `${ident(h)} ${PG[types.get(h)] ?? "text"}`);
  const statements = [`CREATE TABLE ${ident(wsname)} (${columns.join(", ")});`];

  // generate INSERT query for each row
  for (const row of aoo) {
    // fields will hold the column names and values will hold the values
    const fields = [];
    const values = [];
    for (const [k, v] of Object.entries(row)) {
      // skip null / undefined
      if (v == null) continue;
      fields.push(ident(k));
      // when the field type is numeric, `true` -> 1 and `false` -> 0
      if (types.get(k) === "n") values.push(typeof v === "boolean" ? (v ? 1 : 0) : v);
      // otherwise, write the value as a quoted string literal
      else values.push(literal(v));
    }
    // rows without any value are skipped
    if (fields.length > 0) {
      statements.push(`INSERT INTO ${ident(wsname)} (${fields.join(", ")}) VALUES (${values.join(", ")})`);
    }
  }
  return statements;
}
|  | ``` | ||
|  | 
 | ||
|  | </details> | ||
|  | 
 | ||
|  | ```jsx live | ||
|  | function SheetJSQLWriter() { | ||
|  |   // define mapping between determined types and PostgreSQL types | ||
|  |   const PG = { "n": "float8", "s": "text", "b": "boolean" }; | ||
|  |   function generate_sql(ws, wsname) { | ||
|  |     const aoo = XLSX.utils.sheet_to_json(ws); | ||
|  |     const types = {}, hdr = []; | ||
|  |     // loop across each key in each column | ||
|  |     aoo.forEach(row => Object.entries(row).forEach(([k,v]) => { | ||
|  |       // set up type if header hasn't been seen | ||
|  |       if(!types[k]) { types[k] = "?"; hdr.push(k); } | ||
|  |       // check and resolve type | ||
|  |       switch(typeof v) { | ||
|  |         case "string": types[k] = "s"; break; | ||
|  |         case "number": if(types[k] != "s") types[k] = "n"; break; | ||
|  |         case "boolean": if("?b".includes(types[k])) types[k] = "b"; break; | ||
|  |         default: types[k] = "s"; break; | ||
|  |       } | ||
|  |     })); | ||
|  |     return [ | ||
|  |       // generate CREATE TABLE query and return batch | ||
|  |       `CREATE TABLE \`${wsname}\` (${hdr.map(h => `\`${h}\` ${PG[types[h]]}`).join(", ")});` | ||
|  |     ].concat(aoo.map(row => { | ||
|  |       const entries = Object.entries(row); | ||
|  |       const fields = [], values = []; | ||
|  |       entries.forEach(([k,v]) => { | ||
|  |         if(v == null) return; | ||
|  |         fields.push(`\`${k}\``); | ||
|  |         if(types[k] == "n") values.push(typeof v == "boolean" ? (v ? 1 : 0) : v); | ||
|  |         else values.push(`'${v.toString().replaceAll("'", "''")}'`); | ||
|  |       }) | ||
|  |       if(fields.length) return `INSERT INTO \`${wsname}\` (${fields.join(", ")}) VALUES (${values.join(", ")})`; | ||
|  |     })).filter(x => x).slice(0, 6); | ||
|  |   } | ||
|  |   const [url, setUrl] = React.useState("https://sheetjs.com/data/cd.xls"); | ||
|  |   const set_url = React.useCallback((evt) => setUrl(evt.target.value)); | ||
|  |   const [out, setOut] = React.useState(""); | ||
|  |   const xport = React.useCallback(async() => { | ||
|  |     const ab = await (await fetch(url)).arrayBuffer(); | ||
|  |     const wb = XLSX.read(ab), wsname = wb.SheetNames[0]; | ||
|  |     setOut(generate_sql(wb.Sheets[wsname], wsname).join("\n")); | ||
|  |   }); | ||
|  | 
 | ||
|  |   return ( <> {out && (<><a href={url}>{url}</a><pre>{out}</pre></>)} | ||
|  |     <b>URL: </b><input type="text" value={url} onChange={set_url} size="50"/> | ||
|  |     <br/><button onClick={xport}><b>Fetch!</b></button> | ||
|  |   </> ); | ||
|  | } | ||
|  | ``` | ||
|  | 
 | ||
|  | ## Databases
 | ||
|  | 
 | ||
|  | ### Query Builders
 | ||
|  | 
 | ||
|  | Query builders are designed to simplify query generation and normalize field | ||
|  | types and other database minutiae. | ||
|  | 
 | ||
|  | **Knex** | ||
|  | 
 | ||
|  | The result of a `SELECT` statement is an array of objects: | ||
|  | 
 | ||
|  | ```js | ||
// the SELECT resolves to an array of objects, which json_to_sheet accepts
const aoo = await connection.select("*").from("DataTable");
const worksheet = XLSX.utils.json_to_sheet(aoo);
|  | ``` | ||
|  | 
 | ||
|  | Knex wraps primitive types when creating a table. `generate_sql` takes a `knex` | ||
|  | connection object and uses the API: | ||
|  | 
 | ||
|  | <details><summary><b>Generating a Table</b> (click to show)</summary> | ||
|  | 
 | ||
|  | ```js | ||
// define mapping between determined types and Knex types
const PG = { "n": "float", "s": "text", "b": "boolean" };

/**
 * Recreate a worksheet as a table through a Knex connection.
 *
 * The table named `wsname` is dropped if it exists, created with one column
 * per header, and filled row by row.  A row that fails to insert is reported
 * with `console.error` and skipped; the remaining rows are still inserted.
 *
 * @param {object} knex   Knex connection object
 * @param {object} ws     worksheet object passed to `XLSX.utils.sheet_to_json`
 * @param {string} wsname name of the table to (re)create
 * @returns {Promise<object>} the same `knex` object that was passed in
 */
async function generate_sql(knex, ws, wsname) {
  // generate an array of objects from the data
  const aoo = XLSX.utils.sheet_to_json(ws);

  // types maps column headers to types, while hdr holds headers in order.
  // A Map is used so headers such as "constructor" or "toString" do not
  // collide with properties inherited from Object.prototype
  const types = new Map();
  const hdr = [];

  // loop across each [key, value] pair of each row object
  for (const row of aoo) {
    for (const [k, v] of Object.entries(row)) {
      // If this is first time seeing key, mark unknown and append header array
      if (!types.has(k)) { types.set(k, "?"); hdr.push(k); }

      // skip null and undefined
      if (v == null) continue;

      // check and resolve type
      switch (typeof v) {
        case "string": // strings are the broadest type
          types.set(k, "s"); break;
        case "number": // if column is not string, number is the broadest type
          if (types.get(k) !== "s") types.set(k, "n"); break;
        case "boolean": // only mark boolean if column is unknown or boolean
          if ("?b".includes(types.get(k))) types.set(k, "b"); break;
        default: types.set(k, "s"); break; // default to string type
      }
    }
  }

  await knex.schema.dropTableIfExists(wsname);
  await knex.schema.createTable(wsname, (table) => {
    // call the builder method named after the column type; columns that
    // never held a value stay "?" and fall back to text
    for (const h of hdr) table[PG[types.get(h)] ?? "text"](h);
  });

  for (const [i, row] of aoo.entries()) {
    // skip missing rows and rows with no fields
    if (!row || Object.keys(row).length === 0) continue;
    try {
      await knex.insert(row).into(wsname);
    } catch (e) {
      // best effort: report the failed row and keep going
      console.error(`generate_sql: could not insert row ${i} into ${wsname}: ${e?.message ?? e}`);
    }
  }
  return knex;
}
|  | ``` | ||
|  | 
 | ||
|  | </details> | ||
|  | 
 | ||
|  | 
 | ||
|  | ### Other SQL Databases
 | ||
|  | 
 | ||
|  | The `generate_sql` function from ["Building Schemas from Worksheets"](#building-schemas-from-worksheets) | ||
|  | can be adapted to generate SQL statements for a variety of databases, including: | ||
|  | 
 | ||
|  | **PostgreSQL** | ||
|  | 
 | ||
|  | The `pg` connector library was tested against the `generate_sql` output as-is. | ||
|  | 
 | ||
|  | The `rows` property of a query result is an array of objects that plays nice | ||
|  | with `json_to_sheet`: | ||
|  | 
 | ||
|  | ```js | ||
// await the query first, then read `rows` from the resolved result
const { rows } = await connection.query(`SELECT * FROM DataTable`);
const worksheet = XLSX.utils.json_to_sheet(rows);
|  | ``` | ||
|  | 
 | ||
|  | **MySQL / MariaDB** | ||
|  | 
 | ||
|  | The `mysql2` connector library was tested.  The differences are shown below, | ||
|  | primarily stemming from the different quoting requirements and field types. | ||
|  | 
 | ||
|  | <details><summary><b>Differences</b> (click to show)</summary> | ||
|  | 
 | ||
|  | ```js | ||
// highlight-start
// define mapping between determined types and MySQL types
const PG = { "n": "REAL", "s": "TEXT", "b": "TINYINT" };
// highlight-end

/**
 * Generate MySQL / MariaDB statements that recreate a worksheet as a table.
 *
 * @param {object} ws     worksheet object passed to `XLSX.utils.sheet_to_json`
 * @param {string} wsname name of the table to create
 * @returns {string[]} one CREATE TABLE statement followed by one INSERT
 *                     statement for every row with at least one value
 */
function generate_sql(ws, wsname) {
  // highlight-start
  // MySQL identifiers are wrapped in backticks; embedded backticks are doubled
  const ident = (name) => `\`${String(name).replaceAll("`", "``")}\``;
  // string literals are wrapped in double quotes: backslashes are escaped and
  // embedded quotes are doubled
  // NOTE(review): assumes the default sql_mode (neither ANSI_QUOTES nor
  // NO_BACKSLASH_ESCAPES is enabled) -- confirm for the target server
  const literal = (value) =>
    `"${String(value).replaceAll("\\", "\\\\").replaceAll('"', '""')}"`;
  // highlight-end

  // generate an array of objects from the data
  const aoo = XLSX.utils.sheet_to_json(ws);

  // types maps column headers to types, while hdr holds headers in order.
  // A Map is used so headers such as "constructor" or "toString" do not
  // collide with properties inherited from Object.prototype
  const types = new Map();
  const hdr = [];

  // loop across each [key, value] pair of each row object
  for (const row of aoo) {
    for (const [k, v] of Object.entries(row)) {
      // If this is first time seeing key, mark unknown and append header array
      if (!types.has(k)) { types.set(k, "?"); hdr.push(k); }

      // skip null and undefined
      if (v == null) continue;

      // check and resolve type
      switch (typeof v) {
        case "string": // strings are the broadest type
          types.set(k, "s"); break;
        case "number": // if column is not string, number is the broadest type
          if (types.get(k) !== "s") types.set(k, "n"); break;
        case "boolean": // only mark boolean if column is unknown or boolean
          if ("?b".includes(types.get(k))) types.set(k, "b"); break;
        default: types.set(k, "s"); break; // default to string type
      }
    }
  }

  // The final array consists of the CREATE TABLE query and a series of INSERTs
  // highlight-start
  // columns that never held a value stay "?" and fall back to TEXT
  const columns = hdr.map((h) => `${ident(h)} ${PG[types.get(h)] ?? "TEXT"}`);
  const statements = [`CREATE TABLE ${ident(wsname)} (${columns.join(", ")});`];
  // highlight-end

  // generate INSERT query for each row
  for (const row of aoo) {
    // fields will hold the column names and values will hold the values
    const fields = [];
    const values = [];
    for (const [k, v] of Object.entries(row)) {
      // skip null / undefined
      if (v == null) continue;
      // highlight-next-line
      fields.push(ident(k));
      // numbers are written as-is
      if (typeof v === "number") values.push(v);
      // highlight-start
      // in numeric and TINYINT columns, `true` -> 1 and `false` -> 0
      else if (typeof v === "boolean" && types.get(k) !== "s") values.push(v ? 1 : 0);
      // otherwise, write the value as a quoted string literal
      else values.push(literal(v));
      // highlight-end
    }
    // rows without any value are skipped
    if (fields.length > 0) {
      // highlight-next-line
      statements.push(`INSERT INTO ${ident(wsname)} (${fields.join(", ")}) VALUES (${values.join(", ")})`);
    }
  }
  return statements;
}
|  | ``` | ||
|  | 
 | ||
|  | </details> | ||
|  | 
 | ||
The first element of a query result is an array of objects that plays nice
with `json_to_sheet`:
|  | 
 | ||
|  | ```js | ||
// await the query first, then take the first element of the resolved result
const [rows] = await connection.query(`SELECT * FROM DataTable`);
const worksheet = XLSX.utils.json_to_sheet(rows);
|  | ``` |