From e2fb27aa4f2798d8e5211e195698f732f40197d7 Mon Sep 17 00:00:00 2001 From: Asad Date: Wed, 20 Nov 2024 19:43:13 -0500 Subject: [PATCH 1/2] feat(pg): add Ubuntu/Debian PostgreSQL installation guide & enhanced SheetJSPG.js type deduc - Include Ubuntu/Debian installation instructions - Improve PostgreSQL table creation with better type detection - Enhance date format detection and parsing Breaking Changes: - Replace `aoo_to_pg_table` to `sheet_to_pg_table` & now it takes worksheet object instead of array of arrays - Changed type detection algorithm may produce column types --- .gitignore | 2 +- SheetJSPG.js | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 7157897..3c16c33 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ node_modules/ .log :.prettierrc SheetJSPGExport.xlsx -pres.numbers \ No newline at end of file +pres.numbers diff --git a/SheetJSPG.js b/SheetJSPG.js index 81ace3f..b1b9d9b 100644 --- a/SheetJSPG.js +++ b/SheetJSPG.js @@ -1,12 +1,12 @@ const pg = require("pg"), format = require("pg-format"); const XLSX = require("xlsx"); const opts = { - database:"SheetJSPG", - host: "127.0.0.1", // localhost - port: 5432, - user: "postgres", - password: "7509" -}; + database:"SheetJSPG", + host: "127.0.0.1", // localhost + port: 5432, + user: "postgres", + password: "7509" + }; function deduceType(cells) { if (!cells || cells.length === 0) return 'text'; @@ -170,4 +170,4 @@ try { /* disconnect */ await client.end(); } -})(); \ No newline at end of file +})(); From bfc7a81446a13077de2e8a3e5c699af18522acd1 Mon Sep 17 00:00:00 2001 From: Asad Date: Tue, 3 Dec 2024 15:26:19 -0500 Subject: [PATCH 2/2] chore: organize directory --- SheetJSPG.js | 173 ------------------ sql-types.js => index.js | 0 test.js | 6 +- test_files/boolean_formats.xlsx | Bin 5570 -> 5570 bytes test_files/date_formats.xlsx | Bin 5558 -> 5558 bytes .../gen_test_files.py | 25 +-- test_files/number_formats.xlsx | Bin 5635 -> 5635 bytes test_files/precision.xlsx | Bin 5621 -> 5621 bytes .../requirements.txt | 0 test_files/special_values.xlsx | Bin 5459 -> 5459 bytes test_files/string_formats.xlsx | Bin 5646 -> 5646 bytes 11 files changed, 11 insertions(+), 193 deletions(-) delete mode 100644 SheetJSPG.js rename sql-types.js => index.js (100%) rename gen_test_files.py => test_files/gen_test_files.py (89%) rename requirements.txt => test_files/requirements.txt (100%) diff --git a/SheetJSPG.js b/SheetJSPG.js deleted file mode 100644 index b1b9d9b..0000000 --- a/SheetJSPG.js +++ /dev/null @@ -1,173 +0,0 @@ -const pg = require("pg"), format = require("pg-format"); -const XLSX = require("xlsx"); -const opts = { - database:"SheetJSPG", - host: "127.0.0.1", // localhost - port: 5432, - user: "postgres", - password: "7509" - }; - -function deduceType(cells) { - if (!cells || cells.length === 0) return 'text'; - - const nonEmptyCells = cells.filter(cell => cell && cell.v != null); - if (nonEmptyCells.length === 0) return 'text'; - - // Check for dates by looking at both cell type and formatted value - const isDateCell = cell => cell?.t === 'd' || (cell?.t === 'n' && cell.w && /\d{4}-\d{2}-\d{2}|\d{1,2}\/\d{1,2}\/\d{4}|\d{2}-[A-Za-z]{3}-\d{4}|[A-Za-z]{3}-\d{2}|\d{1,2}-[A-Za-z]{3}/.test(cell.w)); - - if (nonEmptyCells.some(isDateCell)) { return 'date'; } - - // Check for booleans - const allBooleans = nonEmptyCells.every(cell => cell.t === 'b'); - if (allBooleans) { return 'boolean'; } - - // Check for numbers - const allNumbers = nonEmptyCells.every(cell => cell.t === 'n' || (cell.t === 's' && !isNaN(cell.v.replace(/[,$\s%()]/g, '')))); - - if (allNumbers) { - const numbers = nonEmptyCells.map(cell => { - if (cell.t === 'n') return cell.v; - return parseFloat(cell.v.replace(/[,$\s%()]/g, '')); - }); - - const needsPrecision = numbers.some(num => { - const str = num.toString(); - return str.includes('e') || - (str.includes('.') && str.split('.')[1].length > 6) || - Math.abs(num) > 1e15; - }); - - return needsPrecision ? 'numeric' : 'double precision'; - } - return 'text'; // default to string type -} - -function parseValue(cell, type) { - if (!cell || cell.v == null) return null; - - switch (type) { - case 'date': - if (cell.t === 'd') { return cell.v.toISOString().split('T')[0]; } - if (cell.t === 'n') { - const date = new Date((cell.v - 25569) * 86400 * 1000); - return date.toISOString().split('T')[0]; - } - return null; - - case 'numeric': - case 'double precision': - if (cell.t === 'n') return cell.v; - if (cell.t === 's') { - const cleaned = cell.v.replace(/[,$\s%()]/g, ''); - if (!isNaN(cleaned)) return parseFloat(cleaned); - } - return null; - - case 'boolean': - return cell.t === 'b' ? cell.v : null; - - default: - return String(cell.v); - } -} - -/* create table and load data given a worksheet and a PostgreSQL client */ -async function sheet_to_pg_table(client, worksheet, tableName) { - if (!worksheet['!ref']) return; - - const range = XLSX.utils.decode_range(worksheet['!ref']); - - /* Extract headers from first row, clean names for PostgreSQL */ - const headers = []; - for (let col = range.s.c; col <= range.e.c; col++) { - const cellAddress = XLSX.utils.encode_cell({ r: range.s.r, c: col }); - const cell = worksheet[cellAddress]; - const headerValue = cell ? String(cell.v).replace(/[^a-zA-Z0-9_]/g, '_') : `column_${col + 1}`; - headers.push(headerValue.toLowerCase()); - } - - /* Group cell values by column for type deduction */ - const columnValues = headers.map(() => []); - for (let row = range.s.r + 1; row <= range.e.r; row++) { - for (let col = range.s.c; col <= range.e.c; col++) { - const cellAddress = XLSX.utils.encode_cell({ r: row, c: col }); - const cell = worksheet[cellAddress]; - columnValues[col].push(cell); - } - } - - /* Deduce PostgreSQL type for each column */ - const types = {}; - headers.forEach((header, idx) => { - types[header] = deduceType(columnValues[idx]); - }); - - /* Delete table if it exists in the DB */ - await client.query(format('DROP TABLE IF EXISTS %I', tableName)); - - /* Create table */ - const createTableSQL = format( - 'CREATE TABLE %I (%s)', - tableName, - headers.map(header => format('%I %s', header, types[header])).join(', ') - ); - await client.query(createTableSQL); - - /* Insert data row by row */ - for (let row = range.s.r + 1; row <= range.e.r; row++) { - const values = headers.map((header, col) => { - const cellAddress = XLSX.utils.encode_cell({ r: row, c: col }); - const cell = worksheet[cellAddress]; - return parseValue(cell, types[header]); - }); - - const insertSQL = format( - 'INSERT INTO %I (%s) VALUES (%s)', - tableName, - headers.map(h => format('%I', h)).join(', '), - values.map(() => '%L').join(', ') - ); - await client.query(format(insertSQL, ...values)); - } -} - -(async() => { - -/* read file and get first worksheet */ -const oldwb = XLSX.readFile("pres.numbers"); -const oldws = oldwb.Sheets[oldwb.SheetNames[0]]; - -/* import data to postgres */ -let client = new pg.Client(opts); -try { - /* open connection to PostgreSQL database */ - await client.connect(); - - /* create table and load data given a worksheet */ - await sheet_to_pg_table(client, oldws, "Presidents"); -} finally { - /* disconnect */ - await client.end(); -} - -/* export data to xlsx */ -client = new pg.Client(opts); -try { - /* open connection to PostgreSQL database */ - await client.connect(); - - /* fetch all data from specified table */ - const res = await client.query(format(`SELECT * FROM %I`, "Presidents")); - - /* export to file */ - const newws = XLSX.utils.json_to_sheet(res.rows); - const newwb = XLSX.utils.book_new(); - XLSX.utils.book_append_sheet(newwb, newws, "Export"); - XLSX.writeFile(newwb, "SheetJSPGExport.xlsx"); -} finally { - /* disconnect */ - await client.end(); -} -})(); diff --git a/sql-types.js b/index.js similarity index 100% rename from sql-types.js rename to index.js diff --git a/test.js b/test.js index 20e1b7c..d551923 100644 --- a/test.js +++ b/test.js @@ -1,6 +1,6 @@ const XLSX = require('xlsx'); const { Client } = require('pg'); -const { sheet_to_pg_table } = require('./sql-types'); +const { sheet_to_pg_table } = require('./index'); const path = require('path'); async function readExcelAndTest(filename, tableName) { @@ -19,7 +19,7 @@ async function readExcelAndTest(filename, tableName) { const client = new Client({ host: 'localhost', database: 'SheetJSPG', - user: 'postgres', + user: 'dark', password: '7509' }); @@ -67,4 +67,4 @@ async function runAllTests() { } } -runAllTests().catch(console.error); \ No newline at end of file +runAllTests().catch(console.error); diff --git a/test_files/boolean_formats.xlsx b/test_files/boolean_formats.xlsx index 80e5da0d9d6468dbbfa047f6ca7b58c94e1314c4..a7b35b9a8bdec5a44cea6f7d3fe33ef7b9c1a7e8 100644 GIT binary patch delta 324 zcmV-K0lWUfE5a+VK@bE0nW0vbMGzl<%}&EG5QOhMMdW+pI000wB$WX5L?tAOK-?^Q z+gijwvJGk89w%+nR_cl4dS<_!UAda?RGa*Q^U~-Vkt|0EXl{hmn;N~Z7U>KnJ}@m< zYc$lT2ao2>O~ozc#=)yI7F>|vlL*qDa$BQq2$o{(`4$xOS#;^I|2m9(1HRTJSG{zCvO0!{MCk0Iddu*dJ9*r zM{#oRyqt_(*JWKfT4R2KKbJ3WKr+rV^ZT2P_!ZRQSAzhijP+%Xs3o1o7BM z{mR>6k$oSCBnF`SxJTuqe0p9bEl^YB#cfK8G%r`=fs&Hm&Ax__7yT!oIt=+Y@d()G WzWfBuAG3!MxB~c^gQRMGzl<%TB{E5JmTVMdW?rJOF`}q!OU6sDwli6+6qG zwifY2wjs^mQebh8bka^aN1_XcLI zCvkG`otTVW*CkysT0{N}uUD_{Jgcy5x*73)E{{m-w;>hij?xt8n6t0RGem zedg`B$e|B-6atV#+>`QIJijz0OH}7+ej8_5oTp8mlj4!w6<@>1tNt@k9*6v!cmf=9 WUw?x753`36xB~>Bc^gQRLlu8|1)Q@0 diff --git a/test_files/date_formats.xlsx b/test_files/date_formats.xlsx index 61f26abe3996489214c751ed5b1db1aac58cf60e..58eb0a82b514b37da4a9a6d6747e1796e950d2bf 100644 GIT binary patch delta 324 zcmV-K0lWUTE4C}JH4p>KnJ}@m< zYc$lT2ao2>O~ozc#=)yI7F>|vlL*qDa$BQq2$o{(`4$xOS#;^I|2m9(1HRTJSG{zCvO0!{MCk0Iddu*dJ9*r zM{#oRyqt_(*JWKfT4R2KKbJ3WKr+rV^ZT2P_!ZRQSAzhijP+%Xs3o1o7BM z{mR>6k$oSCBnF`SxJTuqe0p9bEl^YB#cfK8G%r`=fs&Hm&Ax__7yT!oIt=+Y@d()G WzWfBuAG3QAxB~Qebh8bka^aN1_XcLI zCvkG`otTVW*CkysT0{N}uUD_{Jgcy5x*73)E{{m-w;>hij?xt8n6t0RGem zedg`B$e|B-6atV#+>`QIJijz0OH}7+ej8_5oTp8mlj4!w6<@>1tNt@k9*6v!cmf=9 WUw?x753_p_xB~>Bc^gQRHx++9b)0Je diff --git a/gen_test_files.py b/test_files/gen_test_files.py similarity index 89% rename from gen_test_files.py rename to test_files/gen_test_files.py index 06a9b1f..0394cbe 100644 --- a/gen_test_files.py +++ b/test_files/gen_test_files.py @@ -2,11 +2,6 @@ import pandas as pd from datetime import datetime import numpy as np import os - -def create_test_directory(): - """Create a directory for test files if it doesn't exist""" - if not os.path.exists('test_files'): - os.makedirs('test_files') def generate_number_formats_test(): """Test Case 1: Common spreadsheet number formats""" @@ -22,8 +17,7 @@ def generate_number_formats_test(): ] }) - # Create Excel writer with xlsxwriter engine - writer = pd.ExcelWriter('test_files/number_formats.xlsx', engine='xlsxwriter') + writer = pd.ExcelWriter('number_formats.xlsx', engine='xlsxwriter') df.to_excel(writer, index=False, sheet_name='Sheet1') # Get workbook and worksheet objects @@ -55,7 +49,7 @@ def generate_date_formats_test(): ] }) - writer = pd.ExcelWriter('test_files/date_formats.xlsx', engine='xlsxwriter') + writer = pd.ExcelWriter('date_formats.xlsx', engine='xlsxwriter') df.to_excel(writer, index=False, sheet_name='Sheet1') workbook = writer.book @@ -98,7 +92,7 @@ def generate_special_values_test(): ] }) - writer = pd.ExcelWriter('test_files/special_values.xlsx', engine='xlsxwriter') + writer = pd.ExcelWriter('special_values.xlsx', engine='xlsxwriter') df.to_excel(writer, index=False, sheet_name='Sheet1') writer.close() @@ -117,7 +111,7 @@ def generate_precision_test(): ] }) - writer = pd.ExcelWriter('test_files/precision.xlsx', engine='xlsxwriter') + writer = pd.ExcelWriter('precision.xlsx', engine='xlsxwriter') df.to_excel(writer, index=False, sheet_name='Sheet1') workbook = writer.book @@ -156,7 +150,7 @@ def generate_string_formats_test(): ] }) - writer = pd.ExcelWriter('test_files/string_formats.xlsx', engine='xlsxwriter') + writer = pd.ExcelWriter('string_formats.xlsx', engine='xlsxwriter') df.to_excel(writer, index=False, sheet_name='Sheet1') workbook = writer.book @@ -179,7 +173,7 @@ def generate_boolean_formats_test(): ] }) - writer = pd.ExcelWriter('test_files/boolean_formats.xlsx', engine='xlsxwriter') + writer = pd.ExcelWriter('boolean_formats.xlsx', engine='xlsxwriter') df.to_excel(writer, index=False, sheet_name='Sheet1') workbook = writer.book @@ -203,9 +197,6 @@ def generate_boolean_formats_test(): def main(): - """Geneate all test Excel files""" - create_test_directory() - print("Generating test Excel files...") generate_number_formats_test() generate_date_formats_test() @@ -213,7 +204,7 @@ def main(): generate_precision_test() generate_string_formats_test() generate_boolean_formats_test() - print("Test files generated in 'test_files' directory") + print("Test files generated") if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test_files/number_formats.xlsx b/test_files/number_formats.xlsx index 8161803b9cb93bf54d29fef82265370467f396df..c1d477d3fba08d208a8a93a986143d1d13a83822 100644 GIT binary patch delta 324 zcmV-K0lWT#EQ2htf)E4%nW0vbh7ccr%}&EG5QOhMMdW+pI000wB$WX5L?tAOK-?^Q z+gijwvJGk89w%+nR_cl4dS<_!UAda?RGa*Q^U~-Vkt|0EXl{hmn;N~Z7U>KnJ}@m< zYc$lT2ao2>O~ozc#=)yI7F>|vlL*qDa$BQq2$o{(`4$xOS#;^I|2m9(1HRTJSG{zCvO0!{MCk0Iddu*dJ9*r zM{#oRyqt_(*JWKfT4R2KKbJ3WKr+rV^ZT2P_!ZRQSAzhijP+%Xs3o1o7BM z{mR>6k$oSCBnF`SxJTuqe0p9bEl^YB#cfK8G%r`=fs&Hm&Ax__7yT!oIt=+Y@d()G WzWfBuAG632xB~Qebh8bka^aN1_XcLI zCvkG`otTVW*CkysT0{N}uUD_{Jgcy5x*73)E{{m-w;>hij?xt8n6t0RGem zedg`B$e|B-6atV#+>`QIJijz0OH}7+ej8_5oTp8mlj4!w6<@>1tNt@k9*6v!cmf=9 WUw?x753|S-xB~>Bc^gQRgcW~*m7O~P diff --git a/test_files/precision.xlsx b/test_files/precision.xlsx index c437017e57aa9acae5b1a462a4940fd7efd25cff..d24693b944df20c7c0dcd5ab61a6c83f0a8c5e26 100644 GIT binary patch delta 324 zcmV-K0lWV7EA=a|bPxmpnW0vbcn}|d%}&EG5QOhMMdW+pI000wB$WX5L?tAOK-?^Q z+gijwvJGk89w%+nR_cl4dS<_!UAda?RGa*Q^U~-Vkt|0EXl{hmn;N~Z7U>KnJ}@m< zYc$lT2ao2>O~ozc#=)yI7F>|vlL*qDa$BQq2$o{(`4$xOS#;^I|2m9(1HRTJSG{zCvO0!{MCk0Iddu*dJ9*r zM{#oRyqt_(*JWKfT4R2KKbJ3WKr+rV^ZT2P_!ZRQSAzhijP+%Xs3o1o7BM z{mR>6k$oSCBnF`SxJTuqe0p9bEl^YB#cfK8G%r`=fs&Hm&Ax__7yT!oIt=+Y@d()G WzWfBuAG5jQebh8bka^aN1_XcLI zCvkG`otTVW*CkysT0{N}uUD_{Jgcy5x*73)E{{m-w;>hij?xt8n6t0RGem zedg`B$e|B-6atV#+>`QIJijz0OH}7+ej8_5oTp8mlj4!w6<@>1tNt@k9*6v!cmf=9 WUw?x753{-vxB~>Bc^gQRb`^hnc%9_{ diff --git a/requirements.txt b/test_files/requirements.txt similarity index 100% rename from requirements.txt rename to test_files/requirements.txt diff --git a/test_files/special_values.xlsx b/test_files/special_values.xlsx index 56a0a279680b95a9dc6b594094fe80e96e5ba5f5..e6934e086cbf48b18ad29b072dd69e38e6e7a1ce 100644 GIT binary patch delta 324 zcmV-K0lWUwD$^>k(hmdxnW0vb)(;KnJ}@m< zYc$lT2ao2>O~ozc#=)yI7F>|vlL*qDa$BQq2$o{(`4$xOS#;^I|2m9(1HRTJSG{zCvO0!{MCk0Iddu*dJ9*r zM{#oRyqt_(*JWKfT4R2KKbJ3WKr+rV^ZT2P_!ZRQSAzhijP+%Xs3o1o7BM z{mR>6k$oSCBnF`SxJTuqe0p9bEl^YB#cfK8G%r`=fs&Hm&Ax__7yT!oIt=+Y@d()G WzWfBuAF~({xB~k(hmfnc^gQR)(;Qebh8bka^aN1_XcLI zCvkG`otTVW*CkysT0{N}uUD_{Jgcy5x*73)E{{m-w;>hij?xt8n6t0RGem zedg`B$e|B-6atV#+>`QIJijz0OH}7+ej8_5oTp8mlj4!w6<@>1tNt@k9*6v!cmf=9 WUw?x753?8%xB~>Bc^gQR)D(XJ^_~;} diff --git a/test_files/string_formats.xlsx b/test_files/string_formats.xlsx index 59fc2fa35b878d8f29c6dabeeb9561a5ecc425ca..31649822afa4f221d554b932ccbec4b178b6b4c2 100644 GIT binary patch delta 332 zcmV-S0ki&&ERHO&jSvI?nW0vbkq{q$%}&EG5QOhMMdW+pI000wB$WX5L?tAOK-?^Q z+gijwvJGk89w%+nR_cl4dS<_!UAda?RGa*Q^U~-Vkt|0EXl{hmn;N~Z7U>KnJ}@m< zYc$lT2ao2>O~ozc#=)yI7F>|vlL*qDa$BQq2$o{(`4$xOS#;^I|2m9(1HRTJSG{zCvO0!{MCk0Iddu*dJ9*r zM{#oRyqt_(*JWKfT4R2KKbJ3WKr+rV^ZT2P_!ZRQSAzhijP+%Xs3o1o7BM z{mR>6k$oSCBnF`SxJTuqe0p9fEl^YB#cfK8G%r`=fs&Hm&Ax__7yT!oIt=+Y@d()G ezWfBuA5cpJ1e3rO8nf{cngav?nW0vbj}?DzoSD!7 delta 332 zcmV-S0ki&&ERHO&jSvK&c^gQRkq{q$%TB{E5JmTVMdW?rJOF`}q!OU6sDwli6+6qG zwifY2wjs^mQebh8bka^aN1_XcLI zCvkG`otTVW*CkysT0{N}uUD_{Jgcy5x*73)E{{m-w;>hij?xt8n6t0RGem zedg`B$e|B-6atV#+>`QIJijz4OH}7+ej8_5oTp8mlj4!w6<@>1tNt@k9*6v!cmf=9 eUw?x74^T@31e3rO8nf{cngax&c^gQRj}?ERv!A5^