Spaces:
Runtime error
Runtime error
Commit ·
14a0b87
1
Parent(s): ff48602
cache for tablesAndCols and minor changes
Browse files
Minor changes:
1. Remove the unused Query Helper tab; keep only the CoT tab.
2. Generate a CSV result file (rename the button).
3. Rename "Run query" (Setup tab) to "schema and setup".
- app.py +20 -62
- configProd.py +2 -1
- utils.py +13 -1
app.py
CHANGED
|
@@ -26,32 +26,14 @@ warnings.filterwarnings("ignore")
|
|
| 26 |
|
| 27 |
dbCreds = DataWrapper(DB_CREDS_DATA)
|
| 28 |
dbEngine = DbEngine(dbCreds)
|
| 29 |
-
|
| 30 |
tablesAndCols = getAllTablesInfo(dbEngine, SCHEMA_NAME)
|
| 31 |
-
|
| 32 |
-
# tablesAndCols['tbl_d_product_style_flags'] = ["product_id", "contemp_style_flag", "trad_style_flag", "country_style_flag", "trans_style_flag",
|
| 33 |
-
# "mc_style_flag", "farm_style_flag", "wi_style_flag","iron_style_flag","crystal_style_flag","coast_style_flag","rustic_style_flag","ind_style_flag",
|
| 34 |
-
# "glam_style_flag","ac_style_flag","kids_style_flag","asian_style_flag","tiff_style_flag","trop_style_flag","um_style_flag",
|
| 35 |
-
# "sw_style_flag", "themed_style_flag", "west_style_flag", "style", "sku#"]
|
| 36 |
|
| 37 |
metadataLayout = MetaDataLayout(schemaName=SCHEMA_NAME, allTablesAndCols=tablesAndCols)
|
| 38 |
metadataLayout.setSelection(DEFAULT_TABLES_COLS)
|
| 39 |
|
| 40 |
selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
openAIClient = OpenAI(api_key=OPENAI_API_KEY)
|
| 44 |
-
gptInstanceForTableCols = ChatgptManager(openAIClient, model=GPT_MODEL)
|
| 45 |
-
gptInstanceForQuery = ChatgptManager(openAIClient, model=GPT_MODEL)
|
| 46 |
-
queryHelper = QueryHelper(gptInstanceForTableCols=gptInstanceForTableCols,
|
| 47 |
-
gptInstanceForQuery=gptInstanceForQuery,
|
| 48 |
-
schemaName=SCHEMA_NAME,platform=PLATFORM,
|
| 49 |
-
metadataLayout=metadataLayout,
|
| 50 |
-
sampleDataRows=SAMPLE_ROW_MAX,
|
| 51 |
-
gptSampleRows=GPT_SAMPLE_ROWS,
|
| 52 |
-
dbEngine=dbEngine,
|
| 53 |
-
getSampleDataForTablesAndCols=getSampleDataForTablesAndCols)
|
| 54 |
-
|
| 55 |
|
| 56 |
openAIClient2 = OpenAI(api_key=OPENAI_API_KEY)
|
| 57 |
gptInstanceForCoT = ChatgptManager(openAIClient2, model=GPT_MODEL)
|
|
@@ -70,23 +52,6 @@ def checkAuth(username, password):
|
|
| 70 |
return False
|
| 71 |
|
| 72 |
|
| 73 |
-
|
| 74 |
-
# Function to save history of chat
|
| 75 |
-
def respond(message, chatHistory):
|
| 76 |
-
"""gpt response handler for gradio ui"""
|
| 77 |
-
global queryHelper
|
| 78 |
-
try:
|
| 79 |
-
botMessage = queryHelper.getQueryForUserInput(message)
|
| 80 |
-
except Exception as e:
|
| 81 |
-
errorMessage = {"function":"queryHelper.getQueryForUserInput","error":str(e), "userInput":message}
|
| 82 |
-
saveLog(errorMessage, 'error')
|
| 83 |
-
raise ValueError(str(e))
|
| 84 |
-
queryGenerated = extractSqlFromGptResponse(botMessage)
|
| 85 |
-
logMessage = {"userInput":message, "queryGenerated":queryGenerated, "completeGptResponse":botMessage, "function":"queryHelper.getQueryForUserInput"}
|
| 86 |
-
saveLog(logMessage)
|
| 87 |
-
chatHistory.append((message, botMessage))
|
| 88 |
-
return "", chatHistory
|
| 89 |
-
|
| 90 |
# Function to save history of chat
|
| 91 |
def respondCoT(message, chatHistory):
|
| 92 |
"""gpt response handler for gradio ui"""
|
|
@@ -115,7 +80,7 @@ eg\n select * from schema.table limit 200\n"""
|
|
| 115 |
return sql, disclaimerOutputStripping
|
| 116 |
|
| 117 |
def onGetResultCsvFile(sql):
|
| 118 |
-
global dbEngine,
|
| 119 |
sql, disclaimerOutputStripping = preProcessSQL(sql=sql)
|
| 120 |
if not isDataQuery(sql):
|
| 121 |
return "Sorry not allowed to run. As the query modifies the data."
|
|
@@ -142,7 +107,7 @@ def onGetResultCsvFile(sql):
|
|
| 142 |
return fileComponent
|
| 143 |
|
| 144 |
def testSQL(sql):
|
| 145 |
-
global dbEngine,
|
| 146 |
|
| 147 |
sql, disclaimerOutputStripping = preProcessSQL(sql=sql)
|
| 148 |
if not isDataQuery(sql):
|
|
@@ -167,9 +132,9 @@ def testSQL(sql):
|
|
| 167 |
|
| 168 |
def onSelectedTablesChange(tablesSelected):
|
| 169 |
#Updates tables visible and allow selecting columns for them
|
| 170 |
-
global
|
| 171 |
print(f"Selected tables : {tablesSelected}")
|
| 172 |
-
metadataLayout =
|
| 173 |
allTablesAndCols = metadataLayout.getAllTablesCols()
|
| 174 |
selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
|
| 175 |
allTablesList = list(allTablesAndCols.keys())
|
|
@@ -189,8 +154,8 @@ def onSelectedTablesChange(tablesSelected):
|
|
| 189 |
|
| 190 |
def onSelectedColumnsChange(*tableBoxes):
|
| 191 |
#update selection of columns and tables (include new tables and cols in gpts context)
|
| 192 |
-
global
|
| 193 |
-
metadataLayout =
|
| 194 |
allTablesAndCols = metadataLayout.getAllTablesCols()
|
| 195 |
allTablesList = list(allTablesAndCols.keys())
|
| 196 |
tablesAndCols = {}
|
|
@@ -205,17 +170,17 @@ def onSelectedColumnsChange(*tableBoxes):
|
|
| 205 |
|
| 206 |
metadataLayout.setSelection(tablesAndCols=tablesAndCols)
|
| 207 |
print("metadata updated")
|
| 208 |
-
print("Updating
|
| 209 |
-
|
| 210 |
return "Columns udpated"
|
| 211 |
|
| 212 |
def onResetToDefaultSelection():
|
| 213 |
-
global
|
| 214 |
-
metadataLayout =
|
| 215 |
metadataLayout.setSelection(tablesAndCols=tablesAndCols)
|
| 216 |
-
|
| 217 |
|
| 218 |
-
metadataLayout =
|
| 219 |
allTablesAndCols = metadataLayout.getAllTablesCols()
|
| 220 |
selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
|
| 221 |
allTablesList = list(allTablesAndCols.keys())
|
|
@@ -241,19 +206,12 @@ def onSyncLogsWithDataDir():
|
|
| 241 |
|
| 242 |
|
| 243 |
with gr.Blocks() as demo:
|
| 244 |
-
|
| 245 |
with gr.Tab("Query Helper"):
|
| 246 |
gr.Markdown("""<h1><center> Query Helper</center></h1>""")
|
| 247 |
chatbot = gr.Chatbot()
|
| 248 |
msg = gr.Textbox()
|
| 249 |
clear = gr.ClearButton([msg, chatbot])
|
| 250 |
-
msg.submit(respond, [msg, chatbot], [msg, chatbot])
|
| 251 |
-
|
| 252 |
-
with gr.Tab("Query Helper CoT"):
|
| 253 |
-
gr.Markdown("""<h1><center> Query Helper CoT</center></h1>""")
|
| 254 |
-
chatbot = gr.Chatbot()
|
| 255 |
-
msg = gr.Textbox()
|
| 256 |
-
clear = gr.ClearButton([msg, chatbot])
|
| 257 |
msg.submit(respondCoT, [msg, chatbot], [msg, chatbot])
|
| 258 |
|
| 259 |
# screen 2 : To run sql query against database
|
|
@@ -268,15 +226,15 @@ with gr.Blocks() as demo:
|
|
| 268 |
text_button.click(testSQL, inputs=text_input, outputs=[text_output, table_output])
|
| 269 |
|
| 270 |
csvFileComponent = gr.File([], file_count='multiple')
|
| 271 |
-
downloadCsv = gr.Button("
|
| 272 |
downloadCsv.click(onGetResultCsvFile, inputs=text_input, outputs=csvFileComponent)
|
| 273 |
|
| 274 |
# screen 3 : To set creds, schema, tables and columns
|
| 275 |
with gr.Tab("Setup"):
|
| 276 |
-
gr.Markdown("""<h1><center>
|
| 277 |
text_input = gr.Textbox(label = 'schema name', value= SCHEMA_NAME)
|
| 278 |
-
allTablesAndCols =
|
| 279 |
-
selectedTablesAndCols =
|
| 280 |
allTablesList = list(allTablesAndCols.keys())
|
| 281 |
selectedTablesList = list(selectedTablesAndCols.keys())
|
| 282 |
|
|
@@ -320,5 +278,5 @@ with gr.Blocks() as demo:
|
|
| 320 |
fileComponent = gr.File(downloadableFilesPaths, file_count='multiple')
|
| 321 |
refreshLogs = gr.Button("Sync Log files from /data")
|
| 322 |
refreshLogs.click(onSyncLogsWithDataDir, inputs=None, outputs=fileComponent)
|
| 323 |
-
|
| 324 |
demo.launch(share=True, debug=True, ssl_verify=False, auth=checkAuth)
|
|
|
|
| 26 |
|
| 27 |
dbCreds = DataWrapper(DB_CREDS_DATA)
|
| 28 |
dbEngine = DbEngine(dbCreds)
|
| 29 |
+
print("getting tablesAndCols..")
|
| 30 |
tablesAndCols = getAllTablesInfo(dbEngine, SCHEMA_NAME)
|
| 31 |
+
print("Done.")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
metadataLayout = MetaDataLayout(schemaName=SCHEMA_NAME, allTablesAndCols=tablesAndCols)
|
| 34 |
metadataLayout.setSelection(DEFAULT_TABLES_COLS)
|
| 35 |
|
| 36 |
selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
openAIClient2 = OpenAI(api_key=OPENAI_API_KEY)
|
| 39 |
gptInstanceForCoT = ChatgptManager(openAIClient2, model=GPT_MODEL)
|
|
|
|
| 52 |
return False
|
| 53 |
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
# Function to save history of chat
|
| 56 |
def respondCoT(message, chatHistory):
|
| 57 |
"""gpt response handler for gradio ui"""
|
|
|
|
| 80 |
return sql, disclaimerOutputStripping
|
| 81 |
|
| 82 |
def onGetResultCsvFile(sql):
|
| 83 |
+
global dbEngine, queryHelperCot
|
| 84 |
sql, disclaimerOutputStripping = preProcessSQL(sql=sql)
|
| 85 |
if not isDataQuery(sql):
|
| 86 |
return "Sorry not allowed to run. As the query modifies the data."
|
|
|
|
| 107 |
return fileComponent
|
| 108 |
|
| 109 |
def testSQL(sql):
|
| 110 |
+
global dbEngine, queryHelperCot
|
| 111 |
|
| 112 |
sql, disclaimerOutputStripping = preProcessSQL(sql=sql)
|
| 113 |
if not isDataQuery(sql):
|
|
|
|
| 132 |
|
| 133 |
def onSelectedTablesChange(tablesSelected):
|
| 134 |
#Updates tables visible and allow selecting columns for them
|
| 135 |
+
global queryHelperCot
|
| 136 |
print(f"Selected tables : {tablesSelected}")
|
| 137 |
+
metadataLayout = queryHelperCot.getMetadata()
|
| 138 |
allTablesAndCols = metadataLayout.getAllTablesCols()
|
| 139 |
selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
|
| 140 |
allTablesList = list(allTablesAndCols.keys())
|
|
|
|
| 154 |
|
| 155 |
def onSelectedColumnsChange(*tableBoxes):
|
| 156 |
#update selection of columns and tables (include new tables and cols in gpts context)
|
| 157 |
+
global queryHelperCot
|
| 158 |
+
metadataLayout = queryHelperCot.getMetadata()
|
| 159 |
allTablesAndCols = metadataLayout.getAllTablesCols()
|
| 160 |
allTablesList = list(allTablesAndCols.keys())
|
| 161 |
tablesAndCols = {}
|
|
|
|
| 170 |
|
| 171 |
metadataLayout.setSelection(tablesAndCols=tablesAndCols)
|
| 172 |
print("metadata updated")
|
| 173 |
+
print("Updating queryHelperCot state, and sample data")
|
| 174 |
+
queryHelperCot.updateMetadata(metadataLayout)
|
| 175 |
return "Columns udpated"
|
| 176 |
|
| 177 |
def onResetToDefaultSelection():
|
| 178 |
+
global queryHelperCot
|
| 179 |
+
metadataLayout = queryHelperCot.getMetadata()
|
| 180 |
metadataLayout.setSelection(tablesAndCols=tablesAndCols)
|
| 181 |
+
queryHelperCot.updateMetadata(metadataLayout)
|
| 182 |
|
| 183 |
+
metadataLayout = queryHelperCot.getMetadata()
|
| 184 |
allTablesAndCols = metadataLayout.getAllTablesCols()
|
| 185 |
selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
|
| 186 |
allTablesList = list(allTablesAndCols.keys())
|
|
|
|
| 206 |
|
| 207 |
|
| 208 |
with gr.Blocks() as demo:
|
| 209 |
+
|
| 210 |
with gr.Tab("Query Helper"):
|
| 211 |
gr.Markdown("""<h1><center> Query Helper</center></h1>""")
|
| 212 |
chatbot = gr.Chatbot()
|
| 213 |
msg = gr.Textbox()
|
| 214 |
clear = gr.ClearButton([msg, chatbot])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
msg.submit(respondCoT, [msg, chatbot], [msg, chatbot])
|
| 216 |
|
| 217 |
# screen 2 : To run sql query against database
|
|
|
|
| 226 |
text_button.click(testSQL, inputs=text_input, outputs=[text_output, table_output])
|
| 227 |
|
| 228 |
csvFileComponent = gr.File([], file_count='multiple')
|
| 229 |
+
downloadCsv = gr.Button("Generate csv result file")
|
| 230 |
downloadCsv.click(onGetResultCsvFile, inputs=text_input, outputs=csvFileComponent)
|
| 231 |
|
| 232 |
# screen 3 : To set creds, schema, tables and columns
|
| 233 |
with gr.Tab("Setup"):
|
| 234 |
+
gr.Markdown("""<h1><center> Setup Tab </center></h1>""")
|
| 235 |
text_input = gr.Textbox(label = 'schema name', value= SCHEMA_NAME)
|
| 236 |
+
allTablesAndCols = queryHelperCot.getMetadata().getAllTablesCols()
|
| 237 |
+
selectedTablesAndCols = queryHelperCot.getMetadata().getSelectedTablesAndCols()
|
| 238 |
allTablesList = list(allTablesAndCols.keys())
|
| 239 |
selectedTablesList = list(selectedTablesAndCols.keys())
|
| 240 |
|
|
|
|
| 278 |
fileComponent = gr.File(downloadableFilesPaths, file_count='multiple')
|
| 279 |
refreshLogs = gr.Button("Sync Log files from /data")
|
| 280 |
refreshLogs.click(onSyncLogsWithDataDir, inputs=None, outputs=fileComponent)
|
| 281 |
+
print("Ready to launch...")
|
| 282 |
demo.launch(share=True, debug=True, ssl_verify=False, auth=checkAuth)
|
configProd.py
CHANGED
|
@@ -26,4 +26,5 @@ STORAGE_DIR = os.getenv("HF_HOME", "/data")
|
|
| 26 |
logsDir = STORAGE_DIR
|
| 27 |
|
| 28 |
TABLES_DATA_DIR = os.path.join(STORAGE_DIR, "tablesData")
|
| 29 |
-
RESULT_CSV_DIR = os.path.join(STORAGE_DIR, "csvResults")
|
|
|
|
|
|
| 26 |
logsDir = STORAGE_DIR
|
| 27 |
|
| 28 |
TABLES_DATA_DIR = os.path.join(STORAGE_DIR, "tablesData")
|
| 29 |
+
RESULT_CSV_DIR = os.path.join(STORAGE_DIR, "csvResults")
|
| 30 |
+
SCHEMA_INFO_FILE_PATH = os.path.join(STORAGE_DIR, "schemaInfo.pickle")
|
utils.py
CHANGED
|
@@ -2,6 +2,9 @@ import psycopg2
|
|
| 2 |
import re
|
| 3 |
import pandas as pd
|
| 4 |
from persistStorage import retrieveTablesDataFromLocalDb, saveTablesDataToLocalDB
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
class DataWrapper:
|
| 7 |
def __init__(self, data):
|
|
@@ -105,8 +108,14 @@ def executeColumnsQuery(dbEngine, columnQuery):
|
|
| 105 |
def closeDbEngine(dbEngine):
|
| 106 |
dbEngine.disconnect()
|
| 107 |
|
| 108 |
-
def getAllTablesInfo(dbEngine, schemaName):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
tablesAndCols = {}
|
|
|
|
| 110 |
allTablesQuery = f"""SELECT table_name FROM information_schema.tables
|
| 111 |
WHERE table_schema = '{schemaName}'"""
|
| 112 |
tables = executeQuery(dbEngine, allTablesQuery)
|
|
@@ -115,6 +124,9 @@ def getAllTablesInfo(dbEngine, schemaName):
|
|
| 115 |
columnsQuery = f"""Select * FROM {schemaName}.{tableName} LIMIT 0"""
|
| 116 |
columns = executeColumnsQuery(dbEngine, columnsQuery)
|
| 117 |
tablesAndCols[tableName] = columns
|
|
|
|
|
|
|
|
|
|
| 118 |
return tablesAndCols
|
| 119 |
|
| 120 |
def getSampleDataForTablesAndCols(dbEngine, schemaName, tablesAndCols, maxRows):
|
|
|
|
| 2 |
import re
|
| 3 |
import pandas as pd
|
| 4 |
from persistStorage import retrieveTablesDataFromLocalDb, saveTablesDataToLocalDB
|
| 5 |
+
from config import SCHEMA_INFO_FILE_PATH
|
| 6 |
+
import os
|
| 7 |
+
import pickle
|
| 8 |
|
| 9 |
class DataWrapper:
|
| 10 |
def __init__(self, data):
|
|
|
|
| 108 |
def closeDbEngine(dbEngine):
|
| 109 |
dbEngine.disconnect()
|
| 110 |
|
| 111 |
+
def getAllTablesInfo(dbEngine, schemaName, useCache=True):
|
| 112 |
+
if useCache:
|
| 113 |
+
if os.path.isfile(SCHEMA_INFO_FILE_PATH):
|
| 114 |
+
with open(SCHEMA_INFO_FILE_PATH,'rb') as fh:
|
| 115 |
+
tablesAndCols = pickle.load(fh)
|
| 116 |
+
return tablesAndCols
|
| 117 |
tablesAndCols = {}
|
| 118 |
+
print("Getting tables Info, list of tables and columns...")
|
| 119 |
allTablesQuery = f"""SELECT table_name FROM information_schema.tables
|
| 120 |
WHERE table_schema = '{schemaName}'"""
|
| 121 |
tables = executeQuery(dbEngine, allTablesQuery)
|
|
|
|
| 124 |
columnsQuery = f"""Select * FROM {schemaName}.{tableName} LIMIT 0"""
|
| 125 |
columns = executeColumnsQuery(dbEngine, columnsQuery)
|
| 126 |
tablesAndCols[tableName] = columns
|
| 127 |
+
|
| 128 |
+
with open(SCHEMA_INFO_FILE_PATH, 'wb') as fh:
|
| 129 |
+
pickle.dump(tablesAndCols, fh)
|
| 130 |
return tablesAndCols
|
| 131 |
|
| 132 |
def getSampleDataForTablesAndCols(dbEngine, schemaName, tablesAndCols, maxRows):
|