anumaurya114exp committed on
Commit
14a0b87
·
1 Parent(s): ff48602

cache for tablesAndCols and minor changes

Browse files

Minor changes:
1. Remove the unused (non-CoT) Query Helper tab; keep only the CoT tab.
2. Rename the CSV button to "Generate csv result file".
3. Rename the "Run Query" heading on the Setup tab (schema and setup) to "Setup Tab".

Files changed (3) hide show
  1. app.py +20 -62
  2. configProd.py +2 -1
  3. utils.py +13 -1
app.py CHANGED
@@ -26,32 +26,14 @@ warnings.filterwarnings("ignore")
26
 
27
  dbCreds = DataWrapper(DB_CREDS_DATA)
28
  dbEngine = DbEngine(dbCreds)
29
-
30
  tablesAndCols = getAllTablesInfo(dbEngine, SCHEMA_NAME)
31
- ##ToDo Resolve it and remove ittablesAndCols not getting flags table.
32
- # tablesAndCols['tbl_d_product_style_flags'] = ["product_id", "contemp_style_flag", "trad_style_flag", "country_style_flag", "trans_style_flag",
33
- # "mc_style_flag", "farm_style_flag", "wi_style_flag","iron_style_flag","crystal_style_flag","coast_style_flag","rustic_style_flag","ind_style_flag",
34
- # "glam_style_flag","ac_style_flag","kids_style_flag","asian_style_flag","tiff_style_flag","trop_style_flag","um_style_flag",
35
- # "sw_style_flag", "themed_style_flag", "west_style_flag", "style", "sku#"]
36
 
37
  metadataLayout = MetaDataLayout(schemaName=SCHEMA_NAME, allTablesAndCols=tablesAndCols)
38
  metadataLayout.setSelection(DEFAULT_TABLES_COLS)
39
 
40
  selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
41
-
42
-
43
- openAIClient = OpenAI(api_key=OPENAI_API_KEY)
44
- gptInstanceForTableCols = ChatgptManager(openAIClient, model=GPT_MODEL)
45
- gptInstanceForQuery = ChatgptManager(openAIClient, model=GPT_MODEL)
46
- queryHelper = QueryHelper(gptInstanceForTableCols=gptInstanceForTableCols,
47
- gptInstanceForQuery=gptInstanceForQuery,
48
- schemaName=SCHEMA_NAME,platform=PLATFORM,
49
- metadataLayout=metadataLayout,
50
- sampleDataRows=SAMPLE_ROW_MAX,
51
- gptSampleRows=GPT_SAMPLE_ROWS,
52
- dbEngine=dbEngine,
53
- getSampleDataForTablesAndCols=getSampleDataForTablesAndCols)
54
-
55
 
56
  openAIClient2 = OpenAI(api_key=OPENAI_API_KEY)
57
  gptInstanceForCoT = ChatgptManager(openAIClient2, model=GPT_MODEL)
@@ -70,23 +52,6 @@ def checkAuth(username, password):
70
  return False
71
 
72
 
73
-
74
- # Function to save history of chat
75
- def respond(message, chatHistory):
76
- """gpt response handler for gradio ui"""
77
- global queryHelper
78
- try:
79
- botMessage = queryHelper.getQueryForUserInput(message)
80
- except Exception as e:
81
- errorMessage = {"function":"queryHelper.getQueryForUserInput","error":str(e), "userInput":message}
82
- saveLog(errorMessage, 'error')
83
- raise ValueError(str(e))
84
- queryGenerated = extractSqlFromGptResponse(botMessage)
85
- logMessage = {"userInput":message, "queryGenerated":queryGenerated, "completeGptResponse":botMessage, "function":"queryHelper.getQueryForUserInput"}
86
- saveLog(logMessage)
87
- chatHistory.append((message, botMessage))
88
- return "", chatHistory
89
-
90
  # Function to save history of chat
91
  def respondCoT(message, chatHistory):
92
  """gpt response handler for gradio ui"""
@@ -115,7 +80,7 @@ eg\n select * from schema.table limit 200\n"""
115
  return sql, disclaimerOutputStripping
116
 
117
  def onGetResultCsvFile(sql):
118
- global dbEngine, queryHelper
119
  sql, disclaimerOutputStripping = preProcessSQL(sql=sql)
120
  if not isDataQuery(sql):
121
  return "Sorry not allowed to run. As the query modifies the data."
@@ -142,7 +107,7 @@ def onGetResultCsvFile(sql):
142
  return fileComponent
143
 
144
  def testSQL(sql):
145
- global dbEngine, queryHelper
146
 
147
  sql, disclaimerOutputStripping = preProcessSQL(sql=sql)
148
  if not isDataQuery(sql):
@@ -167,9 +132,9 @@ def testSQL(sql):
167
 
168
  def onSelectedTablesChange(tablesSelected):
169
  #Updates tables visible and allow selecting columns for them
170
- global queryHelper
171
  print(f"Selected tables : {tablesSelected}")
172
- metadataLayout = queryHelper.getMetadata()
173
  allTablesAndCols = metadataLayout.getAllTablesCols()
174
  selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
175
  allTablesList = list(allTablesAndCols.keys())
@@ -189,8 +154,8 @@ def onSelectedTablesChange(tablesSelected):
189
 
190
  def onSelectedColumnsChange(*tableBoxes):
191
  #update selection of columns and tables (include new tables and cols in gpts context)
192
- global queryHelper
193
- metadataLayout = queryHelper.getMetadata()
194
  allTablesAndCols = metadataLayout.getAllTablesCols()
195
  allTablesList = list(allTablesAndCols.keys())
196
  tablesAndCols = {}
@@ -205,17 +170,17 @@ def onSelectedColumnsChange(*tableBoxes):
205
 
206
  metadataLayout.setSelection(tablesAndCols=tablesAndCols)
207
  print("metadata updated")
208
- print("Updating queryHelper state, and sample data")
209
- queryHelper.updateMetadata(metadataLayout)
210
  return "Columns udpated"
211
 
212
  def onResetToDefaultSelection():
213
- global queryHelper
214
- metadataLayout = queryHelper.getMetadata()
215
  metadataLayout.setSelection(tablesAndCols=tablesAndCols)
216
- queryHelper.updateMetadata(metadataLayout)
217
 
218
- metadataLayout = queryHelper.getMetadata()
219
  allTablesAndCols = metadataLayout.getAllTablesCols()
220
  selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
221
  allTablesList = list(allTablesAndCols.keys())
@@ -241,19 +206,12 @@ def onSyncLogsWithDataDir():
241
 
242
 
243
  with gr.Blocks() as demo:
244
- # screen 1 : Chatbot for question answering to generate sql query from user input in english
245
  with gr.Tab("Query Helper"):
246
  gr.Markdown("""<h1><center> Query Helper</center></h1>""")
247
  chatbot = gr.Chatbot()
248
  msg = gr.Textbox()
249
  clear = gr.ClearButton([msg, chatbot])
250
- msg.submit(respond, [msg, chatbot], [msg, chatbot])
251
-
252
- with gr.Tab("Query Helper CoT"):
253
- gr.Markdown("""<h1><center> Query Helper CoT</center></h1>""")
254
- chatbot = gr.Chatbot()
255
- msg = gr.Textbox()
256
- clear = gr.ClearButton([msg, chatbot])
257
  msg.submit(respondCoT, [msg, chatbot], [msg, chatbot])
258
 
259
  # screen 2 : To run sql query against database
@@ -268,15 +226,15 @@ with gr.Blocks() as demo:
268
  text_button.click(testSQL, inputs=text_input, outputs=[text_output, table_output])
269
 
270
  csvFileComponent = gr.File([], file_count='multiple')
271
- downloadCsv = gr.Button("Get result as csv")
272
  downloadCsv.click(onGetResultCsvFile, inputs=text_input, outputs=csvFileComponent)
273
 
274
  # screen 3 : To set creds, schema, tables and columns
275
  with gr.Tab("Setup"):
276
- gr.Markdown("""<h1><center> Run Query </center></h1>""")
277
  text_input = gr.Textbox(label = 'schema name', value= SCHEMA_NAME)
278
- allTablesAndCols = queryHelper.getMetadata().getAllTablesCols()
279
- selectedTablesAndCols = queryHelper.getMetadata().getSelectedTablesAndCols()
280
  allTablesList = list(allTablesAndCols.keys())
281
  selectedTablesList = list(selectedTablesAndCols.keys())
282
 
@@ -320,5 +278,5 @@ with gr.Blocks() as demo:
320
  fileComponent = gr.File(downloadableFilesPaths, file_count='multiple')
321
  refreshLogs = gr.Button("Sync Log files from /data")
322
  refreshLogs.click(onSyncLogsWithDataDir, inputs=None, outputs=fileComponent)
323
-
324
  demo.launch(share=True, debug=True, ssl_verify=False, auth=checkAuth)
 
26
 
27
  dbCreds = DataWrapper(DB_CREDS_DATA)
28
  dbEngine = DbEngine(dbCreds)
29
+ print("getting tablesAndCols..")
30
  tablesAndCols = getAllTablesInfo(dbEngine, SCHEMA_NAME)
31
+ print("Done.")
 
 
 
 
32
 
33
  metadataLayout = MetaDataLayout(schemaName=SCHEMA_NAME, allTablesAndCols=tablesAndCols)
34
  metadataLayout.setSelection(DEFAULT_TABLES_COLS)
35
 
36
  selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  openAIClient2 = OpenAI(api_key=OPENAI_API_KEY)
39
  gptInstanceForCoT = ChatgptManager(openAIClient2, model=GPT_MODEL)
 
52
  return False
53
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  # Function to save history of chat
56
  def respondCoT(message, chatHistory):
57
  """gpt response handler for gradio ui"""
 
80
  return sql, disclaimerOutputStripping
81
 
82
  def onGetResultCsvFile(sql):
83
+ global dbEngine, queryHelperCot
84
  sql, disclaimerOutputStripping = preProcessSQL(sql=sql)
85
  if not isDataQuery(sql):
86
  return "Sorry not allowed to run. As the query modifies the data."
 
107
  return fileComponent
108
 
109
  def testSQL(sql):
110
+ global dbEngine, queryHelperCot
111
 
112
  sql, disclaimerOutputStripping = preProcessSQL(sql=sql)
113
  if not isDataQuery(sql):
 
132
 
133
  def onSelectedTablesChange(tablesSelected):
134
  #Updates tables visible and allow selecting columns for them
135
+ global queryHelperCot
136
  print(f"Selected tables : {tablesSelected}")
137
+ metadataLayout = queryHelperCot.getMetadata()
138
  allTablesAndCols = metadataLayout.getAllTablesCols()
139
  selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
140
  allTablesList = list(allTablesAndCols.keys())
 
154
 
155
  def onSelectedColumnsChange(*tableBoxes):
156
  #update selection of columns and tables (include new tables and cols in gpts context)
157
+ global queryHelperCot
158
+ metadataLayout = queryHelperCot.getMetadata()
159
  allTablesAndCols = metadataLayout.getAllTablesCols()
160
  allTablesList = list(allTablesAndCols.keys())
161
  tablesAndCols = {}
 
170
 
171
  metadataLayout.setSelection(tablesAndCols=tablesAndCols)
172
  print("metadata updated")
173
+ print("Updating queryHelperCot state, and sample data")
174
+ queryHelperCot.updateMetadata(metadataLayout)
175
  return "Columns udpated"
176
 
177
  def onResetToDefaultSelection():
178
+ global queryHelperCot
179
+ metadataLayout = queryHelperCot.getMetadata()
180
  metadataLayout.setSelection(tablesAndCols=tablesAndCols)
181
+ queryHelperCot.updateMetadata(metadataLayout)
182
 
183
+ metadataLayout = queryHelperCot.getMetadata()
184
  allTablesAndCols = metadataLayout.getAllTablesCols()
185
  selectedTablesAndCols = metadataLayout.getSelectedTablesAndCols()
186
  allTablesList = list(allTablesAndCols.keys())
 
206
 
207
 
208
  with gr.Blocks() as demo:
209
+
210
  with gr.Tab("Query Helper"):
211
  gr.Markdown("""<h1><center> Query Helper</center></h1>""")
212
  chatbot = gr.Chatbot()
213
  msg = gr.Textbox()
214
  clear = gr.ClearButton([msg, chatbot])
 
 
 
 
 
 
 
215
  msg.submit(respondCoT, [msg, chatbot], [msg, chatbot])
216
 
217
  # screen 2 : To run sql query against database
 
226
  text_button.click(testSQL, inputs=text_input, outputs=[text_output, table_output])
227
 
228
  csvFileComponent = gr.File([], file_count='multiple')
229
+ downloadCsv = gr.Button("Generate csv result file")
230
  downloadCsv.click(onGetResultCsvFile, inputs=text_input, outputs=csvFileComponent)
231
 
232
  # screen 3 : To set creds, schema, tables and columns
233
  with gr.Tab("Setup"):
234
+ gr.Markdown("""<h1><center> Setup Tab </center></h1>""")
235
  text_input = gr.Textbox(label = 'schema name', value= SCHEMA_NAME)
236
+ allTablesAndCols = queryHelperCot.getMetadata().getAllTablesCols()
237
+ selectedTablesAndCols = queryHelperCot.getMetadata().getSelectedTablesAndCols()
238
  allTablesList = list(allTablesAndCols.keys())
239
  selectedTablesList = list(selectedTablesAndCols.keys())
240
 
 
278
  fileComponent = gr.File(downloadableFilesPaths, file_count='multiple')
279
  refreshLogs = gr.Button("Sync Log files from /data")
280
  refreshLogs.click(onSyncLogsWithDataDir, inputs=None, outputs=fileComponent)
281
+ print("Ready to launch...")
282
  demo.launch(share=True, debug=True, ssl_verify=False, auth=checkAuth)
configProd.py CHANGED
@@ -26,4 +26,5 @@ STORAGE_DIR = os.getenv("HF_HOME", "/data")
26
  logsDir = STORAGE_DIR
27
 
28
  TABLES_DATA_DIR = os.path.join(STORAGE_DIR, "tablesData")
29
- RESULT_CSV_DIR = os.path.join(STORAGE_DIR, "csvResults")
 
 
26
  logsDir = STORAGE_DIR
27
 
28
  TABLES_DATA_DIR = os.path.join(STORAGE_DIR, "tablesData")
29
+ RESULT_CSV_DIR = os.path.join(STORAGE_DIR, "csvResults")
30
+ SCHEMA_INFO_FILE_PATH = os.path.join(STORAGE_DIR, "schemaInfo.pickle")
utils.py CHANGED
@@ -2,6 +2,9 @@ import psycopg2
2
  import re
3
  import pandas as pd
4
  from persistStorage import retrieveTablesDataFromLocalDb, saveTablesDataToLocalDB
 
 
 
5
 
6
  class DataWrapper:
7
  def __init__(self, data):
@@ -105,8 +108,14 @@ def executeColumnsQuery(dbEngine, columnQuery):
105
  def closeDbEngine(dbEngine):
106
  dbEngine.disconnect()
107
 
108
- def getAllTablesInfo(dbEngine, schemaName):
 
 
 
 
 
109
  tablesAndCols = {}
 
110
  allTablesQuery = f"""SELECT table_name FROM information_schema.tables
111
  WHERE table_schema = '{schemaName}'"""
112
  tables = executeQuery(dbEngine, allTablesQuery)
@@ -115,6 +124,9 @@ def getAllTablesInfo(dbEngine, schemaName):
115
  columnsQuery = f"""Select * FROM {schemaName}.{tableName} LIMIT 0"""
116
  columns = executeColumnsQuery(dbEngine, columnsQuery)
117
  tablesAndCols[tableName] = columns
 
 
 
118
  return tablesAndCols
119
 
120
  def getSampleDataForTablesAndCols(dbEngine, schemaName, tablesAndCols, maxRows):
 
2
  import re
3
  import pandas as pd
4
  from persistStorage import retrieveTablesDataFromLocalDb, saveTablesDataToLocalDB
5
+ from config import SCHEMA_INFO_FILE_PATH
6
+ import os
7
+ import pickle
8
 
9
  class DataWrapper:
10
  def __init__(self, data):
 
108
  def closeDbEngine(dbEngine):
109
  dbEngine.disconnect()
110
 
111
+ def getAllTablesInfo(dbEngine, schemaName, useCache=True):
112
+ if useCache:
113
+ if os.path.isfile(SCHEMA_INFO_FILE_PATH):
114
+ with open(SCHEMA_INFO_FILE_PATH,'rb') as fh:
115
+ tablesAndCols = pickle.load(fh)
116
+ return tablesAndCols
117
  tablesAndCols = {}
118
+ print("Getting tables Info, list of tables and columns...")
119
  allTablesQuery = f"""SELECT table_name FROM information_schema.tables
120
  WHERE table_schema = '{schemaName}'"""
121
  tables = executeQuery(dbEngine, allTablesQuery)
 
124
  columnsQuery = f"""Select * FROM {schemaName}.{tableName} LIMIT 0"""
125
  columns = executeColumnsQuery(dbEngine, columnsQuery)
126
  tablesAndCols[tableName] = columns
127
+
128
+ with open(SCHEMA_INFO_FILE_PATH, 'wb') as fh:
129
+ pickle.dump(tablesAndCols, fh)
130
  return tablesAndCols
131
 
132
  def getSampleDataForTablesAndCols(dbEngine, schemaName, tablesAndCols, maxRows):