anumaurya114exp committed on
Commit
4966101
·
1 Parent(s): cc58896

reverting to previous commit

Browse files
Files changed (4) hide show
  1. app.py +6 -38
  2. configProd.py +3 -4
  3. persistStorage.py +1 -22
  4. queryHelperManagerCoT.py +2 -5
app.py CHANGED
@@ -9,7 +9,7 @@ import os
9
  import warnings
10
 
11
 
12
- from persistStorage import saveLog, getAllLogFilesPaths, getNewCsvFilePath, removeAllCsvFiles
13
  from config import *
14
  from constants import *
15
  from utils import *
@@ -19,7 +19,7 @@ from queryHelperManagerCoT import QueryHelperChainOfThought
19
 
20
 
21
  pd.set_option('display.max_columns', None)
22
- pd.set_option('display.max_rows', None)
23
 
24
  # Filter out all warning messages
25
  warnings.filterwarnings("ignore")
@@ -107,40 +107,13 @@ def preProcessSQL(sql):
107
  sql=sql.replace(';', '')
108
  disclaimerOutputStripping = ""
109
  if ('limit' in sql[-15:].lower())==False:
110
- sql = sql + ' ' + 'limit 100'
111
- disclaimerOutputStripping = """Results are stripped to show only top 100 rows.
112
- Please add your custom limit to get extended result.
113
- eg\n select * from schema.table limit 200\n\n"""
114
  sql = sqlparse.format(sql, reindent=True, keyword_case='upper')
115
  return sql, disclaimerOutputStripping
116
 
117
- def onGetResultCsvFile(sql):
118
- global dbEngine, queryHelper
119
- sql, disclaimerOutputStripping = preProcessSQL(sql=sql)
120
- if not isDataQuery(sql):
121
- return "Sorry not allowed to run. As the query modifies the data."
122
- try:
123
- dbEngine2 = DbEngine(dbCreds)
124
- dbEngine2.connect()
125
- conn = dbEngine2.getConnection()
126
- df = pd.read_sql_query(sql, con=conn)
127
-
128
- dbEngine2.disconnect()
129
- # return disclaimerOutputStripping + str(pd.DataFrame(df))
130
- except Exception as e:
131
- # errorMessage = {"function":"testSQL","error":str(e), "userInput":sql}
132
- # saveLog(errorMessage, 'error')
133
- dbEngine2.disconnect()
134
- df = pd.DataFrame()
135
- # print(f"Error occurred during running the query {sql}.\n and the error is {str(e)}")
136
-
137
- removeAllCsvFiles()
138
- csvFilePath = getNewCsvFilePath()
139
- df.to_csv(csvFilePath, index=False)
140
- downloadableFilesPaths = getAllLogFilesPaths()
141
- fileComponent = gr.File(csvFilePath)
142
- return fileComponent
143
-
144
  def testSQL(sql):
145
  global dbEngine, queryHelper
146
 
@@ -262,11 +235,6 @@ with gr.Blocks() as demo:
262
  text_button = gr.Button("RUN QUERY")
263
  clear = gr.ClearButton([text_input, text_output])
264
  text_button.click(testSQL, inputs=text_input, outputs=text_output)
265
-
266
- csvFileComponent = gr.File([], file_count='multiple')
267
- downloadCsv = gr.Button("Get result as csv")
268
- downloadCsv.click(onGetResultCsvFile, inputs=text_input, outputs=csvFileComponent)
269
-
270
  # screen 3 : To set creds, schema, tables and columns
271
  with gr.Tab("Setup"):
272
  gr.Markdown("""<h1><center> Run Query </center></h1>""")
 
9
  import warnings
10
 
11
 
12
+ from persistStorage import saveLog, getAllLogFilesPaths
13
  from config import *
14
  from constants import *
15
  from utils import *
 
19
 
20
 
21
  pd.set_option('display.max_columns', None)
22
+ pd.set_option('display.max_rows', 10)
23
 
24
  # Filter out all warning messages
25
  warnings.filterwarnings("ignore")
 
107
  sql=sql.replace(';', '')
108
  disclaimerOutputStripping = ""
109
  if ('limit' in sql[-15:].lower())==False:
110
+ sql = sql + ' ' + 'limit 5'
111
+ disclaimerOutputStripping = """Results are stripped to show only top 5 rows.
112
+ Please add your custom limit to get extended result.
113
+ eg\n select * from schema.table limit 20\n\n"""
114
  sql = sqlparse.format(sql, reindent=True, keyword_case='upper')
115
  return sql, disclaimerOutputStripping
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  def testSQL(sql):
118
  global dbEngine, queryHelper
119
 
 
235
  text_button = gr.Button("RUN QUERY")
236
  clear = gr.ClearButton([text_input, text_output])
237
  text_button.click(testSQL, inputs=text_input, outputs=text_output)
 
 
 
 
 
238
  # screen 3 : To set creds, schema, tables and columns
239
  with gr.Tab("Setup"):
240
  gr.Markdown("""<h1><center> Run Query </center></h1>""")
configProd.py CHANGED
@@ -22,8 +22,7 @@ PASSWD = os.getenv("passwd")
22
 
23
  DB_CREDS_DATA = ({"database":dbName, "user":userDB, "password":pwdDB, "host":host, "port":port})
24
 
25
- STORAGE_DIR = os.getenv("HF_HOME", "/data")
26
- logsDir = STORAGE_DIR
27
 
28
- TABLES_DATA_DIR = os.path.join(STORAGE_DIR, "tablesData")
29
- RESULT_CSV_DIR = os.path.join(STORAGE_DIR, "csvResults")
 
 
22
 
23
  DB_CREDS_DATA = ({"database":dbName, "user":userDB, "password":pwdDB, "host":host, "port":port})
24
 
 
 
25
 
26
+ logsDir = os.getenv("HF_HOME", "/data")
27
+
28
+ TABLES_DATA_DIR = os.path.join(os.getenv("HF_HOME", "/data"), "tablesData")
persistStorage.py CHANGED
@@ -3,41 +3,20 @@ import sqlite3
3
  from datetime import datetime, timedelta
4
  import pytz
5
  import os
6
- from config import HUGGING_FACE_TOKEN, TABLES_DATA_DIR, logsDir, RESULT_CSV_DIR
7
  import pandas as pd
8
  import csv
9
- import random
10
 
11
  try:
12
  os.makedirs(TABLES_DATA_DIR, exist_ok=True)
13
  except:
14
  pass
15
 
16
- try:
17
- os.makedirs(RESULT_CSV_DIR, exist_ok=True)
18
- except:
19
- pass
20
-
21
  # Set the time zone to Pacific Time Zone
22
  TIME_ZONE = 'US/Pacific'
23
  TIMEZONE_OBJ = pytz.timezone(TIME_ZONE)
24
  CACHE_TIME_EXPIRE = 20 #days
25
 
26
- def getNewCsvFilePath():
27
- fileName = "ResultCsv_" + "".join([str(random.randint(0,9)) for i in range(3)]) + ".csv"
28
- fileNameWithpath = os.path.join(RESULT_CSV_DIR, fileName)
29
- return fileNameWithpath
30
-
31
- def removeAllCsvFiles():
32
- files = os.listdir(RESULT_CSV_DIR)
33
- for fileName in files:
34
- fileNameWithPath = os.path.join(RESULT_CSV_DIR, fileName)
35
- try:
36
- os.remove(fileNameWithPath)
37
- except:
38
- pass
39
-
40
-
41
  def append_dict_to_csv(file_path, row_data):
42
  fieldnames = row_data.keys()
43
 
 
3
  from datetime import datetime, timedelta
4
  import pytz
5
  import os
6
+ from config import HUGGING_FACE_TOKEN, TABLES_DATA_DIR, logsDir
7
  import pandas as pd
8
  import csv
 
9
 
10
  try:
11
  os.makedirs(TABLES_DATA_DIR, exist_ok=True)
12
  except:
13
  pass
14
 
 
 
 
 
 
15
  # Set the time zone to Pacific Time Zone
16
  TIME_ZONE = 'US/Pacific'
17
  TIMEZONE_OBJ = pytz.timezone(TIME_ZONE)
18
  CACHE_TIME_EXPIRE = 20 #days
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def append_dict_to_csv(file_path, row_data):
21
  fieldnames = row_data.keys()
22
 
queryHelperManagerCoT.py CHANGED
@@ -59,11 +59,8 @@ class QueryHelperChainOfThought:
59
  except:
60
  print("Couldn't parse desired result from gpt response using method 2")
61
  if parsedSql:
62
- try:
63
- formattedSql = sqlparse.format(sqlResult, reindent=True)
64
- responseToReturn = formattedSql
65
- except:
66
- responseToReturn = sqlResult
67
  else:
68
  responseToReturn = gptResponse
69
  return responseToReturn
 
59
  except:
60
  print("Couldn't parse desired result from gpt response using method 2")
61
  if parsedSql:
62
+ formattedSql = sqlparse.format(sqlResult, reindent=True)
63
+ responseToReturn = formattedSql
 
 
 
64
  else:
65
  responseToReturn = gptResponse
66
  return responseToReturn