anumaurya114exp committed on
Commit
4099f5c
·
1 Parent(s): 5cb76b5

added different history and context for different logins

Browse files
Files changed (4) hide show
  1. app.py +24 -11
  2. configProd.py +2 -1
  3. gptManager.py +31 -5
  4. queryHelperManagerCoT.py +65 -11
app.py CHANGED
@@ -14,7 +14,6 @@ from config import *
14
  from constants import *
15
  from utils import *
16
  from gptManager import ChatgptManager
17
- from queryHelperManager import QueryHelper
18
  from queryHelperManagerCoT import QueryHelperChainOfThought
19
 
20
 
@@ -24,6 +23,7 @@ pd.set_option('display.max_rows', None)
24
  # Filter out all warning messages
25
  warnings.filterwarnings("ignore")
26
 
 
27
  dbCreds = DataWrapper(DB_CREDS_DATA)
28
  dbEngine = DbEngine(dbCreds)
29
  print("getting tablesAndCols..")
@@ -48,24 +48,31 @@ queryHelperCot = QueryHelperChainOfThought(gptInstanceForCoT=gptInstanceForCoT,
48
  def checkAuth(username, password):
49
  global ADMIN, PASSWD
50
  if username == ADMIN and password == PASSWD:
51
- return True
 
 
52
  return False
53
 
54
-
55
  # Function to save history of chat
56
- def respondCoT(message, chatHistory):
57
  """gpt response handler for gradio ui"""
58
  global queryHelperCot
 
 
59
  try:
60
- botMessage = queryHelperCot.getQueryForUserInputCoT(message)
 
 
61
  except Exception as e:
62
- errorMessage = {"function":"queryHelperCot.getQueryForUserInput","error":str(e), "userInput":message}
63
  saveLog(errorMessage, 'error')
64
  raise ValueError(str(e))
65
- logMessage = {"userInput":message, "completeGptResponse":botMessage, "function":"queryHelperCot.getQueryForUserInputCoT"}
 
66
  saveLog(logMessage)
67
  chatHistory.append((message, botMessage))
68
- return "", chatHistory
 
69
 
70
 
71
  def preProcessSQL(sql):
@@ -206,13 +213,19 @@ def onSyncLogsWithDataDir():
206
 
207
 
208
  with gr.Blocks() as demo:
209
-
 
210
  with gr.Tab("Query Helper"):
211
  gr.Markdown("""<h1><center> Query Helper</center></h1>""")
212
  chatbot = gr.Chatbot()
213
  msg = gr.Textbox()
214
- clear = gr.ClearButton([msg, chatbot])
215
- msg.submit(respondCoT, [msg, chatbot], [msg, chatbot])
 
 
 
 
 
216
 
217
  # screen 2 : To run sql query against database
218
  with gr.Tab("Run Query"):
 
14
  from constants import *
15
  from utils import *
16
  from gptManager import ChatgptManager
 
17
  from queryHelperManagerCoT import QueryHelperChainOfThought
18
 
19
 
 
23
  # Filter out all warning messages
24
  warnings.filterwarnings("ignore")
25
 
26
+ LOGGED_IN_USERS = []
27
  dbCreds = DataWrapper(DB_CREDS_DATA)
28
  dbEngine = DbEngine(dbCreds)
29
  print("getting tablesAndCols..")
 
48
def checkAuth(username, password):
    """Gradio auth callback: accept only the configured admin credentials.

    On success the username is recorded in LOGGED_IN_USERS so respondCoT
    can attribute the next chat session to the most recent login.

    Args:
        username: login name entered in the auth dialog.
        password: password entered in the auth dialog.

    Returns:
        True when the credentials match ADMIN/PASSWD, False otherwise.

    NOTE(review): plain `==` comparison of secrets is timing-attack prone;
    consider hmac.compare_digest — confirm threat model before changing.
    """
    global ADMIN, PASSWD
    if username == ADMIN and password == PASSWD:
        # Keep at most one entry per user while preserving "most recent
        # login last" ordering (respondCoT reads LOGGED_IN_USERS[-1]).
        # The original append-only version grew without bound on re-logins.
        if username in LOGGED_IN_USERS:
            LOGGED_IN_USERS.remove(username)
        LOGGED_IN_USERS.append(username)
        print("user logged in...", username)
        return True
    return False
55
 
 
56
  # Function to save history of chat
57
def respondCoT(message, chatHistory, verboseChatHistory, loggedUser):
    """gpt response handler for gradio ui.

    Args:
        message: the user's new chat message.
        chatHistory: gradio Chatbot history of (user, parsed-sql) pairs.
        verboseChatHistory: gr.State history of (user, raw gpt response)
            pairs; this is what gets replayed to gpt as context.
        loggedUser: gr.State single-element list caching which login this
            browser session belongs to.

    Returns:
        ("", chatHistory, verboseChatHistory, loggedUser) — the empty
        string clears the textbox; the histories are echoed back to state.

    Raises:
        ValueError: when the gpt call fails (original error text preserved
            and chained).
    """
    global queryHelperCot
    if len(loggedUser) == 0:
        # Attribute this session to the most recent successful login.
        # Guard the lookup: LOGGED_IN_USERS is empty until someone has
        # authenticated, and indexing [-1] would raise IndexError then.
        if LOGGED_IN_USERS:
            loggedUser.append(LOGGED_IN_USERS[-1])
    try:
        botMessage, verboseBotMessage = queryHelperCot.getQueryForUserInputWithHistory(verboseChatHistory, message)
    except Exception as e:
        errorMessage = {"function": "queryHelperCot.getQueryForUserInputWithHistory", "error": str(e), "userInput": message}
        saveLog(errorMessage, 'error')
        # Chain the cause so the original traceback is not lost.
        raise ValueError(str(e)) from e
    logMessage = {"userInput": message, "completeGptResponse": verboseBotMessage,
                  "parsedResponse": botMessage, "function": "queryHelperCot.getQueryForUserInputWithHistory"}
    saveLog(logMessage)
    chatHistory.append((message, botMessage))
    verboseChatHistory.append((message, verboseBotMessage))
    return "", chatHistory, verboseChatHistory, loggedUser
76
 
77
 
78
  def preProcessSQL(sql):
 
213
 
214
 
215
  with gr.Blocks() as demo:
216
+ loggedUser = gr.State([])
217
+ verboseChatHistory = gr.State([])
218
  with gr.Tab("Query Helper"):
219
  gr.Markdown("""<h1><center> Query Helper</center></h1>""")
220
  chatbot = gr.Chatbot()
221
  msg = gr.Textbox()
222
+ def clearChatHistory():
223
+ return []
224
+
225
+ clear = gr.ClearButton([msg, chatbot, verboseChatHistory], value="Clear Chat")
226
+ clearButton = gr.Button("Clear Context")
227
+ clearButton.click(clearChatHistory, inputs=None, outputs=[verboseChatHistory])
228
+ msg.submit(respondCoT, [msg, chatbot, verboseChatHistory, loggedUser], [msg, chatbot, verboseChatHistory, loggedUser])
229
 
230
  # screen 2 : To run sql query against database
231
  with gr.Tab("Run Query"):
configProd.py CHANGED
@@ -27,4 +27,5 @@ logsDir = STORAGE_DIR
27
 
28
  TABLES_DATA_DIR = os.path.join(STORAGE_DIR, "tablesData")
29
  RESULT_CSV_DIR = os.path.join(STORAGE_DIR, "csvResults")
30
- SCHEMA_INFO_FILE_PATH = os.path.join(STORAGE_DIR, "schemaInfo.pickle")
 
 
27
 
28
  TABLES_DATA_DIR = os.path.join(STORAGE_DIR, "tablesData")
29
  RESULT_CSV_DIR = os.path.join(STORAGE_DIR, "csvResults")
30
+ SCHEMA_INFO_FILE_PATH = os.path.join(STORAGE_DIR, "schemaInfo.pickle")
31
+ # USERS_INFO_FILE_PATH = os.path.join(STORAGE_DIR, "usersInfo.pickle")
gptManager.py CHANGED
@@ -17,13 +17,39 @@ class ChatgptManager:
17
  else:
18
  del self.messages[0]
19
  self.messages.insert(0, systemMessage)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def getResponseForUserInput(self, userInput):
22
  #send only recent history to gpt.
23
- self.messages = self.getRecentContextOnly()
24
  userMessage = {"role":"user", "content":userInput}
25
  self.messages.append(userMessage)
26
  print(self.messages, "messages being sent to gpt for completion.")
 
27
  try:
28
  completion = self.client.chat.completions.create(
29
  model=self.model,
@@ -39,8 +65,8 @@ class ChatgptManager:
39
  self.messages.append({"role": "assistant", "content": gptResponse})
40
  return gptResponse
41
 
42
- def getRecentContextOnly(self):
43
  #take only systemp prompt and recent self.contextHistoryLen user input and self.contextHistoryLen assistant messages
44
- if len(self.messages)<2*self.contextHistoryLen+1:
45
- return self.messages[:]
46
- return [self.messages[0]] + self.messages[-2*self.contextHistoryLen:]
 
17
  else:
18
  del self.messages[0]
19
  self.messages.insert(0, systemMessage)
20
+
21
def getResponseForChatHistory(self, chatHistory, userInput):
    """Build a one-shot completion from an externally-held chat history.

    Assumes the incoming chat history is a list of
    ("user message", "bot message") tuples and that the system prompt
    (self.messages[0]) is fixed. Unlike getResponseForUserInput, this
    does not mutate self.messages — the caller owns the history.

    Args:
        chatHistory: list of (user, assistant) message pairs.
        userInput: the new user message to complete.

    Returns:
        The assistant's reply text, or an error string when the API call
        fails and self.throwError is falsy.

    Raises:
        Exception: re-raises the API error when self.throwError is truthy.
            (The original fell through to `return gptResponse` and hit
            NameError instead of surfacing the real error.)
    """
    messages = [self.messages[0]]
    for userMessage, botMessage in chatHistory:
        messages.append({"role": "user", "content": userMessage})
        messages.append({"role": "assistant", "content": botMessage})
    # Trim to the system prompt + the most recent context window.
    messages = self.getRecentContextOnly(messages)
    messages.append({"role": "user", "content": userInput})
    try:
        completion = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=0,
        )
        gptResponse = completion.choices[0].message.content
    except Exception as e:
        if not self.throwError:
            errorText = "Error while connecting with gpt " + str(e)[:100] + "..."
            return errorText
        # Bug fix: re-raise instead of falling through to an unbound name.
        raise
    return gptResponse
45
 
46
  def getResponseForUserInput(self, userInput):
47
  #send only recent history to gpt.
48
+ self.messages = self.getRecentContextOnly(self.messages)
49
  userMessage = {"role":"user", "content":userInput}
50
  self.messages.append(userMessage)
51
  print(self.messages, "messages being sent to gpt for completion.")
52
+ print(help(self.client.chat.completions.create),'\n\n\n')
53
  try:
54
  completion = self.client.chat.completions.create(
55
  model=self.model,
 
65
  self.messages.append({"role": "assistant", "content": gptResponse})
66
  return gptResponse
67
 
68
def getRecentContextOnly(self, messages):
    """Return the system prompt plus the most recent context window.

    Keeps messages[0] (the system prompt) and the last
    2 * self.contextHistoryLen entries (contextHistoryLen user inputs and
    contextHistoryLen assistant replies). Always returns a new list; the
    input is never mutated.
    """
    window = 2 * self.contextHistoryLen
    if len(messages) < window + 1:
        return messages[:]
    # Bug fix: with contextHistoryLen == 0 the original sliced
    # messages[-0:], i.e. the WHOLE list; an explicit guard keeps the
    # intended "system prompt only" result.
    if window == 0:
        return [messages[0]]
    return [messages[0]] + messages[-window:]
queryHelperManagerCoT.py CHANGED
@@ -37,6 +37,50 @@ class QueryHelperChainOfThought:
37
  self.metadataLayout = metadataLayout
38
  self._onMetadataChange()
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def getQueryForUserInputCoT(self, userInput):
41
  prompt = self.getPromptForCot()
42
  self.gptInstanceForCoT.setSystemPrompt(prompt)
@@ -45,10 +89,11 @@ class QueryHelperChainOfThought:
45
  parsedSql = False
46
  if tryParsing:
47
  try:
48
- txt = gptResponse.split("```json")[-1].split('```')[0].replace('\n', '')
49
  sqlResult = json.loads(txt)['finalResult']
50
  parsedSql = True
51
  tryParsing = False
 
52
  except:
53
  print("Couldn't parse desired result from gpt response using method 1.")
54
  if tryParsing:
@@ -56,8 +101,17 @@ class QueryHelperChainOfThought:
56
  sqlResult = json.loads(gptResponse)['finalResult']
57
  parsedSql = True
58
  tryParsing = False
 
59
  except:
60
  print("Couldn't parse desired result from gpt response using method 2")
 
 
 
 
 
 
 
 
61
  if parsedSql:
62
  isFormatted = False
63
  try:
@@ -100,7 +154,7 @@ class QueryHelperChainOfThought:
100
  },
101
  "subquery2": {
102
  "inputSubquery": ["subquery1"],
103
- "description":"extracts state, category, and total sales information from a subquery named "subquery1," filtering the results to include only categories with ranks up to 5 and sorting them by state and category rank."
104
  "result":"SELECT state, category, total_sales
105
  FROM ranked_categories
106
  WHERE category_rank <= 5
@@ -126,13 +180,13 @@ ORDER BY state, category_rank"
126
  promptColumnsInfo = self.getSystemPromptForQuery(selectedTablesAndCols)
127
 
128
  prompt = f"""You are a powerful text to sql model. Your task is to return sql query which answers
129
- user's input. Please follow subquery structure if the sql needs to have multiple subqueries.
130
  ###example userInput is {egUserInput}. output is {cotSubtaskOutput}. Output should be in json format as provided. Only output should be in response, nothing else.\n\n
131
  tables information are {promptTableInfo}.
132
- columns data are {promptColumnsInfo}.
133
  """
134
 
135
- prompt += f"and table Relations are {TABLE_RELATIONS}"
136
 
137
  return prompt
138
 
@@ -145,11 +199,11 @@ ORDER BY state, category_rank"
145
  promptTableInfo = f"""You are a powerful text to sql model. Answer which tables and columns are needed
146
  to answer user input using sql query. and following are tables and columns info. and example user input and result query."""
147
  for idx, tableName in enumerate(selectedTablesAndCols.keys(), start=1):
148
- promptTableInfo += f"table name {tableName} and summary is {tableSummaryDict[tableName]}"
149
- promptTableInfo += f" and columns {', '.join(selectedTablesAndCols[tableName])} \n"
150
  promptTableInfo += "XXXX"
151
  #Join statements
152
- promptTableInfo += f"and table Relations are {TABLE_RELATIONS}"
153
  return promptTableInfo
154
 
155
 
@@ -166,9 +220,9 @@ GROUP BY a.customer_id
166
  ORDER BY chandelier_count DESC"""
167
 
168
  question = "top 5 customers who bought most chandeliers in nov 2023"
169
- promptForQuery = f"""You are a powerful text to sql model. Answer user input with sql query. And the query needs to run on {platform}. and schemaName is {schemaName}. There is example user input and desired generated sql query. Follow similar patterns as example. eg case insensitive, explicit variable declaration etc. user input : {question}, query : {exampleQuery}. and table's data is \n"""
170
  for idx, tableName in enumerate(prospectTablesAndCols.keys(), start=1):
171
- promptForQuery += f"table name is {tableName}, table data is {self.sampleData[tableName][prospectTablesAndCols[tableName]].head(self.gptSampleRows)}"
172
- promptForQuery += f"and table Relations are {TABLE_RELATIONS}"
173
  return promptForQuery.replace("\\"," ").replace(" "," ").replace("XXXX", " ")
174
 
 
37
  self.metadataLayout = metadataLayout
38
  self._onMetadataChange()
39
 
40
def getQueryForUserInputWithHistory(self, verboseChatHistory, userInput):
    """Generate a SQL answer for userInput, replaying prior chat context.

    Sends the chain-of-thought system prompt plus verboseChatHistory
    (list of (user, raw-gpt-response) pairs) to gpt, then tries two ways
    of extracting the 'finalResult' sql from the response:
      1. the contents of a ```json fenced block,
      2. the whole response with the fence markers stripped.

    Returns:
        (responseToReturn, verboseResponse): the pretty-printed sql (or
        the raw gpt response when parsing failed) and the untouched raw
        response, which the caller stores as context for the next turn.
    """
    prompt = self.getPromptForCot()
    self.gptInstanceForCoT.setSystemPrompt(prompt)
    gptResponse = self.gptInstanceForCoT.getResponseForChatHistory(verboseChatHistory, userInput)
    verboseResponse = gptResponse
    parsedSql = False
    sqlResult = None
    # Method 1: pull the payload out of a ```json ... ``` fenced block.
    # (The original guarded this with `if tryParsing:` right after setting
    # tryParsing = True — a dead check, removed here.)
    try:
        txt = gptResponse.split("```json")[-1].split('```')[0].replace('\n', ' ')
        sqlResult = json.loads(txt)['finalResult']
        parsedSql = True
        print("parsed desired result from gpt response using method 1.")
    except Exception:  # narrowed from bare except: don't swallow KeyboardInterrupt
        print("Couldn't parse desired result from gpt response using method 1.")
    if not parsedSql:
        # Method 2: strip the fence markers and parse the whole response.
        try:
            sqlResult = json.loads(gptResponse.replace("```json", "").replace("```", "").replace('\n', ' '))['finalResult']
            parsedSql = True
            print("parsed desired result from gpt response using method 2.")
        except Exception:
            print("Couldn't parse desired result from gpt response using method 2")
    if parsedSql:
        # Pretty-print the sql; fall back to sqlResult['result'] when gpt
        # returned the nested subquery dict instead of a flat string, and
        # finally to str() so the UI always gets something renderable.
        try:
            responseToReturn = sqlparse.format(sqlResult, reindent=True)
        except Exception:
            try:
                responseToReturn = sqlparse.format(sqlResult['result'], reindent=True)
                print("gpt didn't give parsed result. So parsing again. the formatting.")
            except Exception:
                responseToReturn = str(sqlResult)
    else:
        responseToReturn = gptResponse
    return responseToReturn, verboseResponse
82
+
83
+
84
  def getQueryForUserInputCoT(self, userInput):
85
  prompt = self.getPromptForCot()
86
  self.gptInstanceForCoT.setSystemPrompt(prompt)
 
89
  parsedSql = False
90
  if tryParsing:
91
  try:
92
+ txt = gptResponse.split("```json")[-1].split('```')[0].replace('\n', ' ')
93
  sqlResult = json.loads(txt)['finalResult']
94
  parsedSql = True
95
  tryParsing = False
96
+ print("parsed desired result from gpt response using method 1.")
97
  except:
98
  print("Couldn't parse desired result from gpt response using method 1.")
99
  if tryParsing:
 
101
  sqlResult = json.loads(gptResponse)['finalResult']
102
  parsedSql = True
103
  tryParsing = False
104
+ print("parsed desired result from gpt response using method 2.")
105
  except:
106
  print("Couldn't parse desired result from gpt response using method 2")
107
+ if tryParsing:
108
+ try:
109
+ sqlResult = json.loads(gptResponse.replace("```json","").replace("```","").replace('\n', ' '))['finalResult']
110
+ parsedSql = True
111
+ tryParsing = False
112
+ print("parsed desired result from gpt response using method 3.")
113
+ except:
114
+ print("Couldn't parse desired result from gpt response using method 3")
115
  if parsedSql:
116
  isFormatted = False
117
  try:
 
154
  },
155
  "subquery2": {
156
  "inputSubquery": ["subquery1"],
157
+ "description":"extracts state, category, and total sales information from a subquery named subquery1, filtering the results to include only categories with ranks up to 5 and sorting them by state and category rank.",
158
  "result":"SELECT state, category, total_sales
159
  FROM ranked_categories
160
  WHERE category_rank <= 5
 
180
  promptColumnsInfo = self.getSystemPromptForQuery(selectedTablesAndCols)
181
 
182
  prompt = f"""You are a powerful text to sql model. Your task is to return sql query which answers
183
+ user's input. Please follow subquery structure if the sql needs to have multiple subqueries. Your response should be in JSON format.
184
  ###example userInput is {egUserInput}. output is {cotSubtaskOutput}. Output should be in json format as provided. Only output should be in response, nothing else.\n\n
185
  tables information are {promptTableInfo}.
186
+ columns data are {promptColumnsInfo}.
187
  """
188
 
189
+ prompt += f"and table Relations are {TABLE_RELATIONS} "
190
 
191
  return prompt
192
 
 
199
  promptTableInfo = f"""You are a powerful text to sql model. Answer which tables and columns are needed
200
  to answer user input using sql query. and following are tables and columns info. and example user input and result query."""
201
  for idx, tableName in enumerate(selectedTablesAndCols.keys(), start=1):
202
+ promptTableInfo += f"table name {tableName} and summary is {tableSummaryDict[tableName]} "
203
+ promptTableInfo += f" and columns {', '.join(selectedTablesAndCols[tableName])} \n "
204
  promptTableInfo += "XXXX"
205
  #Join statements
206
+ promptTableInfo += f" and table Relations are {TABLE_RELATIONS} "
207
  return promptTableInfo
208
 
209
 
 
220
  ORDER BY chandelier_count DESC"""
221
 
222
  question = "top 5 customers who bought most chandeliers in nov 2023"
223
+ promptForQuery = f"""You are a powerful text to sql model. Answer user input with sql query. And the query needs to run on {platform}. and schemaName is {schemaName}. There is example user input and desired generated sql query. Follow similar patterns as example. eg case insensitive, explicit variable declaration etc. user input : {question}, query : {exampleQuery}. and table's data is \n """
224
  for idx, tableName in enumerate(prospectTablesAndCols.keys(), start=1):
225
+ promptForQuery += f"table name is {tableName}, table data is {self.sampleData[tableName][prospectTablesAndCols[tableName]].head(self.gptSampleRows)} \n "
226
+ promptForQuery += f"and table Relations are {TABLE_RELATIONS} \n "
227
  return promptForQuery.replace("\\"," ").replace(" "," ").replace("XXXX", " ")
228