Spaces:

Express-Analytics
/

QueryHelper

Runtime error

App Files Community

anumaurya114exp committed on Feb 22, 2024

Commit

c4e1fde

verified ·

1 Parent(s): 53577dd

update test error handling

Browse files

Files changed (1) hide show

queryHelperManagerCoT.py +15 -54

queryHelperManagerCoT.py CHANGED Viewed

@@ -1,8 +1,8 @@
-from gptManager import ChatgptManager
-from utils import *
 import json
 import sqlparse
-from constants import TABLE_RELATIONS
 class QueryHelperChainOfThought:
   def __init__(self, gptInstanceForCoT: ChatgptManager,
@@ -42,53 +42,13 @@ class QueryHelperChainOfThought:
     self.gptInstanceForCoT.setSystemPrompt(prompt)
     gptResponse = self.gptInstanceForCoT.getResponseForChatHistory(verboseChatHistory, userInput)
     verboseResponse = gptResponse
-    tryParsing = True
-    parsedSql = False
-    if tryParsing:
-      try:
-        txt = gptResponse.split("```json")[-1].split('```')[0].replace('\n', ' ')
-        sqlResult = json.loads(txt)['finalResult']
-        parsedSql = True
-        tryParsing = False
-        print("parsed desired result from gpt response using method 1.")
-      except:
-        print("Couldn't parse desired result from gpt response using method 1.")
-    if tryParsing:
-      try:
-        sqlResult = json.loads(gptResponse.replace("```json","").replace("```","").replace('\n', ' '))['finalResult']
-        parsedSql = True
-        tryParsing = False
-        print("parsed desired result from gpt response using method 2.")
-      except:
-        print("Couldn't parse desired result from gpt response using method 2")
-    if tryParsing:
-      try:
-        txt = gptResponse.split("```json")[-1].split('```')[0].replace('\n', ' ')
-        jsonResponse = json.loads(txt)
-        sqlResult = jsonResponse[list(jsonResponse.keys())[0]]['result']
-        parsedSql = True
-        tryParsing = False
-        print("parsed desired result from gpt response using method 3.")
-      except:
-        print("Couldn't parse desired result from gpt response using method 3.")
-    if parsedSql:
-      isFormatted = False
-      try:
-        formattedSql = sqlparse.format(sqlResult, reindent=True)
-        responseToReturn = formattedSql
-        isFormatted = True
-      except:
-        isFormatted = False
-      if not isFormatted:
-        try:
-          formattedSql = sqlparse.format(sqlResult['result'], reindent=True)
-          responseToReturn = formattedSql
-          print("gpt didn't give parsed result. So parsing again. the formatting.")
-        except:
-          responseToReturn = str(sqlResult)
     else:
-      responseToReturn = gptResponse
-    return responseToReturn, verboseResponse
   def getQueryForUserInputCoT(self, userInput):
@@ -144,7 +104,7 @@ class QueryHelperChainOfThought:
   def getPromptForCot(self):
     schemaName = self.schemaName
     platform = self.platform
-    tableSummaryDict = json.load(open(self.tableSummaryJson, 'r'))
     selectedTablesAndCols = self.metadataLayout.getSelectedTablesAndCols()
     egUserInput = "I want to get top 5 product categories by state, then rank categories on decreasing order of total sales"
@@ -237,6 +197,8 @@ ORDER BY state, category_rank"
     prompt = f"""You are a powerful text to sql model. Your task is to return sql query which answers
     user's input. Please follow subquery structure if the sql needs to have multiple subqueries. Your response should be in JSON format.
     ###example userInput is {egUserInput}. output is {cotSubtaskOutput}. Output should be in json format as provided. Only output should be in response, nothing else.\n\n
     tables information are {promptTableInfo}.
      columns data are {promptColumnsInfo}.
@@ -254,7 +216,7 @@ ORDER BY state, category_rank"
   def getSystemPromptForTableCols(self):
     schemaName = self.schemaName
     platform = self.platform
-    tableSummaryDict = json.load(open(self.tableSummaryJson, 'r'))
     selectedTablesAndCols = self.metadataLayout.getSelectedTablesAndCols()
     promptTableInfo = f"""You are a powerful text to sql model. Answer which tables and columns are needed
@@ -271,7 +233,7 @@ ORDER BY state, category_rank"
   def getSystemPromptForQuery(self, prospectTablesAndCols):
     schemaName = self.schemaName
     platform = self.platform
-    tableSummaryDict = json.load(open(self.tableSummaryJson,'r'))
     exampleQuery = """SELECT a.customer_id, COUNT(a.product_id) as chandelier_count
 FROM lpdatamart.tbl_f_sales a
 JOIN lpdatamart.tbl_d_product b ON a.product_id = b.product_id
@@ -285,5 +247,4 @@ ORDER BY chandelier_count DESC"""
     for idx, tableName in enumerate(prospectTablesAndCols.keys(), start=1):
         promptForQuery += f"table name is {tableName}, table data is {self.sampleData[tableName][prospectTablesAndCols[tableName]].head(self.gptSampleRows)} \n "
     promptForQuery += f"and table Relations are {TABLE_RELATIONS} \n "
-    return promptForQuery.replace("\\"," ").replace("  "," ").replace("XXXX", "    ")

+from .gptManager import ChatgptManager
+from .utils import *
 import json
 import sqlparse
+from .constants import TABLE_RELATIONS
 class QueryHelperChainOfThought:
   def __init__(self, gptInstanceForCoT: ChatgptManager,
     self.gptInstanceForCoT.setSystemPrompt(prompt)
     gptResponse = self.gptInstanceForCoT.getResponseForChatHistory(verboseChatHistory, userInput)
     verboseResponse = gptResponse
+    query, jsonResponse = getQueryFromGptResponse(gptResponse=gptResponse)
+    if query!=gptResponse:
+      finalQuery = construct_final_query(query, jsonResponse)
     else:
+      finalQuery = query
+    return finalQuery, verboseResponse
   def getQueryForUserInputCoT(self, userInput):
   def getPromptForCot(self):
     schemaName = self.schemaName
     platform = self.platform
+    tableSummaryDict = json.load(open(r"./core/queryHelper/tableSummaryDict.json", 'r'))
     selectedTablesAndCols = self.metadataLayout.getSelectedTablesAndCols()
     egUserInput = "I want to get top 5 product categories by state, then rank categories on decreasing order of total sales"
     prompt = f"""You are a powerful text to sql model. Your task is to return sql query which answers
     user's input. Please follow subquery structure if the sql needs to have multiple subqueries. Your response should be in JSON format.
+    Answer user input with sql query. And the query needs to run on {platform}. and schemaName is {schemaName}.
+    And use columns and tables provided, in case, you need additional column information, please ask the user.
     ###example userInput is {egUserInput}. output is {cotSubtaskOutput}. Output should be in json format as provided. Only output should be in response, nothing else.\n\n
     tables information are {promptTableInfo}.
      columns data are {promptColumnsInfo}.
   def getSystemPromptForTableCols(self):
     schemaName = self.schemaName
     platform = self.platform
+    tableSummaryDict = json.load(open(r"./core/queryHelper/tableSummaryDict.json", 'r'))
     selectedTablesAndCols = self.metadataLayout.getSelectedTablesAndCols()
     promptTableInfo = f"""You are a powerful text to sql model. Answer which tables and columns are needed
   def getSystemPromptForQuery(self, prospectTablesAndCols):
     schemaName = self.schemaName
     platform = self.platform
+    tableSummaryDict = json.load(open(r"./core/queryHelper/tableSummaryDict.json",'r'))
     exampleQuery = """SELECT a.customer_id, COUNT(a.product_id) as chandelier_count
 FROM lpdatamart.tbl_f_sales a
 JOIN lpdatamart.tbl_d_product b ON a.product_id = b.product_id
     for idx, tableName in enumerate(prospectTablesAndCols.keys(), start=1):
         promptForQuery += f"table name is {tableName}, table data is {self.sampleData[tableName][prospectTablesAndCols[tableName]].head(self.gptSampleRows)} \n "
     promptForQuery += f"and table Relations are {TABLE_RELATIONS} \n "
+    return promptForQuery.replace("\\"," ").replace("  "," ").replace("XXXX", "    ")