Spaces:

Express-Analytics
/

QueryHelper

Runtime error

App Files Files Community

anumaurya114exp committed on Feb 22, 2024

Commit

cd72a4f

verified ·

1 Parent(s): c4e1fde

error handling of testing

Browse files

Files changed (1) hide show

utils.py +105 -2

utils.py CHANGED Viewed

@@ -5,6 +5,8 @@ from persistStorage import retrieveTablesDataFromLocalDb, saveTablesDataToLocalD
 from config import SCHEMA_INFO_FILE_PATH
 import os
 import pickle
 class DataWrapper:
   def __init__(self, data):
@@ -28,7 +30,10 @@ class MetaDataLayout:
         "selectedTables":{},
         "allTables":allTablesAndCols
     }
   def setSelection(self, tablesAndCols):
     """
     tablesAndCols : {"table1":["col1", "col2"], "table2":["cola","colb"]}
@@ -185,4 +190,102 @@ def preProcessGptQueryReponse(gptResponse, metadataLayout: MetaDataLayout):
    schemaName = metadataLayout.schemaName
    tablesList = metadataLayout.getAllTablesCols().keys()
    gptResponse = addSchemaToTableInSQL(gptResponse, schemaName=schemaName, tablesList=tablesList)
-   return gptResponse

 from config import SCHEMA_INFO_FILE_PATH
 import os
 import pickle
+import sqlparse
+import json
 class DataWrapper:
   def __init__(self, data):
         "selectedTables":{},
         "allTables":allTablesAndCols
     }
+  def getDataLayout(self):
+    return self.datalayout
   def setSelection(self, tablesAndCols):
     """
     tablesAndCols : {"table1":["col1", "col2"], "table2":["cola","colb"]}
    schemaName = metadataLayout.schemaName
    tablesList = metadataLayout.getAllTablesCols().keys()
    gptResponse = addSchemaToTableInSQL(gptResponse, schemaName=schemaName, tablesList=tablesList)
+   return gptResponse
def remove_with_as(sql_query):
    """Delete WITH ... AS (...) clauses and ``subqueryN AS (...)`` definitions from SQL text.

    Three substitutions run in order, each replacing every match with the
    empty string. All are case-insensitive and let ``.`` match newlines:

    1. a WITH ... AS (...) clause plus trailing whitespace and an optional comma,
    2. the same clause without the optional comma,
    3. a ``subquery<digits> AS (...)`` definition plus trailing whitespace.

    The parenthesised body in each pattern tolerates up to two further levels
    of nested parentheses.

    Args:
        sql_query: SQL text to clean.

    Returns:
        The text left after all three substitutions.
    """
    removal_patterns = (
        r'WITH\s+.*?AS\s*\((?:[^()]|\((?:[^()]+|\([^()]*\))*\))*\)\s*,?',
        r'WITH\s+.*?AS\s*\((?:[^()]|\((?:[^()]+|\([^()]*\))*\))*\)\s*',
        r'subquery\d+\s+AS\s*\((?:[^()]|\((?:[^()]+|\([^()]*\))*\))*\)\s*',
    )
    stripped = sql_query
    for raw_pattern in removal_patterns:
        stripped = re.sub(raw_pattern, '', stripped, flags=re.IGNORECASE | re.DOTALL)
    return stripped
def construct_with_stats(final_query, subquery_info):
    """Prefix ``final_query`` with a WITH clause built from ``subquery_info``.

    Each entry becomes ``<name> AS (<newline><result><newline>)``; entries are
    joined with a comma and a newline, in the mapping's iteration order.

    Args:
        final_query: SQL text placed on the line after the WITH clause.
        subquery_info: Mapping of subquery name to a mapping that holds the
            subquery's SQL under the ``"result"`` key.

    Returns:
        The combined query, or ``final_query`` unchanged when ``subquery_info``
        is empty (a bare ``WITH`` with no definitions is not valid SQL).

    Raises:
        KeyError: If an entry is a mapping without a ``"result"`` key.
    """
    # Nothing to define: do not emit a dangling "WITH " in front of the query.
    if not subquery_info:
        return final_query
    cte_definitions = ",\n".join(
        f"{name} AS (\n{details['result']}\n)"
        for name, details in subquery_info.items()
    )
    return f"WITH {cte_definitions}\n{final_query}"
def get_keys_matching_pattern(dictionary, pattern):
    """Return the keys of ``dictionary`` that ``pattern`` matches at their start.

    Matching uses ``re.match``, so the pattern is anchored at the beginning of
    the key but need not consume all of it. Keys come back in the dictionary's
    iteration order.
    """
    matching_keys = []
    for candidate in dictionary.keys():
        if re.match(pattern, candidate):
            matching_keys.append(candidate)
    return matching_keys
def get_subquery_info(json_response):
    """Return the entries of ``json_response`` whose keys begin with ``subquery<digits>``.

    Key selection is delegated to ``get_keys_matching_pattern`` with the
    pattern ``subquery\\d+``; the values are copied over unchanged.
    """
    subquery_info = {}
    for name in get_keys_matching_pattern(json_response, r'subquery\d+'):
        subquery_info[name] = json_response[name]
    return subquery_info
def construct_final_query(query, json_response):
    """Rebuild ``query`` with a WITH clause generated from ``json_response``.

    Steps, in order:
      1. strip existing WITH/subquery definitions from ``query`` (``remove_with_as``),
      2. collect the subquery entries of ``json_response`` (``get_subquery_info``),
      3. prepend them as a WITH clause (``construct_with_stats``),
      4. replace each doubled newline with a single one (one pass),
      5. re-indent the result with ``sqlparse.format``.

    Returns:
        The formatted SQL text.
    """
    body_without_ctes = remove_with_as(query)
    rebuilt = construct_with_stats(body_without_ctes, get_subquery_info(json_response))
    return sqlparse.format(rebuilt.replace('\n\n', '\n'), reindent=True)
def _extractFencedJson(gptResponse):
  """Return the text after the last ```json marker up to the next ```, newlines replaced by spaces."""
  return gptResponse.split("```json")[-1].split('```')[0].replace('\n', ' ')


def _parseFinalResultFromFence(gptResponse):
  """Method 1: parse the fenced JSON and read its 'finalResult' entry."""
  jsonResponse = json.loads(_extractFencedJson(gptResponse))
  return jsonResponse['finalResult'], jsonResponse


def _parseFinalResultFromWholeText(gptResponse):
  """Method 2: strip every fence marker, parse the whole text, read 'finalResult'."""
  jsonResponse = json.loads(gptResponse.replace("```json","").replace("```","").replace('\n', ' '))
  return jsonResponse['finalResult'], jsonResponse


def _parseFirstResultFromFence(gptResponse):
  """Method 3: parse the fenced JSON and read 'result' from its first entry."""
  jsonResponse = json.loads(_extractFencedJson(gptResponse))
  return jsonResponse[list(jsonResponse.keys())[0]]['result'], jsonResponse


def _formatSqlResult(sqlResult):
  """Re-indent sqlResult with sqlparse, falling back to sqlResult['result'], then to str(sqlResult)."""
  try:
    return sqlparse.format(sqlResult, reindent=True)
  except Exception:
    # Presumably sqlResult is a nested object rather than SQL text; try its 'result' entry.
    try:
      formattedSql = sqlparse.format(sqlResult['result'], reindent=True)
    except Exception:
      return str(sqlResult)
    print("gpt didn't give parsed result. So parsing again. the formatting.")
    return formattedSql


def getQueryFromGptResponse(gptResponse):
  """Extract the SQL query from a GPT response that is expected to embed JSON.

  Three parsing methods are tried in order and the first that succeeds wins:
    1. JSON inside the last ```json fence, key 'finalResult'
    2. the whole response with fence markers removed, key 'finalResult'
    3. JSON inside the last ```json fence, 'result' of its first entry
  A progress message is printed for every attempt.

  Args:
    gptResponse: raw response text.

  Returns:
    (sql, jsonResponse): the extracted SQL re-indented with sqlparse (or its
    str() form when formatting fails) and the parsed JSON object. When no
    method succeeds, (gptResponse, {}) is returned.
  """
  attempts = (
    (_parseFinalResultFromFence,
     "parsed desired result from gpt response using method 1.",
     "Couldn't parse desired result from gpt response using method 1."),
    (_parseFinalResultFromWholeText,
     "parsed desired result from gpt response using method 2.",
     "Couldn't parse desired result from gpt response using method 2"),
    (_parseFirstResultFromFence,
     "parsed desired result from gpt response using method 3.",
     "Couldn't parse desired result from gpt response using method 3."),
  )
  for parse, successMessage, failureMessage in attempts:
    try:
      sqlResult, jsonResponse = parse(gptResponse)
    except Exception:
      # Best-effort parsing of model output: any ordinary failure just moves on to
      # the next method. KeyboardInterrupt/SystemExit are deliberately not caught.
      print(failureMessage)
      continue
    print(successMessage)
    return _formatSqlResult(sqlResult), jsonResponse
  return gptResponse, {}