Improve query functions (#41)
- Improve query functions (6348bd4d9c9ac20c6836aebbd62742baea683030)
- functions/query_functions.py +29 -10
- templates/sql_db.py +1 -1
functions/query_functions.py
CHANGED
@@ -30,9 +30,10 @@ class SQLiteQuery:
         for query in queries:
             result = pd.read_sql(query, self.connection)
             result.to_csv(f'{dir_path}/file_upload/query.csv', index=False)
+            column_names = list(result.columns)
             results.append(f"{result}")
         self.connection.close()
-        return {"results": results, "queries": queries}
+        return {"results": results, "queries": queries, "csv_columns": column_names}
@@ -43,7 +44,9 @@ def sqlite_query_func(queries: List[str], session_hash, **kwargs):
     result = sql_query.run(queries, session_hash)
     if len(result["results"][0]) > 1000:
         print("QUERY TOO LARGE")
-        return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file."}
+        return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
+        The column names of this query.csv file are: {result["csv_columns"]}.
+        If you need to display the results directly, perhaps use the table_generation_func function."""}
     else:
         return {"reply": result["results"][0]}
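Each run() method in this diff now returns a "csv_columns" key alongside "results" and "queries", and each *_query_func reads it when the stringified result is over 1000 characters. A minimal sketch of that contract from the caller's side (only the dict keys come from the diff; the sample values are made up):

    # Sketch of the shared contract; sample values are illustrative.
    result = {
        "results": ["   title  revenue\n0  Alpha     42.0"],  # str(DataFrame) per query
        "queries": ["SELECT title, revenue FROM films"],
        "csv_columns": ["title", "revenue"],  # columns written to query.csv
    }

    if len(result["results"][0]) > 1000:
        # Too large to inline for the LLM: point it at query.csv and name its columns.
        reply = ("query result too large to be processed by llm, "
                 "the query results are in our query.csv file.\n"
                 f"The column names of this query.csv file are: {result['csv_columns']}.")
    else:
        reply = result["results"][0]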
@@ -75,9 +78,10 @@ class PostgreSQLQuery:
             print(query)
             result = pd.read_sql_query(query, self.connection)
             result.to_csv(f'{dir_path}/sql/query.csv', index=False)
+            column_names = list(result.columns)
             results.append(f"{result}")
         self.connection.close()
-        return {"results": results, "queries": queries}
+        return {"results": results, "queries": queries, "csv_columns": column_names}
@@ -89,7 +93,9 @@ def sql_query_func(queries: List[str], session_hash, args, **kwargs):
     print(result)
     if len(result["results"][0]) > 1000:
         print("QUERY TOO LARGE")
-        return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file."}
+        return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
+        The column names of this query.csv file are: {result["csv_columns"]}.
+        If you need to display the results directly, perhaps use the table_generation_func function."""}
     else:
         return {"reply": result["results"][0]}
@@ -143,10 +149,11 @@ class DocDBQuery:
         docs = collection.aggregate_pandas_all(query_list)
         print("DATA FRAME COMPLETE")
         docs.to_csv(f'{dir_path}/doc_db/query.csv', index=False)
+        column_names = list(docs.columns)
         print("CSV COMPLETE")
         results.append(f"{docs}")
         self.client.close()
-        return {"results": results, "queries": aggregation_pipeline}
+        return {"results": results, "queries": aggregation_pipeline, "csv_columns": column_names}
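collection.aggregate_pandas_all above looks like PyMongoArrow's helper, which patch_all() grafts onto PyMongo collections so an aggregation pipeline comes back as a pandas DataFrame. A minimal sketch of that wiring, with a placeholder URI, database, and pipeline (assumptions, not code from this repo):

    # Assumed PyMongoArrow setup; URI, database, and pipeline are placeholders.
    from pymongo import MongoClient
    from pymongoarrow.monkey import patch_all

    patch_all()  # adds aggregate_pandas_all() to pymongo Collection objects

    client = MongoClient("mongodb://localhost:27017")
    collection = client["sample_db"]["films"]

    pipeline = [{"$group": {"_id": "$category", "count": {"$sum": 1}}}]
    docs = collection.aggregate_pandas_all(pipeline)  # pandas DataFrame
    docs.to_csv("query.csv", index=False)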
@@ -157,7 +164,9 @@ def doc_db_query_func(aggregation_pipeline: List[str], db_collection: AnyStr, se
     print("RESULT")
     if len(result["results"][0]) > 1000:
         print("QUERY TOO LARGE")
-        return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file."}
+        return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
+        The column names of this query.csv file are: {result["csv_columns"]}.
+        If you need to display the results directly, perhaps use the table_generation_func function."""}
     else:
         return {"reply": result["results"][0]}
@@ -200,9 +209,10 @@ class GraphQLQuery:
         #print(response_frame)
 
         response_frame.to_csv(f'{dir_path}/graphql/query.csv', index=False)
+        column_names = list(response_frame.columns)
         print("CSV COMPLETE")
         results.append(f"{response_frame}")
-        return {"results": results, "queries": graphql_query}
+        return {"results": results, "queries": graphql_query, "csv_columns": column_names}
@@ -213,7 +223,9 @@ def graphql_query_func(graphql_query: AnyStr, session_hash, args, **kwargs):
     print("RESULT")
     if len(result["results"][0]) > 1000:
         print("QUERY TOO LARGE")
-        return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file."}
+        return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
+        The column names of this query.csv file are: {result["csv_columns"]}.
+        If you need to display the results directly, perhaps use the table_generation_func function."""}
     else:
         return {"reply": result["results"][0]}
@@ -256,9 +268,16 @@ def graphql_csv_query(csv_query: AnyStr, session_hash, **kwargs):
         print("GRAPHQL CSV QUERY")
         queried_df = sqldf(csv_query, locals())
         print(queried_df)
+        column_names = list(queried_df.columns)
         queried_df.to_csv(f'{dir_path}/graphql/query.csv', index=False)
 
-
+        if len(queried_df) > 1000:
+            print("CSV QUERY TOO LARGE")
+            return {"reply": f"""The new query results are in our query.csv file.
+            The column names of this query.csv file are: {column_names}.
+            If you need to display the results directly, perhaps use the table_generation_func function."""}
+        else:
+            return {"reply": str(queried_df)}
 
     except Exception as e:
         reply = f"""There was an error querying our query.csv file with the query:{csv_query}
@@ -266,4 +285,4 @@ def graphql_csv_query(csv_query: AnyStr, session_hash, **kwargs):
         You should probably try again.
         """
         print(reply)
-    return {"reply": reply}
+        return {"reply": reply}
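sqldf in graphql_csv_query is presumably pandasql's, which runs SQLite-dialect SQL against whatever DataFrames it finds in the namespace you hand it; that is how a follow-up query can target the frame behind query.csv. A small self-contained example (the frame name and data are made up):

    # Assumed pandasql usage; the frame name and data are illustrative.
    import pandas as pd
    from pandasql import sqldf

    response_frame = pd.DataFrame({"title": ["Alpha", "Beta"], "revenue": [42.0, 7.5]})

    # Table names resolve against the mapping you pass (here, locals()).
    queried_df = sqldf("SELECT title FROM response_frame WHERE revenue > 10", locals())
    print(queried_df)  # one row: Alpha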
templates/sql_db.py
CHANGED
@@ -49,7 +49,7 @@ with gr.Blocks() as demo:
             ["Describe the dataset"],
             ["What is the total revenue generated by each store?"],
             ["Can you generate and display a bar chart of film category to number of films in that category?"],
-            ["Can you generate a pie chart showing the top 10 most rented films by revenue"],
+            ["Can you generate a pie chart showing the top 10 most rented films by revenue?"],
             ["Can you generate a line chart of rental revenue over time?"],
             ["What is the relationship between film length and rental frequency?"]
         ]