Improve query functions (#41)
- Improve query functions (6348bd4d9c9ac20c6836aebbd62742baea683030)
- functions/query_functions.py +29 -10
- templates/sql_db.py +1 -1
functions/query_functions.py
CHANGED
@@ -30,9 +30,10 @@ class SQLiteQuery:
         for query in queries:
             result = pd.read_sql(query, self.connection)
             result.to_csv(f'{dir_path}/file_upload/query.csv', index=False)
+            column_names = list(result.columns)
             results.append(f"{result}")
         self.connection.close()
-        return {"results": results, "queries": queries}
+        return {"results": results, "queries": queries, "csv_columns": column_names}
@@ -43,7 +44,9 @@ def sqlite_query_func(queries: List[str], session_hash, **kwargs):
     result = sql_query.run(queries, session_hash)
     if len(result["results"][0]) > 1000:
         print("QUERY TOO LARGE")
-        return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file."}
+        return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
+        The column names of this query.csv file are: {result["csv_columns"]}.
+        If you need to display the results directly, perhaps use the table_generation_func function."""}
     else:
         return {"reply": result["results"][0]}
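Each run() method in this diff now returns a "csv_columns" key alongside "results" and "queries", and each *_query_func reads it when the stringified result is over 1000 characters. A minimal sketch of that contract from the caller's side (only the dict keys come from the diff; the sample values are made up):

    # Sketch of the shared contract; sample values are illustrative.
    result = {
        "results": ["   title  revenue\n0  Alpha     42.0"],  # str(DataFrame) per query
        "queries": ["SELECT title, revenue FROM films"],
        "csv_columns": ["title", "revenue"],  # columns written to query.csv
    }

    if len(result["results"][0]) > 1000:
        # Too large to inline for the LLM: point it at query.csv and name its columns.
        reply = ("query result too large to be processed by llm, "
                 "the query results are in our query.csv file.\n"
                 f"The column names of this query.csv file are: {result['csv_columns']}.")
    else:
        reply = result["results"][0]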
@@ -75,9 +78,10 @@ class PostgreSQLQuery:
             print(query)
             result = pd.read_sql_query(query, self.connection)
             result.to_csv(f'{dir_path}/sql/query.csv', index=False)
+            column_names = list(result.columns)
             results.append(f"{result}")
         self.connection.close()
-        return {"results": results, "queries": queries}
+        return {"results": results, "queries": queries, "csv_columns": column_names}
@@ -89,7 +93,9 @@ def sql_query_func(queries: List[str], session_hash, args, **kwargs):
     print(result)
     if len(result["results"][0]) > 1000:
         print("QUERY TOO LARGE")
-        return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file."}
+        return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
+        The column names of this query.csv file are: {result["csv_columns"]}.
+        If you need to display the results directly, perhaps use the table_generation_func function."""}
     else:
         return {"reply": result["results"][0]}
@@ -143,10 +149,11 @@ class DocDBQuery:
         docs = collection.aggregate_pandas_all(query_list)
         print("DATA FRAME COMPLETE")
         docs.to_csv(f'{dir_path}/doc_db/query.csv', index=False)
+        column_names = list(docs.columns)
         print("CSV COMPLETE")
         results.append(f"{docs}")
         self.client.close()
-        return {"results": results, "queries": aggregation_pipeline}
+        return {"results": results, "queries": aggregation_pipeline, "csv_columns": column_names}
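collection.aggregate_pandas_all above looks like PyMongoArrow's helper, which patch_all() grafts onto PyMongo collections so an aggregation pipeline comes back as a pandas DataFrame. A minimal sketch of that wiring, with a placeholder URI, database, and pipeline (assumptions, not code from this repo):

    # Assumed PyMongoArrow setup; URI, database, and pipeline are placeholders.
    from pymongo import MongoClient
    from pymongoarrow.monkey import patch_all

    patch_all()  # adds aggregate_pandas_all() to pymongo Collection objects

    client = MongoClient("mongodb://localhost:27017")
    collection = client["sample_db"]["films"]

    pipeline = [{"$group": {"_id": "$category", "count": {"$sum": 1}}}]
    docs = collection.aggregate_pandas_all(pipeline)  # pandas DataFrame
    docs.to_csv("query.csv", index=False)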
@@ -157,7 +164,9 @@ def doc_db_query_func(aggregation_pipeline: List[str], db_collection: AnyStr, se
     print("RESULT")
     if len(result["results"][0]) > 1000:
         print("QUERY TOO LARGE")
-        return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file."}
+        return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
+        The column names of this query.csv file are: {result["csv_columns"]}.
+        If you need to display the results directly, perhaps use the table_generation_func function."""}
     else:
         return {"reply": result["results"][0]}
@@ -200,9 +209,10 @@ class GraphQLQuery:
         #print(response_frame)
 
         response_frame.to_csv(f'{dir_path}/graphql/query.csv', index=False)
+        column_names = list(response_frame.columns)
         print("CSV COMPLETE")
         results.append(f"{response_frame}")
-        return {"results": results, "queries": graphql_query}
+        return {"results": results, "queries": graphql_query, "csv_columns": column_names}
@@ -213,7 +223,9 @@ def graphql_query_func(graphql_query: AnyStr, session_hash, args, **kwargs):
     print("RESULT")
     if len(result["results"][0]) > 1000:
         print("QUERY TOO LARGE")
-        return {"reply": "query result too large to be processed by llm, the query results are in our query.csv file."}
+        return {"reply": f"""query result too large to be processed by llm, the query results are in our query.csv file.
+        The column names of this query.csv file are: {result["csv_columns"]}.
+        If you need to display the results directly, perhaps use the table_generation_func function."""}
     else:
         return {"reply": result["results"][0]}
@@ -256,9 +268,16 @@ def graphql_csv_query(csv_query: AnyStr, session_hash, **kwargs):
         print("GRAPHQL CSV QUERY")
         queried_df = sqldf(csv_query, locals())
         print(queried_df)
+        column_names = list(queried_df.columns)
         queried_df.to_csv(f'{dir_path}/graphql/query.csv', index=False)
 
-
+        if len(queried_df) > 1000:
+            print("CSV QUERY TOO LARGE")
+            return {"reply": f"""The new query results are in our query.csv file.
+            The column names of this query.csv file are: {column_names}.
+            If you need to display the results directly, perhaps use the table_generation_func function."""}
+        else:
+            return {"reply": str(queried_df)}
 
     except Exception as e:
         reply = f"""There was an error querying our query.csv file with the query:{csv_query}
@@ -266,4 +285,4 @@ def graphql_csv_query(csv_query: AnyStr, session_hash, **kwargs):
         You should probably try again.
         """
         print(reply)
-    return {"reply": reply}
+        return {"reply": reply}
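sqldf in graphql_csv_query is presumably pandasql's, which runs SQLite-dialect SQL against whatever DataFrames it finds in the namespace you hand it; that is how a follow-up query can target the frame behind query.csv. A small self-contained example (the frame name and data are made up):

    # Assumed pandasql usage; the frame name and data are illustrative.
    import pandas as pd
    from pandasql import sqldf

    response_frame = pd.DataFrame({"title": ["Alpha", "Beta"], "revenue": [42.0, 7.5]})

    # Table names resolve against the mapping you pass (here, locals()).
    queried_df = sqldf("SELECT title FROM response_frame WHERE revenue > 10", locals())
    print(queried_df)  # one row: Alpha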
templates/sql_db.py
CHANGED
@@ -49,7 +49,7 @@ with gr.Blocks() as demo:
             ["Describe the dataset"],
             ["What is the total revenue generated by each store?"],
             ["Can you generate and display a bar chart of film category to number of films in that category?"],
-            ["Can you generate a pie chart showing the top 10 most rented films by revenue"],
+            ["Can you generate a pie chart showing the top 10 most rented films by revenue?"],
             ["Can you generate a line chart of rental revenue over time?"],
             ["What is the relationship between film length and rental frequency?"]
         ]