Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,9 @@ from dotenv import load_dotenv
|
|
| 12 |
load_dotenv()
|
| 13 |
userdata = os.environ
|
| 14 |
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def chat_with_groq(client:groq.Groq,
|
| 17 |
prompt:str,
|
|
@@ -46,29 +49,19 @@ def chat_with_groq(client:groq.Groq,
|
|
| 46 |
# logger.info(f"Completion: {completion}")
|
| 47 |
return completion.choices[0].message.content
|
| 48 |
|
| 49 |
-
def execute_duckdb_query(query:str)->pd.DataFrame:
    """
    Execute a DuckDB query and return the result as a pandas DataFrame.

    The process working directory is switched to ``data`` while the query
    runs (so relative file names inside the query resolve against that
    directory) and is always switched back afterwards.

    Args:
        query (str): The DuckDB query to execute.

    Returns:
        pd.DataFrame: The result of the query as a pandas DataFrame, with
        ``reset_index()`` applied.

    Raises:
        Exception: Any error raised while connecting or querying is printed
        and then re-raised unchanged.
    """
    original_cwd = os.getcwd()
    print(f"PATH:{original_cwd}")
    os.chdir('data')
    print(f"PATH:{os.getcwd()}")

    conn = None
    try:
        conn = duckdb.connect(database=":memory:", read_only=False)
        return conn.execute(query).fetch_df().reset_index()
    except Exception as e:
        print(f"Error: {e}")
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Release the connection and restore the working directory on every
        # exit path, including non-Exception exits such as KeyboardInterrupt.
        if conn is not None:
            conn.close()
        os.chdir(original_cwd)
|
| 73 |
def get_summarization(client:groq.Groq,
|
| 74 |
use_question:str,
|
|
@@ -258,10 +251,6 @@ base_prompt = """
|
|
| 258 |
* Ensure that the entire output is returned on only one single line
|
| 259 |
* Keep your query as simple and straightforward as possible; do not use subqueries
|
| 260 |
"""
|
| 261 |
-
table_description = """"""
|
| 262 |
-
tables_string = """"""
|
| 263 |
-
table_1 = """"""
|
| 264 |
-
table_1_wt_xt = """"""
|
| 265 |
user_question = """"""
|
| 266 |
|
| 267 |
# And some rules for querying the dataset:
|
|
@@ -272,38 +261,37 @@ user_question = """"""
|
|
| 272 |
|
| 273 |
|
| 274 |
def upload_file(files) -> List[str]:
|
| 275 |
-
# TODO: this will have to change once the private system is initialized
|
| 276 |
model = "llama3-8b-8192"
|
| 277 |
-
api_key:str=userdata.get("GROQ_API_KEY")
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
if type(files) == str:
|
| 281 |
files = [files]
|
|
|
|
| 282 |
stored_paths = []
|
| 283 |
stored_table_descriptions = []
|
| 284 |
tables = []
|
|
|
|
| 285 |
for file in files:
|
| 286 |
filename = Path(file.name).name
|
| 287 |
-
path =
|
| 288 |
|
| 289 |
# Copy the content of the temporary file to our destination
|
| 290 |
-
|
| 291 |
-
destination.write(source.read())
|
| 292 |
|
| 293 |
-
stored_paths.append(str(path
|
| 294 |
-
table_description = identify_column_datatypes_to_SQL_DEF(pd.read_csv(path),api_key,model)
|
| 295 |
-
desc = "Table:
|
| 296 |
stored_table_descriptions.append(desc)
|
| 297 |
tables.append(filename)
|
| 298 |
-
|
|
|
|
|
|
|
|
|
|
| 299 |
tables_string = join_with_and(tables)
|
| 300 |
-
|
| 301 |
-
table_1_wt_xt =
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
table_1 = tables[0]
|
| 305 |
-
table_1_wt_xt = table_1_wt_xt
|
| 306 |
-
return final
|
| 307 |
|
| 308 |
def user_prompt_sanitization(user_prompt:str)->str:
|
| 309 |
guide = """
|
|
@@ -388,6 +376,7 @@ with gr.Blocks() as demo:
|
|
| 388 |
upload_output = gr.Textbox(label="Upload Status", lines=5)
|
| 389 |
|
| 390 |
upload_button.click(upload_file, inputs=file_output, outputs=upload_output)
|
|
|
|
| 391 |
with gr.Tab("Query Interface"):
|
| 392 |
chatbot = gr.Chatbot()
|
| 393 |
with gr.Row():
|
|
@@ -395,8 +384,6 @@ with gr.Blocks() as demo:
|
|
| 395 |
submit_button = gr.Button("Submit")
|
| 396 |
submit_button.click(queryModel, inputs=[user_input], outputs=chatbot)
|
| 397 |
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
demo.launch(share=True)
|
| 401 |
|
| 402 |
|
|
|
|
| 12 |
load_dotenv()
|
| 13 |
userdata = os.environ
|
| 14 |
|
| 15 |
+
DATA_DIR = Path(os.getcwd()) / "data"
|
| 16 |
+
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
| 17 |
+
|
| 18 |
|
| 19 |
def chat_with_groq(client:groq.Groq,
|
| 20 |
prompt:str,
|
|
|
|
| 49 |
# logger.info(f"Completion: {completion}")
|
| 50 |
return completion.choices[0].message.content
|
| 51 |
|
| 52 |
+
def execute_duckdb_query(query: str) -> pd.DataFrame:
    """Run ``query`` against an in-memory DuckDB database seeded from ``DATA_DIR``.

    Before the query runs, every ``*.csv`` file directly inside ``DATA_DIR``
    is loaded into a table named after the file's stem (file name without
    the final extension).

    Args:
        query: The SQL text to execute.

    Returns:
        The query result as a pandas DataFrame with ``reset_index()`` applied.

    Raises:
        Exception: Any error raised while loading the CSV files or running
        the query is printed and then re-raised unchanged.
    """
    conn = None
    try:
        conn = duckdb.connect(database=":memory:", read_only=False)

        # Load all CSV files from the data directory
        for csv_file in DATA_DIR.glob("*.csv"):
            # Quote the identifier so stems containing spaces, dashes or a
            # leading digit still form a valid table name; embedded double
            # quotes are doubled per SQL rules.
            table_name = '"' + csv_file.stem.replace('"', '""') + '"'
            # Escape single quotes so the path stays one SQL string literal.
            csv_path = str(csv_file).replace("'", "''")
            conn.execute(
                f"CREATE TABLE {table_name} AS "
                f"SELECT * FROM read_csv_auto('{csv_path}')"
            )

        return conn.execute(query).fetch_df().reset_index()
    except Exception as e:
        print(f"Error executing query: {e}")
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        # Always release the connection, on success and on failure.
        if conn is not None:
            conn.close()
|
| 66 |
def get_summarization(client:groq.Groq,
|
| 67 |
use_question:str,
|
|
|
|
| 251 |
* Ensure that the entire output is returned on only one single line
|
| 252 |
* Keep your query as simple and straightforward as possible; do not use subqueries
|
| 253 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
user_question = """"""
|
| 255 |
|
| 256 |
# And some rules for querying the dataset:
|
|
|
|
| 261 |
|
| 262 |
|
| 263 |
def upload_file(files) -> str:
    """Copy uploaded files into ``DATA_DIR`` and describe each one as a table.

    Each upload is copied into ``DATA_DIR`` under its base file name, read
    with ``pd.read_csv``, and passed to
    ``identify_column_datatypes_to_SQL_DEF`` to obtain a column description.
    The module-level ``table_description``, ``tables_string``, ``table_1``
    and ``table_1_wt_xt`` are then overwritten from the uploaded set.

    Args:
        files: A single upload or a list of uploads. Each item may be a
            path string / ``Path`` or an object exposing the path as
            ``.name`` (the shape the original code read). ``None`` is
            treated as "no files".

    Returns:
        The per-table descriptions joined with newlines; an empty string
        when nothing was uploaded.
    """
    # The global declaration has to come before any assignment to these
    # names inside the function, otherwise Python rejects the whole module
    # with a SyntaxError.
    global table_description, tables_string, table_1, table_1_wt_xt

    model = "llama3-8b-8192"
    api_key = userdata.get("GROQ_API_KEY")

    if files is None:
        files = []
    elif isinstance(files, (str, Path)) or hasattr(files, "name"):
        # A single upload (path or file-like object) rather than a list.
        files = [files]

    stored_table_descriptions = []
    tables = []

    for file in files:
        # Accept both plain paths and objects that carry the path in `.name`.
        source_path = Path(getattr(file, "name", file))
        filename = source_path.name
        path = DATA_DIR / filename

        # Copy the content of the temporary file to our destination
        shutil.copy2(source_path, path)

        # Local name on purpose: the global `table_description` is only
        # written once, after every file has been processed.
        column_defs = identify_column_datatypes_to_SQL_DEF(
            pd.read_csv(path), api_key, model
        )
        stored_table_descriptions.append(
            f"Table: {filename}\nColumns:\n{column_defs}"
        )
        tables.append(filename)

    # Update global variables
    combined_description = "\n".join(stored_table_descriptions)
    table_description = combined_description
    tables_string = join_with_and(tables)
    table_1 = tables[0] if tables else ""
    # Use the stem so this matches the table name DuckDB gets when the CSV
    # is loaded (`csv_file.stem`), even for names like "my.data.csv".
    table_1_wt_xt = Path(table_1).stem if table_1 else ""

    return combined_description
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
def user_prompt_sanitization(user_prompt:str)->str:
|
| 297 |
guide = """
|
|
|
|
| 376 |
upload_output = gr.Textbox(label="Upload Status", lines=5)
|
| 377 |
|
| 378 |
upload_button.click(upload_file, inputs=file_output, outputs=upload_output)
|
| 379 |
+
|
| 380 |
with gr.Tab("Query Interface"):
|
| 381 |
chatbot = gr.Chatbot()
|
| 382 |
with gr.Row():
|
|
|
|
| 384 |
submit_button = gr.Button("Submit")
|
| 385 |
submit_button.click(queryModel, inputs=[user_input], outputs=chatbot)
|
| 386 |
|
| 387 |
+
demo.launch()
|
|
|
|
|
|
|
| 388 |
|
| 389 |
|