Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,33 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
| 6 |
from groq import Groq
|
| 7 |
import pandas as pd
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
load_dotenv()
|
| 10 |
|
| 11 |
groq_api_key = os.getenv("groq_api_key")
|
|
@@ -83,7 +110,5 @@ def response(user_query, dataset_folder):
|
|
| 83 |
system_prompt = create_prompt(user_query, table_metadata)
|
| 84 |
return generate_sql_query(system_prompt)
|
| 85 |
|
| 86 |
-
#
|
| 87 |
-
|
| 88 |
-
user_query = "Show me the top 10 startups with the highest funding."
|
| 89 |
-
print(response(user_query, dataset_folder))
|
|
|
|
| 6 |
from groq import Groq
|
| 7 |
import pandas as pd
|
| 8 |
|
| 9 |
+
|
| 10 |
+
# Resolve the folder that holds the CSV datasets: prefer a local 'data/'
# folder and fall back to the current directory when it is not a directory.
dataset_folder = "./data"
if not os.path.isdir(dataset_folder):
    dataset_folder = "."  # Fallback: look in the current directory

# Load every CSV file in the dataset folder. os.listdir() returns entries in
# arbitrary order, so the names are sorted to keep the row order of the
# combined frame reproducible between runs.
dataframes = []
for file in sorted(os.listdir(dataset_folder)):
    if file.endswith(".csv"):
        df = pd.read_csv(os.path.join(dataset_folder, file))
        dataframes.append(df)

# pd.concat() raises an opaque "No objects to concatenate" on an empty list;
# fail early with a message that names the folder that was searched.
if not dataframes:
    raise ValueError(f"No CSV files found in dataset folder {dataset_folder!r}")

# Stack all CSV files row-wise into one DataFrame with a fresh 0..n-1 index.
full_data = pd.concat(dataframes, ignore_index=True)

# Validate with explicit raises instead of assert: assert statements are
# removed when Python runs with -O, which would silently skip these checks.
for required_column in ("question", "query"):
    if required_column not in full_data.columns:
        raise ValueError(f"Dataset must have a '{required_column}' column")

print(full_data.head())  # Debugging: Check if data loads correctly
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
|
| 36 |
load_dotenv()
|
| 37 |
|
| 38 |
groq_api_key = os.getenv("groq_api_key")
|
|
|
|
| 110 |
system_prompt = create_prompt(user_query, table_metadata)
|
| 111 |
return generate_sql_query(system_prompt)
|
| 112 |
|
| 113 |
+
# dataset_folder is resolved near the top of this file (with a fallback to the
# current directory when './data' is missing); it is deliberately not
# reassigned here so that fallback is not overwritten.

# Example natural-language question.
user_query = "Show me the top 10 startups with the highest funding."
|
|
|
|
|
|