kkhushisaid committed on
Commit
403fc41
·
verified ·
1 Parent(s): 4a487b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -4
app.py CHANGED
@@ -6,6 +6,33 @@ from sklearn.metrics.pairwise import cosine_similarity
6
  from groq import Groq
7
  import pandas as pd
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  load_dotenv()
10
 
11
  groq_api_key = os.getenv("groq_api_key")
@@ -83,7 +110,5 @@ def response(user_query, dataset_folder):
83
  system_prompt = create_prompt(user_query, table_metadata)
84
  return generate_sql_query(system_prompt)
85
 
86
- # Example usage:
87
- dataset_folder = r"C:\\Users\\khuma\\data"
88
- user_query = "Show me the top 10 startups with the highest funding."
89
- print(response(user_query, dataset_folder))
 
6
  from groq import Groq
7
  import pandas as pd
8
 
9
+
10
+ # Use the current directory for Hugging Face Spaces
11
+ dataset_folder = "./data" # Assuming files are in a 'data/' folder
12
+
13
+ # Verify the folder exists
14
+ if not os.path.exists(dataset_folder):
15
+ dataset_folder = "." # Fallback: Look in the current directory
16
+
17
+ # Load all CSV files in the dataset folder
18
+ dataframes = []
19
+ for file in os.listdir(dataset_folder):
20
+ if file.endswith(".csv"): # Check if the file is a CSV
21
+ df = pd.read_csv(os.path.join(dataset_folder, file))
22
+ dataframes.append(df)
23
+
24
+ # Merge all CSV files into one DataFrame
25
+ full_data = pd.concat(dataframes, ignore_index=True)
26
+
27
+ # Ensure required columns exist
28
+ assert 'question' in full_data.columns, "Dataset must have a 'question' column"
29
+ assert 'query' in full_data.columns, "Dataset must have a 'query' column"
30
+
31
+ print(full_data.head()) # Debugging: Check if data loads correctly
32
+
33
+
34
+
35
+
36
  load_dotenv()
37
 
38
  groq_api_key = os.getenv("groq_api_key")
 
110
  system_prompt = create_prompt(user_query, table_metadata)
111
  return generate_sql_query(system_prompt)
112
 
113
+ dataset_folder = "./data" # Change this based on where your files are uploaded
114
+ user_query = "Show me the top 10 startups with the highest funding."