datdevsteve committed on
Commit
ac26227
·
verified ·
1 Parent(s): 8eb1cc8

fixes for gaia submission

Browse files
Files changed (1) hide show
  1. app.py +59 -57
app.py CHANGED
@@ -2,35 +2,20 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
- from langchain.agents import create_agent
6
- from langchain_google_genai import ChatGoogleGenerativeAI
7
- # Agent implementation is moved to gaia_agent.py
8
  from gaia_agent import GAIAAgent
9
  from dotenv import load_dotenv
10
 
11
  # Load environment variables
12
- #load_dotenv()
13
 
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
  # --- Agent Setup ---
18
- openai_key = os.getenv("OPENAI_API_KEY")
19
- googleai_key = os.getenv("GOOGLE_API_KEY")
20
-
21
- # Use OpenRouter via LangChain's ChatOpenAI
22
  openrouter_key = os.getenv("OPENROUTER_API_KEY")
23
  if not openrouter_key:
24
  raise RuntimeError("Set OPENROUTER_API_KEY in your .env (OpenRouter API key)")
25
 
26
- # model is created inside gaia_agent module
27
- # (gaia_agent.py will initialize the ChatOpenAI model using OPENROUTER_API_KEY)
28
-
29
-
30
-
31
- # The tools and GAIAAgent implementation live in gaia_agent.py now. This file
32
- # imports GAIAAgent and uses it in run_and_submit_all.
33
-
34
  def run_and_submit_all(profile: gr.OAuthProfile | None):
35
  """
36
  Fetches all questions, runs the GAIAAgent on them, submits all answers,
@@ -38,18 +23,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
38
  """
39
  # --- Determine HF Space Runtime URL and Repo URL ---
40
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
41
-
42
  if profile:
43
  username = f"{profile.username}"
44
  print(f"User logged in: {username}")
45
  else:
46
  print("User not logged in.")
47
  return "Please Login to Hugging Face with the button.", None
48
-
49
  api_url = DEFAULT_API_URL
50
  questions_url = f"{api_url}/questions"
51
  submit_url = f"{api_url}/submit"
52
-
53
  # 1. Instantiate Agent
54
  try:
55
  agent = GAIAAgent()
@@ -60,16 +45,18 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
60
  # In the case of an app running as a Hugging Face space, this link points toward your codebase
61
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local"
62
  print(f"Agent code location: {agent_code}")
63
-
64
  # 2. Fetch Questions
65
  print(f"Fetching questions from: {questions_url}")
66
  try:
67
  response = requests.get(questions_url, timeout=15)
68
  response.raise_for_status()
69
  questions_data = response.json()
 
70
  if not questions_data:
71
  print("Fetched questions list is empty.")
72
  return "Fetched questions list is empty or invalid format.", None
 
73
  print(f"Fetched {len(questions_data)} questions.")
74
  except requests.exceptions.RequestException as e:
75
  print(f"Error fetching questions: {e}")
@@ -81,40 +68,52 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
81
  except Exception as e:
82
  print(f"An unexpected error occurred fetching questions: {e}")
83
  return f"An unexpected error occurred fetching questions: {e}", None
84
-
85
- # 3. Run your Agent
86
  results_log = []
87
  answers_payload = []
 
88
  print(f"Running agent on {len(questions_data)} questions...")
89
  for item in questions_data:
90
  task_id = item.get("task_id")
91
  question_text = item.get("question")
 
92
  if not task_id or question_text is None:
93
  print(f"Skipping item with missing task_id or question: {item}")
94
  continue
 
95
  try:
 
 
 
 
96
  submitted_answer = agent(question_text)
 
 
 
 
97
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
98
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
99
  except Exception as e:
100
- print(f"Error running agent on task {task_id}: {e}")
101
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
102
-
103
  if not answers_payload:
104
  print("Agent did not produce any answers to submit.")
105
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
106
-
107
- # 4. Prepare Submission
108
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
109
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
110
  print(status_update)
111
-
112
  # 5. Submit
113
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
114
  try:
115
  response = requests.post(submit_url, json=submission_data, timeout=60)
116
  response.raise_for_status()
117
  result_data = response.json()
 
118
  final_status = (
119
  f"Submission Successful!\n"
120
  f"User: {result_data.get('username')}\n"
@@ -122,9 +121,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
122
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
123
  f"Message: {result_data.get('message', 'No message received.')}"
124
  )
 
125
  print("Submission successful.")
126
  results_df = pd.DataFrame(results_log)
127
  return final_status, results_df
 
128
  except requests.exceptions.HTTPError as e:
129
  error_detail = f"Server responded with status {e.response.status_code}."
130
  try:
@@ -132,55 +133,56 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
132
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
133
  except requests.exceptions.JSONDecodeError:
134
  error_detail += f" Response: {e.response.text[:500]}"
 
135
  status_message = f"Submission Failed: {error_detail}"
136
  print(status_message)
137
  results_df = pd.DataFrame(results_log)
138
  return status_message, results_df
 
139
  except requests.exceptions.Timeout:
140
  status_message = "Submission Failed: The request timed out."
141
  print(status_message)
142
  results_df = pd.DataFrame(results_log)
143
  return status_message, results_df
 
144
  except requests.exceptions.RequestException as e:
145
  status_message = f"Submission Failed: Network error - {e}"
146
  print(status_message)
147
  results_df = pd.DataFrame(results_log)
148
  return status_message, results_df
 
149
  except Exception as e:
150
  status_message = f"An unexpected error occurred during submission: {e}"
151
  print(status_message)
152
  results_df = pd.DataFrame(results_log)
153
  return status_message, results_df
154
 
155
-
156
  # --- Build Gradio Interface using Blocks ---
157
  with gr.Blocks() as demo:
158
  gr.Markdown("# GAIA Benchmark Agent Evaluation")
159
  gr.Markdown(
160
  """
161
- **Instructions:**
162
- 1. This app integrates a LangChain agent with multiple tools (calculator, Wikipedia, web search, Arxiv).
163
- 2. Log in to your Hugging Face account using the button below.
164
- 3. Click 'Run Evaluation & Submit All Answers' to fetch GAIA questions, run your agent, and submit answers.
165
-
166
- **Agent Tools:**
167
- - Mathematical operations (add, subtract, multiply, divide, modulus)
168
- - Wikipedia search
169
- - Web search (DDGS)
170
- - Arxiv academic paper search
171
- - Web Scraping Tool using BeautifulSoup
172
-
173
- **Note:** Processing all questions may take several minutes depending on the number of questions and API response times.
174
  """
175
  )
176
-
177
  gr.LoginButton()
178
-
179
  run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
180
-
181
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
182
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
183
-
184
  run_button.click(
185
  fn=run_and_submit_all,
186
  outputs=[status_output, results_table]
@@ -192,27 +194,27 @@ if __name__ == "__main__":
192
  # Check for required environment variables
193
  space_host_startup = os.getenv("SPACE_HOST")
194
  space_id_startup = os.getenv("SPACE_ID")
195
- google_api_key = os.getenv("GOOGLE_API_KEY")
196
 
197
  if space_host_startup:
198
  print(f"✅ SPACE_HOST found: {space_host_startup}")
199
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
200
  else:
201
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
202
-
203
  if space_id_startup:
204
  print(f"✅ SPACE_ID found: {space_id_startup}")
205
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
206
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
207
  else:
208
- print("ℹ️ SPACE_ID environment variable not found (running locally?).")
209
-
210
- if google_api_key:
211
- print("✅ GOOGLE_API_KEY found")
212
  else:
213
- print("⚠️ GOOGLE_API_KEY not found - agent will not work without it!")
214
-
215
  print("-"*(60 + len(" App Starting ")) + "\n")
216
-
217
  print("Launching Gradio Interface for GAIA Agent Evaluation...")
218
- demo.launch(debug=True, share=False)
 
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
 
 
5
  from gaia_agent import GAIAAgent
6
  from dotenv import load_dotenv
7
 
8
  # Load environment variables
9
+ # load_dotenv()
10
 
11
  # --- Constants ---
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
  # --- Agent Setup ---
 
 
 
 
15
  openrouter_key = os.getenv("OPENROUTER_API_KEY")
16
  if not openrouter_key:
17
  raise RuntimeError("Set OPENROUTER_API_KEY in your .env (OpenRouter API key)")
18
 
 
 
 
 
 
 
 
 
19
  def run_and_submit_all(profile: gr.OAuthProfile | None):
20
  """
21
  Fetches all questions, runs the GAIAAgent on them, submits all answers,
 
23
  """
24
  # --- Determine HF Space Runtime URL and Repo URL ---
25
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
26
+
27
  if profile:
28
  username = f"{profile.username}"
29
  print(f"User logged in: {username}")
30
  else:
31
  print("User not logged in.")
32
  return "Please Login to Hugging Face with the button.", None
33
+
34
  api_url = DEFAULT_API_URL
35
  questions_url = f"{api_url}/questions"
36
  submit_url = f"{api_url}/submit"
37
+
38
  # 1. Instantiate Agent
39
  try:
40
  agent = GAIAAgent()
 
45
  # In the case of an app running as a Hugging Face space, this link points toward your codebase
46
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local"
47
  print(f"Agent code location: {agent_code}")
48
+
49
  # 2. Fetch Questions
50
  print(f"Fetching questions from: {questions_url}")
51
  try:
52
  response = requests.get(questions_url, timeout=15)
53
  response.raise_for_status()
54
  questions_data = response.json()
55
+
56
  if not questions_data:
57
  print("Fetched questions list is empty.")
58
  return "Fetched questions list is empty or invalid format.", None
59
+
60
  print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
 
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
71
+
72
+ # 3. Run your Agent
73
  results_log = []
74
  answers_payload = []
75
+
76
  print(f"Running agent on {len(questions_data)} questions...")
77
  for item in questions_data:
78
  task_id = item.get("task_id")
79
  question_text = item.get("question")
80
+
81
  if not task_id or question_text is None:
82
  print(f"Skipping item with missing task_id or question: {item}")
83
  continue
84
+
85
  try:
86
+ print(f"\n{'='*50}")
87
+ print(f"Processing Task ID: {task_id}")
88
+ print(f"Question: {question_text}")
89
+
90
  submitted_answer = agent(question_text)
91
+
92
+ print(f"Answer: {submitted_answer}")
93
+ print(f"{'='*50}\n")
94
+
95
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
96
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
97
  except Exception as e:
98
+ print(f"Error running agent on task {task_id}: {e}")
99
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
100
+
101
  if not answers_payload:
102
  print("Agent did not produce any answers to submit.")
103
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
104
+
105
+ # 4. Prepare Submission
106
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
107
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
108
  print(status_update)
109
+
110
  # 5. Submit
111
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
112
  try:
113
  response = requests.post(submit_url, json=submission_data, timeout=60)
114
  response.raise_for_status()
115
  result_data = response.json()
116
+
117
  final_status = (
118
  f"Submission Successful!\n"
119
  f"User: {result_data.get('username')}\n"
 
121
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
122
  f"Message: {result_data.get('message', 'No message received.')}"
123
  )
124
+
125
  print("Submission successful.")
126
  results_df = pd.DataFrame(results_log)
127
  return final_status, results_df
128
+
129
  except requests.exceptions.HTTPError as e:
130
  error_detail = f"Server responded with status {e.response.status_code}."
131
  try:
 
133
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
134
  except requests.exceptions.JSONDecodeError:
135
  error_detail += f" Response: {e.response.text[:500]}"
136
+
137
  status_message = f"Submission Failed: {error_detail}"
138
  print(status_message)
139
  results_df = pd.DataFrame(results_log)
140
  return status_message, results_df
141
+
142
  except requests.exceptions.Timeout:
143
  status_message = "Submission Failed: The request timed out."
144
  print(status_message)
145
  results_df = pd.DataFrame(results_log)
146
  return status_message, results_df
147
+
148
  except requests.exceptions.RequestException as e:
149
  status_message = f"Submission Failed: Network error - {e}"
150
  print(status_message)
151
  results_df = pd.DataFrame(results_log)
152
  return status_message, results_df
153
+
154
  except Exception as e:
155
  status_message = f"An unexpected error occurred during submission: {e}"
156
  print(status_message)
157
  results_df = pd.DataFrame(results_log)
158
  return status_message, results_df
159
 
 
160
  # --- Build Gradio Interface using Blocks ---
161
  with gr.Blocks() as demo:
162
  gr.Markdown("# GAIA Benchmark Agent Evaluation")
163
  gr.Markdown(
164
  """
165
+ **Instructions:**
166
+ 1. This app integrates a LangChain ReAct agent with multiple tools (calculator, Wikipedia, web search, Arxiv).
167
+ 2. Log in to your Hugging Face account using the button below.
168
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch GAIA questions, run your agent, and submit answers.
169
+
170
+ **Agent Tools:**
171
+ - Mathematical operations (add, subtract, multiply, divide, modulus)
172
+ - Wikipedia search
173
+ - Web search (DuckDuckGo)
174
+ - Arxiv academic paper search
175
+ - Web Scraping Tool using BeautifulSoup
176
+
177
+ **Note:** Processing all questions may take several minutes depending on the number of questions and API response times.
178
  """
179
  )
180
+
181
  gr.LoginButton()
 
182
  run_button = gr.Button("Run Evaluation & Submit All Answers", variant="primary")
 
183
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
184
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
185
+
186
  run_button.click(
187
  fn=run_and_submit_all,
188
  outputs=[status_output, results_table]
 
194
  # Check for required environment variables
195
  space_host_startup = os.getenv("SPACE_HOST")
196
  space_id_startup = os.getenv("SPACE_ID")
197
+ openrouter_key_startup = os.getenv("OPENROUTER_API_KEY")
198
 
199
  if space_host_startup:
200
  print(f"✅ SPACE_HOST found: {space_host_startup}")
201
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
202
  else:
203
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
204
+
205
  if space_id_startup:
206
  print(f"✅ SPACE_ID found: {space_id_startup}")
207
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
208
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
209
  else:
210
+ print("ℹ️ SPACE_ID environment variable not found (running locally?).")
211
+
212
+ if openrouter_key_startup:
213
+ print("✅ OPENROUTER_API_KEY found")
214
  else:
215
+ print("⚠️ OPENROUTER_API_KEY not found - agent will not work without it!")
216
+
217
  print("-"*(60 + len(" App Starting ")) + "\n")
 
218
  print("Launching Gradio Interface for GAIA Agent Evaluation...")
219
+
220
+ demo.launch(debug=True, share=False)