cousintiz committed on
Commit
bf0e207
·
1 Parent(s): a0e9f32
Files changed (1) hide show
  1. app.py +133 -74
app.py CHANGED
@@ -1,86 +1,103 @@
1
  import os
2
- import gradio as gr
3
  import requests
4
  import pandas as pd
 
5
 
6
- from smolagents import CodeAgent, InferenceClientModel
 
 
7
 
8
 
9
- # --- Constants ---
 
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  GAIA_SYSTEM_PROMPT = """
13
  You are solving GAIA level 1 questions.
14
 
15
- Return only your answer, which should be a number, or a short phrase with as few words as possible,
16
- or a comma separated list of numbers and/or strings.
17
 
18
- If the answer is a number, return only the number without any units unless specified otherwise.
 
19
  If the answer is a string, don't include articles, and don't use abbreviations.
20
  If the answer is a comma separated list, apply the above rules to each element.
21
  Do NOT write 'FINAL ANSWER:' – return only the raw answer.
22
- """
23
 
24
 
25
- # --- Basic Agent Definition ---
 
 
26
  class SmolGaiaAgent:
27
  """
28
- Wraps a smolagents CodeAgent so that we can call it like a simple function:
29
- answer = agent(question).
 
30
  """
31
 
32
- def __init__(self):
33
  print("Initializing SmolGaiaAgent...")
34
 
35
- # 1) Model via Hugging Face Inference Router
36
- # Add HF_TOKEN as a secret in the Space settings if the model requires auth
37
- self.model = InferenceClientModel(
 
38
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
39
- api_key=os.getenv("HF_TOKEN"), # can be None for fully public models
40
  )
41
 
42
- # 2) CodeAgent with default toolbox (DuckDuckGoSearchTool, PythonInterpreterTool, Transcriber)
43
- # IMPORTANT: Pass system_prompt during initialization, not during run()
 
 
 
 
 
 
 
44
  self.agent = CodeAgent(
45
- tools=[], # we'll rely on add_base_tools
46
- add_base_tools=True, # gives search + python + speech tools
47
  model=self.model,
 
48
  max_steps=8,
49
  name="gaia_code_agent",
50
- description="Agent that uses web search and python to solve GAIA level 1 questions.",
51
- system_prompt=GAIA_SYSTEM_PROMPT, # FIX: Add system_prompt here
52
  )
53
 
54
  def __call__(self, question: str) -> str:
55
  """
56
- Runs the CodeAgent on one question and returns the final answer string.
57
  """
58
  print(f"[SmolGaiaAgent] Question: {question[:80]}...")
59
- # FIX: Remove system_prompt parameter from run()
 
60
  answer = self.agent.run(question)
61
- answer = str(answer).strip()
62
- print(f"[SmolGaiaAgent] Answer: {answer}")
63
- return answer
64
 
65
 
66
- def run_and_submit_all(request: gr.Request):
 
 
 
67
  """
68
- Fetches all questions, runs the BasicAgent on them, submits all answers,
69
- and displays the results.
70
  """
71
- # Get the user from the request object
72
- # FIX: Changed from profile parameter to request parameter
73
- profile = request.username if hasattr(request, 'username') and request.username else None
74
-
75
- space_id = os.getenv("SPACE_ID")
76
 
77
- if not profile:
 
 
 
78
  print("User not logged in.")
79
  return "Please Login to Hugging Face with the button.", None
80
 
81
- username = profile
82
- print(f"User logged in: {username}")
83
-
84
  api_url = DEFAULT_API_URL
85
  questions_url = f"{api_url}/questions"
86
  submit_url = f"{api_url}/submit"
@@ -91,9 +108,10 @@ def run_and_submit_all(request: gr.Request):
91
  except Exception as e:
92
  print(f"Error instantiating agent: {e}")
93
  return f"Error initializing agent: {e}", None
94
-
 
95
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
96
- print(agent_code)
97
 
98
  # 2. Fetch Questions
99
  print(f"Fetching questions from: {questions_url}")
@@ -102,16 +120,17 @@ def run_and_submit_all(request: gr.Request):
102
  response.raise_for_status()
103
  questions_data = response.json()
104
  if not questions_data:
105
- print("Fetched questions list is empty.")
106
- return "Fetched questions list is empty or invalid format.", None
107
  print(f"Fetched {len(questions_data)} questions.")
108
  except requests.exceptions.RequestException as e:
109
  print(f"Error fetching questions: {e}")
110
  return f"Error fetching questions: {e}", None
111
- except requests.exceptions.JSONDecodeError as e:
112
- print(f"Error decoding JSON response from questions endpoint: {e}")
113
- print(f"Response text: {response.text[:500]}")
114
- return f"Error decoding server response for questions: {e}", None
 
115
  except Exception as e:
116
  print(f"An unexpected error occurred fetching questions: {e}")
117
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -120,27 +139,51 @@ def run_and_submit_all(request: gr.Request):
120
  results_log = []
121
  answers_payload = []
122
  print(f"Running agent on {len(questions_data)} questions...")
 
123
  for item in questions_data:
124
  task_id = item.get("task_id")
125
  question_text = item.get("question")
 
126
  if not task_id or question_text is None:
127
  print(f"Skipping item with missing task_id or question: {item}")
128
  continue
 
129
  try:
130
  submitted_answer = agent(question_text)
131
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
132
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
133
  except Exception as e:
134
- print(f"Error running agent on task {task_id}: {e}")
135
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
136
 
137
  if not answers_payload:
138
  print("Agent did not produce any answers to submit.")
139
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
140
 
141
- # 4. Prepare Submission
142
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
143
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
 
 
 
 
 
 
 
144
  print(status_update)
145
 
146
  # 5. Submit
@@ -149,37 +192,43 @@ def run_and_submit_all(request: gr.Request):
149
  response = requests.post(submit_url, json=submission_data, timeout=60)
150
  response.raise_for_status()
151
  result_data = response.json()
 
152
  final_status = (
153
- f"Submission Successful!\n"
154
  f"User: {result_data.get('username')}\n"
155
  f"Overall Score: {result_data.get('score', 'N/A')}% "
156
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
 
157
  f"Message: {result_data.get('message', 'No message received.')}"
158
  )
159
  print("Submission successful.")
160
  results_df = pd.DataFrame(results_log)
161
  return final_status, results_df
 
162
  except requests.exceptions.HTTPError as e:
163
  error_detail = f"Server responded with status {e.response.status_code}."
164
  try:
165
  error_json = e.response.json()
166
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
167
- except requests.exceptions.JSONDecodeError:
168
  error_detail += f" Response: {e.response.text[:500]}"
169
  status_message = f"Submission Failed: {error_detail}"
170
  print(status_message)
171
  results_df = pd.DataFrame(results_log)
172
  return status_message, results_df
 
173
  except requests.exceptions.Timeout:
174
  status_message = "Submission Failed: The request timed out."
175
  print(status_message)
176
  results_df = pd.DataFrame(results_log)
177
  return status_message, results_df
 
178
  except requests.exceptions.RequestException as e:
179
  status_message = f"Submission Failed: Network error - {e}"
180
  print(status_message)
181
  results_df = pd.DataFrame(results_log)
182
  return status_message, results_df
 
183
  except Exception as e:
184
  status_message = f"An unexpected error occurred during submission: {e}"
185
  print(status_message)
@@ -187,21 +236,27 @@ def run_and_submit_all(request: gr.Request):
187
  return status_message, results_df
188
 
189
 
190
- # --- Build Gradio Interface using Blocks ---
 
 
191
  with gr.Blocks() as demo:
192
  gr.Markdown("# Basic Agent Evaluation Runner")
193
  gr.Markdown(
194
  """
195
  **Instructions:**
196
 
197
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
198
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
199
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
 
 
 
200
 
201
  ---
202
  **Disclaimers:**
203
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
204
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
 
205
  """
206
  )
207
 
@@ -209,16 +264,19 @@ with gr.Blocks() as demo:
209
 
210
  run_button = gr.Button("Run Evaluation & Submit All Answers")
211
 
212
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
213
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
214
-
215
- run_button.click(
216
- fn=run_and_submit_all,
217
- outputs=[status_output, results_table]
218
  )
 
 
 
 
 
 
 
219
 
220
  if __name__ == "__main__":
221
- print("\n" + "-"*30 + " App Starting " + "-"*30)
222
  space_host_startup = os.getenv("SPACE_HOST")
223
  space_id_startup = os.getenv("SPACE_ID")
224
 
@@ -231,11 +289,12 @@ if __name__ == "__main__":
231
  if space_id_startup:
232
  print(f"✅ SPACE_ID found: {space_id_startup}")
233
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
234
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
235
  else:
236
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
237
-
238
- print("-"*(60 + len(" App Starting ")) + "\n")
239
 
 
240
  print("Launching Gradio Interface for Basic Agent Evaluation...")
241
- demo.launch(debug=True, share=False)
 
1
  import os
 
2
  import requests
3
  import pandas as pd
4
+ import gradio as gr
5
 
6
+ from smolagents import CodeAgent
7
+ from smolagents.models import HfApiModel
8
+ from smolagents.tools import DuckDuckGoSearchTool, PythonInterpreterTool
9
 
10
 
11
+ # -------------------------------------------------------------------
12
+ # Constants
13
+ # -------------------------------------------------------------------
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
  GAIA_SYSTEM_PROMPT = """
17
  You are solving GAIA level 1 questions.
18
 
19
+ Return only your answer, which should be a number, or a short phrase with as few
20
+ words as possible, or a comma separated list of numbers and/or strings.
21
 
22
+ If the answer is a number, return only the number without any units unless
23
+ specified otherwise.
24
  If the answer is a string, don't include articles, and don't use abbreviations.
25
  If the answer is a comma separated list, apply the above rules to each element.
26
  Do NOT write 'FINAL ANSWER:' – return only the raw answer.
27
+ """.strip()
28
 
29
 
30
+ # -------------------------------------------------------------------
31
+ # Smolagents-based GAIA Agent
32
+ # -------------------------------------------------------------------
33
  class SmolGaiaAgent:
34
  """
35
+ Simple wrapper around a smolagents CodeAgent so we can call it like:
36
+
37
+ answer = agent(question)
38
  """
39
 
40
+ def __init__(self) -> None:
41
  print("Initializing SmolGaiaAgent...")
42
 
43
+ # 1) Model via Hugging Face Inference (router.huggingface.co under the hood)
44
+ # If HF_TOKEN is set as a Space secret, it will be used; otherwise the
45
+ # HfApiModel will try to use the default HF auth.
46
+ self.model = HfApiModel(
47
  model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
48
+ token=os.getenv("HF_TOKEN"), # safe even if None
49
  )
50
 
51
+ # 2) Tools: web search + python execution
52
+ tools = [
53
+ DuckDuckGoSearchTool(),
54
+ PythonInterpreterTool(),
55
+ ]
56
+
57
+ # 3) CodeAgent – IMPORTANT:
58
+ # * `name` must be a valid Python identifier (no dashes).
59
+ # * `system_prompt` is passed here, NOT to `.run(...)`
60
  self.agent = CodeAgent(
61
+ tools=tools,
 
62
  model=self.model,
63
+ system_prompt=GAIA_SYSTEM_PROMPT,
64
  max_steps=8,
65
  name="gaia_code_agent",
66
+ description="Agent that uses web search and Python to solve GAIA level 1 questions.",
67
+ add_base_tools=False,
68
  )
69
 
70
  def __call__(self, question: str) -> str:
71
  """
72
+ Run the CodeAgent on a single question and return the final answer.
73
  """
74
  print(f"[SmolGaiaAgent] Question: {question[:80]}...")
75
+ # MultiStepAgent.run() in the course infra DOES NOT accept `system_prompt`,
76
+ # so we only pass the question here.
77
  answer = self.agent.run(question)
78
+ answer_str = str(answer).strip()
79
+ print(f"[SmolGaiaAgent] Answer: {answer_str}")
80
+ return answer_str
81
 
82
 
83
+ # -------------------------------------------------------------------
84
+ # Evaluation / Submission logic (kept close to template)
85
+ # -------------------------------------------------------------------
86
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
87
  """
88
+ Fetch all questions, run the SmolGaiaAgent on them, submit all answers,
89
+ and display the results.
90
  """
91
+ # --- Determine HF Space Runtime URL and Repo URL ---
92
+ space_id = os.getenv("SPACE_ID") # Used to build link to this Space's code
 
 
 
93
 
94
+ if profile:
95
+ username = f"{profile.username}"
96
+ print(f"User logged in: {username}")
97
+ else:
98
  print("User not logged in.")
99
  return "Please Login to Hugging Face with the button.", None
100
 
 
 
 
101
  api_url = DEFAULT_API_URL
102
  questions_url = f"{api_url}/questions"
103
  submit_url = f"{api_url}/submit"
 
108
  except Exception as e:
109
  print(f"Error instantiating agent: {e}")
110
  return f"Error initializing agent: {e}", None
111
+
112
+ # Link to your codebase (shown on leaderboard)
113
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
114
+ print(f"Agent code URL: {agent_code}")
115
 
116
  # 2. Fetch Questions
117
  print(f"Fetching questions from: {questions_url}")
 
120
  response.raise_for_status()
121
  questions_data = response.json()
122
  if not questions_data:
123
+ print("Fetched questions list is empty.")
124
+ return "Fetched questions list is empty or invalid format.", None
125
  print(f"Fetched {len(questions_data)} questions.")
126
  except requests.exceptions.RequestException as e:
127
  print(f"Error fetching questions: {e}")
128
  return f"Error fetching questions: {e}", None
129
+ except ValueError as e:
130
+ # JSON decode error
131
+ print(f"Error decoding JSON response from questions endpoint: {e}")
132
+ print(f"Response text (first 500 chars): {response.text[:500]}")
133
+ return f"Error decoding server response for questions: {e}", None
134
  except Exception as e:
135
  print(f"An unexpected error occurred fetching questions: {e}")
136
  return f"An unexpected error occurred fetching questions: {e}", None
 
139
  results_log = []
140
  answers_payload = []
141
  print(f"Running agent on {len(questions_data)} questions...")
142
+
143
  for item in questions_data:
144
  task_id = item.get("task_id")
145
  question_text = item.get("question")
146
+
147
  if not task_id or question_text is None:
148
  print(f"Skipping item with missing task_id or question: {item}")
149
  continue
150
+
151
  try:
152
  submitted_answer = agent(question_text)
153
+ answers_payload.append(
154
+ {"task_id": task_id, "submitted_answer": submitted_answer}
155
+ )
156
+ results_log.append(
157
+ {
158
+ "Task ID": task_id,
159
+ "Question": question_text,
160
+ "Submitted Answer": submitted_answer,
161
+ }
162
+ )
163
  except Exception as e:
164
+ print(f"Error running agent on task {task_id}: {e}")
165
+ results_log.append(
166
+ {
167
+ "Task ID": task_id,
168
+ "Question": question_text,
169
+ "Submitted Answer": f"AGENT ERROR: {e}",
170
+ }
171
+ )
172
 
173
  if not answers_payload:
174
  print("Agent did not produce any answers to submit.")
175
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
176
 
177
+ # 4. Prepare Submission
178
+ submission_data = {
179
+ "username": username.strip(),
180
+ "agent_code": agent_code,
181
+ "answers": answers_payload,
182
+ }
183
+ status_update = (
184
+ f"Agent finished. Submitting {len(answers_payload)} answers "
185
+ f"for user '{username}'..."
186
+ )
187
  print(status_update)
188
 
189
  # 5. Submit
 
192
  response = requests.post(submit_url, json=submission_data, timeout=60)
193
  response.raise_for_status()
194
  result_data = response.json()
195
+
196
  final_status = (
197
+ "Submission Successful!\n"
198
  f"User: {result_data.get('username')}\n"
199
  f"Overall Score: {result_data.get('score', 'N/A')}% "
200
+ f"({result_data.get('correct_count', '?')}/"
201
+ f"{result_data.get('total_attempted', '?')} correct)\n"
202
  f"Message: {result_data.get('message', 'No message received.')}"
203
  )
204
  print("Submission successful.")
205
  results_df = pd.DataFrame(results_log)
206
  return final_status, results_df
207
+
208
  except requests.exceptions.HTTPError as e:
209
  error_detail = f"Server responded with status {e.response.status_code}."
210
  try:
211
  error_json = e.response.json()
212
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
213
+ except ValueError:
214
  error_detail += f" Response: {e.response.text[:500]}"
215
  status_message = f"Submission Failed: {error_detail}"
216
  print(status_message)
217
  results_df = pd.DataFrame(results_log)
218
  return status_message, results_df
219
+
220
  except requests.exceptions.Timeout:
221
  status_message = "Submission Failed: The request timed out."
222
  print(status_message)
223
  results_df = pd.DataFrame(results_log)
224
  return status_message, results_df
225
+
226
  except requests.exceptions.RequestException as e:
227
  status_message = f"Submission Failed: Network error - {e}"
228
  print(status_message)
229
  results_df = pd.DataFrame(results_log)
230
  return status_message, results_df
231
+
232
  except Exception as e:
233
  status_message = f"An unexpected error occurred during submission: {e}"
234
  print(status_message)
 
236
  return status_message, results_df
237
 
238
 
239
+ # -------------------------------------------------------------------
240
+ # Gradio UI
241
+ # -------------------------------------------------------------------
242
  with gr.Blocks() as demo:
243
  gr.Markdown("# Basic Agent Evaluation Runner")
244
  gr.Markdown(
245
  """
246
  **Instructions:**
247
 
248
+ 1. Please clone this space, then modify the code to define your agent's
249
+ logic, the tools, the necessary packages, etc.
250
+ 2. Log in to your Hugging Face account using the button below. This uses
251
+ your HF username for submission.
252
+ 3. Click **'Run Evaluation & Submit All Answers'** to fetch questions,
253
+ run your agent, submit answers, and see the score.
254
 
255
  ---
256
  **Disclaimers:**
257
+ Once you click the submit button, it can take quite some time (the agent
258
+ has to go through all the questions). This space is intentionally
259
+ minimal to encourage you to improve it.
260
  """
261
  )
262
 
 
264
 
265
  run_button = gr.Button("Run Evaluation & Submit All Answers")
266
 
267
+ status_output = gr.Textbox(
268
+ label="Run Status / Submission Result", lines=5, interactive=False
 
 
 
 
269
  )
270
+ results_table = gr.DataFrame(
271
+ label="Questions and Agent Answers",
272
+ wrap=True,
273
+ )
274
+
275
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
276
+
277
 
278
  if __name__ == "__main__":
279
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
280
  space_host_startup = os.getenv("SPACE_HOST")
281
  space_id_startup = os.getenv("SPACE_ID")
282
 
 
289
  if space_id_startup:
290
  print(f"✅ SPACE_ID found: {space_id_startup}")
291
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
292
+ print(
293
+ f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
294
+ )
295
  else:
296
+ print("ℹ️ SPACE_ID environment variable not found (running locally?).")
 
 
297
 
298
+ print("-" * (60 + len(" App Starting ")) + "\n")
299
  print("Launching Gradio Interface for Basic Agent Evaluation...")
300
+ demo.launch(debug=True, share=False)