cousintiz committed on
Commit
827ffd4
·
1 Parent(s): 3b7f153
Files changed (1) hide show
  1. app.py +99 -58
app.py CHANGED
@@ -3,16 +3,18 @@ import gradio as gr
3
  import requests
4
  import pandas as pd
5
 
6
- from smolagents import CodeAgent, InferenceClientModel
7
 
8
 
9
- # --- Constants ---
 
 
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  GAIA_SYSTEM_PROMPT = """
13
  You are solving GAIA level 1 questions.
14
 
15
- Return only your answer, which should be a number, or a short phrase with as few words as possible,
16
  or a comma separated list of numbers and/or strings.
17
 
18
  If the answer is a number, return only the number without any units unless specified otherwise.
@@ -22,63 +24,70 @@ Do NOT write 'FINAL ANSWER:' – return only the raw answer.
22
  """
23
 
24
 
 
 
 
 
25
  """
26
  Wraps a smolagents CodeAgent so that we can call it like a simple function:
27
- answer = agent(question).
28
  """
29
 
30
  def __init__(self):
31
  print("Initializing SmolGaiaAgent...")
32
 
33
- # 1) Model via Hugging Face Inference Router
34
- # Add HF_TOKEN as a secret in the Space settings if the model requires auth
35
- self.model = InferenceClientModel(
36
- model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
37
- api_key=os.getenv("HF_TOKEN"), # can be None for fully public models
38
- )
 
 
 
39
 
40
- # 2) CodeAgent with default toolbox (DuckDuckGoSearchTool, PythonInterpreterTool, Transcriber)
41
- # IMPORTANT: Pass system_prompt during initialization, not during run()
42
  self.agent = CodeAgent(
43
- tools=[], # we'll rely on add_base_tools
44
- add_base_tools=True, # gives search + python + speech tools
45
  model=self.model,
46
  max_steps=8,
47
  name="gaia_code_agent",
48
- description="Agent that uses web search and python to solve GAIA level 1 questions.",
49
- system_prompt=GAIA_SYSTEM_PROMPT, # FIX: Add system_prompt here
50
  )
51
 
52
  def __call__(self, question: str) -> str:
53
  """
54
  Runs the CodeAgent on one question and returns the final answer string.
 
 
55
  """
56
- print(f"[SmolGaiaAgent] Question: {question[:80]}...")
57
- # FIX: Remove system_prompt parameter from run()
58
- answer = self.agent.run(question)
59
  answer = str(answer).strip()
60
  print(f"[SmolGaiaAgent] Answer: {answer}")
61
  return answer
62
 
63
 
64
- def run_and_submit_all(request: gr.Request):
 
 
 
65
  """
66
- Fetches all questions, runs the BasicAgent on them, submits all answers,
67
  and displays the results.
68
  """
69
- # Get the user from the request object
70
- # FIX: Changed from profile parameter to request parameter
71
- profile = request.username if hasattr(request, 'username') and request.username else None
72
-
73
- space_id = os.getenv("SPACE_ID")
74
 
75
- if not profile:
 
 
 
76
  print("User not logged in.")
77
  return "Please Login to Hugging Face with the button.", None
78
 
79
- username = profile
80
- print(f"User logged in: {username}")
81
-
82
  api_url = DEFAULT_API_URL
83
  questions_url = f"{api_url}/questions"
84
  submit_url = f"{api_url}/submit"
@@ -89,9 +98,10 @@ def run_and_submit_all(request: gr.Request):
89
  except Exception as e:
90
  print(f"Error instantiating agent: {e}")
91
  return f"Error initializing agent: {e}", None
92
-
 
93
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
94
- print(agent_code)
95
 
96
  # 2. Fetch Questions
97
  print(f"Fetching questions from: {questions_url}")
@@ -100,21 +110,21 @@ def run_and_submit_all(request: gr.Request):
100
  response.raise_for_status()
101
  questions_data = response.json()
102
  if not questions_data:
103
- print("Fetched questions list is empty.")
104
- return "Fetched questions list is empty or invalid format.", None
105
  print(f"Fetched {len(questions_data)} questions.")
106
  except requests.exceptions.RequestException as e:
107
  print(f"Error fetching questions: {e}")
108
  return f"Error fetching questions: {e}", None
109
  except requests.exceptions.JSONDecodeError as e:
110
- print(f"Error decoding JSON response from questions endpoint: {e}")
111
- print(f"Response text: {response.text[:500]}")
112
- return f"Error decoding server response for questions: {e}", None
113
  except Exception as e:
114
  print(f"An unexpected error occurred fetching questions: {e}")
115
  return f"An unexpected error occurred fetching questions: {e}", None
116
 
117
- # 3. Run your Agent
118
  results_log = []
119
  answers_payload = []
120
  print(f"Running agent on {len(questions_data)} questions...")
@@ -126,22 +136,42 @@ def run_and_submit_all(request: gr.Request):
126
  continue
127
  try:
128
  submitted_answer = agent(question_text)
129
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
130
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
131
  except Exception as e:
132
- print(f"Error running agent on task {task_id}: {e}")
133
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
134
 
135
  if not answers_payload:
136
  print("Agent did not produce any answers to submit.")
137
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
138
 
139
- # 4. Prepare Submission
140
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
141
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
 
 
 
 
 
 
142
  print(status_update)
143
 
144
- # 5. Submit
145
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
146
  try:
147
  response = requests.post(submit_url, json=submission_data, timeout=60)
@@ -185,7 +215,9 @@ def run_and_submit_all(request: gr.Request):
185
  return status_message, results_df
186
 
187
 
188
- # --- Build Gradio Interface using Blocks ---
 
 
189
  with gr.Blocks() as demo:
190
  gr.Markdown("# Basic Agent Evaluation Runner")
191
  gr.Markdown(
@@ -198,8 +230,8 @@ with gr.Blocks() as demo:
198
 
199
  ---
200
  **Disclaimers:**
201
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
202
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
203
  """
204
  )
205
 
@@ -207,16 +239,22 @@ with gr.Blocks() as demo:
207
 
208
  run_button = gr.Button("Run Evaluation & Submit All Answers")
209
 
210
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
211
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
 
 
 
 
212
 
213
  run_button.click(
214
  fn=run_and_submit_all,
215
- outputs=[status_output, results_table]
216
  )
217
 
 
218
  if __name__ == "__main__":
219
- print("\n" + "-"*30 + " App Starting " + "-"*30)
220
  space_host_startup = os.getenv("SPACE_HOST")
221
  space_id_startup = os.getenv("SPACE_ID")
222
 
@@ -229,11 +267,14 @@ if __name__ == "__main__":
229
  if space_id_startup:
230
  print(f"✅ SPACE_ID found: {space_id_startup}")
231
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
232
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
233
  else:
234
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
235
-
236
- print("-"*(60 + len(" App Starting ")) + "\n")
237
 
 
238
  print("Launching Gradio Interface for Basic Agent Evaluation...")
239
- demo.launch(debug=True, share=False)
 
3
  import requests
4
  import pandas as pd
5
 
6
+ from smolagents import CodeAgent, HfApiModel, DuckDuckGoSearchTool, PythonInterpreterTool
7
 
8
 
9
+ # -------------------------------------------------------------------
10
+ # Constants
11
+ # -------------------------------------------------------------------
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
  GAIA_SYSTEM_PROMPT = """
15
  You are solving GAIA level 1 questions.
16
 
17
+ Return only your answer, which should be a number, or a short phrase with as few words as possible,
18
  or a comma separated list of numbers and/or strings.
19
 
20
  If the answer is a number, return only the number without any units unless specified otherwise.
 
24
  """
25
 
26
 
27
+ # -------------------------------------------------------------------
28
+ # smolagents-based GAIA Agent
29
+ # -------------------------------------------------------------------
30
class SmolGaiaAgent:
    """
    Callable wrapper around a smolagents CodeAgent.

    Instances are used like a plain function:
        answer = agent(question)
    """

    def __init__(self):
        """Build the model, the tool list, and the CodeAgent that uses them."""
        print("Initializing SmolGaiaAgent...")

        # Model: HfApiModel constructed with no arguments, so whatever defaults
        # the library ships with are used.
        # NOTE(review): the original author states the default is Qwen2.5-Coder
        # served via router.huggingface.co and that an HF_TOKEN Space secret is
        # picked up automatically -- confirm against the installed smolagents.
        self.model = HfApiModel()

        # Tools: web search plus a Python interpreter.
        search_tool = DuckDuckGoSearchTool()
        python_tool = PythonInterpreterTool()
        self.tools = [search_tool, python_tool]

        # Agent: `name` must be a valid Python identifier
        # (no dashes, not a keyword).
        agent_settings = {
            "tools": self.tools,
            "model": self.model,
            "max_steps": 8,
            "name": "gaia_code_agent",
            "description": "Agent that uses web search and Python to solve GAIA level 1 questions.",
        }
        self.agent = CodeAgent(**agent_settings)

    def __call__(self, question: str) -> str:
        """
        Run the CodeAgent on a single question and return its answer as a
        stripped string.

        The GAIA instructions are prepended to the question text itself rather
        than being passed to `run()` as a `system_prompt` keyword.
        NOTE(review): per the original author, `MultiStepAgent.run()` does not
        accept `system_prompt` -- confirm for the installed version.
        """
        preview = question[:80]
        print(f"[SmolGaiaAgent] Question (first 80 chars): {preview}...")

        instructions = GAIA_SYSTEM_PROMPT.strip()
        full_prompt = f"{instructions}\n\nQuestion:\n{question}"

        # The agent may return a non-string object; normalise it to text.
        raw_result = self.agent.run(full_prompt)
        answer = str(raw_result).strip()

        print(f"[SmolGaiaAgent] Answer: {answer}")
        return answer
71
 
72
 
73
+ # -------------------------------------------------------------------
74
+ # Evaluation + Submission Logic (from the course template)
75
+ # -------------------------------------------------------------------
76
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
77
  """
78
+ Fetches all questions, runs the SmolGaiaAgent on them, submits all answers,
79
  and displays the results.
80
  """
81
+ # --- Determine HF Space Runtime URL and Repo URL ---
82
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
 
 
 
83
 
84
+ if profile:
85
+ username = f"{profile.username}"
86
+ print(f"User logged in: {username}")
87
+ else:
88
  print("User not logged in.")
89
  return "Please Login to Hugging Face with the button.", None
90
 
 
 
 
91
  api_url = DEFAULT_API_URL
92
  questions_url = f"{api_url}/questions"
93
  submit_url = f"{api_url}/submit"
 
98
  except Exception as e:
99
  print(f"Error instantiating agent: {e}")
100
  return f"Error initializing agent: {e}", None
101
+
102
+ # Link to this Space's code (shown on leaderboard)
103
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
104
+ print(f"Agent code link: {agent_code}")
105
 
106
  # 2. Fetch Questions
107
  print(f"Fetching questions from: {questions_url}")
 
110
  response.raise_for_status()
111
  questions_data = response.json()
112
  if not questions_data:
113
+ print("Fetched questions list is empty.")
114
+ return "Fetched questions list is empty or invalid format.", None
115
  print(f"Fetched {len(questions_data)} questions.")
116
  except requests.exceptions.RequestException as e:
117
  print(f"Error fetching questions: {e}")
118
  return f"Error fetching questions: {e}", None
119
  except requests.exceptions.JSONDecodeError as e:
120
+ print(f"Error decoding JSON response from questions endpoint: {e}")
121
+ print(f"Response text: {response.text[:500]}")
122
+ return f"Error decoding server response for questions: {e}", None
123
  except Exception as e:
124
  print(f"An unexpected error occurred fetching questions: {e}")
125
  return f"An unexpected error occurred fetching questions: {e}", None
126
 
127
+ # 3. Run your Agent on each question
128
  results_log = []
129
  answers_payload = []
130
  print(f"Running agent on {len(questions_data)} questions...")
 
136
  continue
137
  try:
138
  submitted_answer = agent(question_text)
139
+ answers_payload.append(
140
+ {"task_id": task_id, "submitted_answer": submitted_answer}
141
+ )
142
+ results_log.append(
143
+ {
144
+ "Task ID": task_id,
145
+ "Question": question_text,
146
+ "Submitted Answer": submitted_answer,
147
+ }
148
+ )
149
  except Exception as e:
150
+ print(f"Error running agent on task {task_id}: {e}")
151
+ results_log.append(
152
+ {
153
+ "Task ID": task_id,
154
+ "Question": question_text,
155
+ "Submitted Answer": f"AGENT ERROR: {e}",
156
+ }
157
+ )
158
 
159
  if not answers_payload:
160
  print("Agent did not produce any answers to submit.")
161
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
162
 
163
+ # 4. Prepare Submission
164
+ submission_data = {
165
+ "username": username.strip(),
166
+ "agent_code": agent_code,
167
+ "answers": answers_payload,
168
+ }
169
+ status_update = (
170
+ f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
171
+ )
172
  print(status_update)
173
 
174
+ # 5. Submit to scoring API
175
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
176
  try:
177
  response = requests.post(submit_url, json=submission_data, timeout=60)
 
215
  return status_message, results_df
216
 
217
 
218
+ # -------------------------------------------------------------------
219
+ # Gradio UI
220
+ # -------------------------------------------------------------------
221
  with gr.Blocks() as demo:
222
  gr.Markdown("# Basic Agent Evaluation Runner")
223
  gr.Markdown(
 
230
 
231
  ---
232
  **Disclaimers:**
233
+ Once clicking on the "submit" button, it can take quite some time (this is the time for the agent to go through all the questions).
234
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
235
  """
236
  )
237
 
 
239
 
240
  run_button = gr.Button("Run Evaluation & Submit All Answers")
241
 
242
+ status_output = gr.Textbox(
243
+ label="Run Status / Submission Result", lines=5, interactive=False
244
+ )
245
+ results_table = gr.DataFrame(
246
+ label="Questions and Agent Answers",
247
+ wrap=True,
248
+ )
249
 
250
  run_button.click(
251
  fn=run_and_submit_all,
252
+ outputs=[status_output, results_table],
253
  )
254
 
255
+
256
  if __name__ == "__main__":
257
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
258
  space_host_startup = os.getenv("SPACE_HOST")
259
  space_id_startup = os.getenv("SPACE_ID")
260
 
 
267
  if space_id_startup:
268
  print(f"✅ SPACE_ID found: {space_id_startup}")
269
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
270
+ print(
271
+ f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
272
+ )
273
  else:
274
+ print(
275
+ "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
276
+ )
277
 
278
+ print("-" * (60 + len(" App Starting ")) + "\n")
279
  print("Launching Gradio Interface for Basic Agent Evaluation...")
280
+ demo.launch(debug=True, share=False)