MasterOfHugs committed on
Commit
41f4523
·
verified ·
1 Parent(s): 81917a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +213 -65
app.py CHANGED
@@ -1,34 +1,203 @@
 
 
1
  import os
2
- import gradio as gr
 
 
 
 
 
 
 
 
 
3
  import requests
4
- import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # (Keep Constants as is)
8
- # --- Constants ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
- and displays the results.
26
  """
27
- # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
@@ -38,15 +207,14 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
38
  questions_url = f"{api_url}/questions"
39
  submit_url = f"{api_url}/submit"
40
 
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
50
 
51
  # 2. Fetch Questions
52
  print(f"Fetching questions from: {questions_url}")
@@ -55,21 +223,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
- except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
71
 
72
- # 3. Run your Agent
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
@@ -80,18 +244,18 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
@@ -117,7 +281,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
117
  try:
118
  error_json = e.response.json()
119
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
  error_detail += f" Response: {e.response.text[:500]}"
122
  status_message = f"Submission Failed: {error_detail}"
123
  print(status_message)
@@ -139,58 +303,42 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
139
  results_df = pd.DataFrame(results_log)
140
  return status_message, results_df
141
 
142
-
143
- # --- Build Gradio Interface using Blocks ---
 
144
  with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
148
- **Instructions:**
149
-
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
-
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
  """
159
  )
160
 
161
  gr.LoginButton()
162
 
163
  run_button = gr.Button("Run Evaluation & Submit All Answers")
164
-
165
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
- run_button.click(
170
- fn=run_and_submit_all,
171
- outputs=[status_output, results_table]
172
- )
173
 
174
  if __name__ == "__main__":
175
  print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
-
180
  if space_host_startup:
181
  print(f"✅ SPACE_HOST found: {space_host_startup}")
182
  print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
183
  else:
184
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
-
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
  print(f"✅ SPACE_ID found: {space_id_startup}")
188
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
  else:
191
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
-
193
  print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
  print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
1
+ #!/usr/bin/env python3
2
+ # app.py - Minimal GAIA-ready agent + runner (Gradio UI)
3
  import os
4
+ import re
5
+ import json
6
+ import ast
7
+ import operator
8
+ import subprocess
9
+ import sys
10
+ import importlib
11
+ import datetime
12
+ import pytz
13
+ import yaml
14
  import requests
 
15
  import pandas as pd
16
+ import gradio as gr
17
+
18
+ # Ensure import helper (optional; mostly for local dev if you want auto-install)
19
def ensure_import(pkg_name, import_name=None):
    """Import a module by name and return it.

    Args:
        pkg_name: Package name; used as the module name when
            ``import_name`` is not given (or is empty).
        import_name: Dotted module path to import when it differs from
            ``pkg_name``.

    Returns:
        The imported module object.

    Raises:
        ImportError: If the module cannot be imported. Nothing is
            auto-installed; the error reaches the caller unchanged.
    """
    # `or` rather than an `is None` test: an empty import_name also falls
    # back to pkg_name, as before.
    return importlib.import_module(import_name or pkg_name)
26
+
27
+ # ---- smolagents & local tool imports ----
28
+ # Assumes smolagents and tools.final_answer are available in your environment
29
+ from smolagents import CodeAgent, HfApiModel, tool, DuckDuckGoSearchTool
30
+ from tools.final_answer import FinalAnswerTool
31
+
32
+ # -------------------------
33
+ # Minimal toolset for GAIA
34
+ # -------------------------
35
+
36
+ # Safe calculator using ast (no eval)
37
# Upper bound on the bit size of an integer power result. Keeps inputs such
# as "9 ** 9 ** 9" from exhausting CPU and memory.
_MAX_POW_BITS = 100_000


def _safe_pow(base, exponent):
    """Return ``base ** exponent``, refusing integer results that would be huge.

    Raises:
        ValueError: If both operands are integers and the estimated size of
            the result exceeds ``_MAX_POW_BITS`` bits.
    """
    if (
        isinstance(base, int)
        and isinstance(exponent, int)
        and exponent > 0
        and abs(base) > 1
        and base.bit_length() * exponent > _MAX_POW_BITS
    ):
        raise ValueError("Exponentiation result too large")
    return operator.pow(base, exponent)


# Whitelist of AST operator node types and the functions that implement them.
_allowed_ops = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
    ast.Pow: _safe_pow,
    ast.Mod: operator.mod,
    ast.USub: operator.neg,
    ast.UAdd: operator.pos,
}


def _eval_node(node):
    """Evaluate one node of a whitelisted arithmetic AST.

    Only int/float literals and the unary/binary operators listed in
    ``_allowed_ops`` are accepted.

    Raises:
        ValueError: For any other node (names, calls, strings, booleans, ...).
    """
    if isinstance(node, ast.Constant):
        value = node.value
        # bool is a subclass of int, so it must be excluded explicitly;
        # str/bytes/None/complex are rejected by the second test.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValueError("Unsupported or unsafe expression")
        return value
    if isinstance(node, ast.UnaryOp) and type(node.op) in _allowed_ops:
        return _allowed_ops[type(node.op)](_eval_node(node.operand))
    if isinstance(node, ast.BinOp) and type(node.op) in _allowed_ops:
        return _allowed_ops[type(node.op)](
            _eval_node(node.left), _eval_node(node.right)
        )
    raise ValueError("Unsupported or unsafe expression")


def safe_calc(expr: str):
    """Evaluate an arithmetic expression without using ``eval``.

    Args:
        expr: Expression text using numbers, parentheses and the operators
            ``+ - * / % **``.

    Returns:
        The numeric value of the expression.

    Raises:
        SyntaxError: If ``expr`` is not a valid Python expression.
        ValueError: If it uses anything outside the whitelist, or an
            integer power that would be too large.
        ZeroDivisionError: On division or modulo by zero.
    """
    tree = ast.parse(expr, mode="eval")
    return _eval_node(tree.body)
57
+
58
@tool
def calculator(expr: str) -> str:
    """Compute a numeric expression safely (no eval) and return a JSON string.

    Args:
        expr: Arithmetic expression to evaluate, for example 2 * (3 + 4).

    Returns:
        A JSON object string with the keys "expression" and "result" on
        success, or with the single key "error" when evaluation fails.
    """
    try:
        expr_clean = expr.strip()
        val = safe_calc(expr_clean)
        # Keep integers exact: float() rounds values above 2**53 and
        # overflows on very large ones.
        if isinstance(val, int) and not isinstance(val, bool):
            result = val
        else:
            result = float(val)
        return json.dumps({"expression": expr_clean, "result": result})
    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        return json.dumps({"error": f"Calc error: {e}"})
68
+
69
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """Return the current local time in a timezone as a JSON string.

    Args:
        timezone: Timezone name understood by pytz, for example Europe/Paris or UTC.

    Returns:
        A JSON object string with the keys "timezone" and "local_time"
        (formatted as YYYY-MM-DD HH:MM:SS) on success, or with the single
        key "error" when the timezone cannot be resolved.
    """
    try:
        tz = pytz.timezone(timezone)
        local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        return json.dumps({"timezone": timezone, "local_time": local_time})
    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        return json.dumps({"error": f"Timezone error: {e}"})
78
+
79
+ # FinalAnswerTool (exists in repo)
80
+ final_answer = FinalAnswerTool()
81
+
82
+ # Load prompts if present (optional)
83
# Optional prompt templates; stays None when prompts.yaml is absent or unusable.
prompt_templates = None
try:
    with open("prompts.yaml", "r", encoding="utf-8") as fh:
        prompt_templates = yaml.safe_load(fh)
except FileNotFoundError:
    # The file is optional, so its absence is not worth reporting.
    pass
except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
    # A file that exists but cannot be read or parsed is a configuration
    # mistake: keep going without it, but say so instead of hiding it.
    print(f"Could not load prompts.yaml, continuing without it: {exc}")
    prompt_templates = None
89
+
90
+ # Instantiate a compact CodeAgent (adjust model_id if needed)
91
# Model backing the agent; change model_id if it is overloaded or unavailable.
_agent_model = HfApiModel(
    max_tokens=1024,
    temperature=0.15,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
)

# Compact code agent wired to the local tools defined above.
code_agent = CodeAgent(
    model=_agent_model,
    tools=[final_answer, calculator, get_current_time_in_timezone],
    max_steps=6,
    verbosity_level=0,
    prompt_templates=prompt_templates,
)
102
+
103
+ # Try to create a DuckDuckGo search tool for web fallback (may fail depending on smolagents version)
104
try:
    search_tool = DuckDuckGoSearchTool()
except Exception as exc:
    # Best effort: run without web search, but report why so that a missing
    # dependency or an incompatible smolagents version can be diagnosed.
    print(f"Web search tool unavailable, continuing without it: {exc}")
    search_tool = None
109
+
110
+ # -------------------------
111
+ # Minimal GaiaAgent wrapper
112
+ # -------------------------
113
+ class GaiaAgentMinimal:
114
+ def __init__(self, code_agent, search_tool=None):
115
+ self.code_agent = code_agent
116
+ self.search_tool = search_tool
117
 
118
+ def _is_calc(self, q: str) -> bool:
119
+ # heuristics: contains digits and math operators
120
+ return bool(re.search(r'[\d]', q)) and any(op in q for op in ['+', '-', '*', '/', '%', '^'])
121
+
122
+ def _is_time(self, q: str) -> bool:
123
+ ql = q.lower()
124
+ return "time" in ql or "heure" in ql or "quelle heure" in ql or "what time" in ql
125
+
126
+ def run(self, question: str) -> str:
127
+ """Return a string (or JSON string) answer for the GAIA runner."""
128
+ try:
129
+ q = question.strip()
130
+
131
+ # 1) calculator
132
+ if self._is_calc(q):
133
+ # extract the first expression-like substring
134
+ m = re.search(r'([0-9\.\s\+\-\*\/\^\%\(\)]+)', q)
135
+ expr = m.group(1) if m else q
136
+ return calculator(expr)
137
+
138
+ # 2) time requests
139
+ if self._is_time(q):
140
+ # quick timezone guess
141
+ if "paris" in q.lower() or "france" in q.lower():
142
+ tz = "Europe/Paris"
143
+ else:
144
+ # try to extract "in X" patterns (e.g., in London)
145
+ m = re.search(r'in\s+([A-Za-z_\/]+)', q, re.I)
146
+ tz = m.group(1) if m else "UTC"
147
+ return get_current_time_in_timezone(tz)
148
+
149
+ # 3) quick web search fallback (if available)
150
+ if self.search_tool:
151
+ try:
152
+ if hasattr(self.search_tool, "search"):
153
+ res = self.search_tool.search(q, top_k=1)
154
+ elif hasattr(self.search_tool, "run"):
155
+ res = self.search_tool.run(q)
156
+ else:
157
+ res = self.search_tool(q)
158
+ # Return as JSON string to be stable for scoring parsing
159
+ return json.dumps({"source": "web_search", "snippet": str(res)})
160
+ except Exception:
161
+ pass
162
+
163
+ # 4) fallback to LLM CodeAgent
164
+ try:
165
+ resp = None
166
+ # Try typical call signatures (some smolagents versions use call directly)
167
+ if hasattr(self.code_agent, "run"):
168
+ resp = self.code_agent.run(q)
169
+ else:
170
+ resp = self.code_agent(q)
171
+ # Normalize response
172
+ if isinstance(resp, dict):
173
+ for key in ("final_answer", "answer", "result", "output"):
174
+ if key in resp:
175
+ return str(resp[key])
176
+ return json.dumps(resp)
177
+ return str(resp)
178
+ except Exception as e:
179
+ # last-resort fallback
180
+ return json.dumps({"error": f"LLM error: {e}"})
181
+ except Exception as e:
182
+ return json.dumps({"error": f"Agent internal error: {e}"})
183
+
184
+ # Instantiate gaia_agent for the runner to use
185
+ gaia_agent = GaiaAgentMinimal(code_agent, search_tool)
186
+
187
+ # -------------------------
188
+ # GAIA runner (unchanged logic, uses gaia_agent)
189
+ # -------------------------
190
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
191
 
192
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
193
  """
194
+ Fetches all questions, runs gaia_agent on them, submits all answers,
195
+ and returns status and results table.
196
  """
197
+ space_id = os.getenv("SPACE_ID")
 
198
 
199
  if profile:
200
+ username = f"{profile.username}"
201
  print(f"User logged in: {username}")
202
  else:
203
  print("User not logged in.")
 
207
  questions_url = f"{api_url}/questions"
208
  submit_url = f"{api_url}/submit"
209
 
210
+ # 1. Agent is ready (we use gaia_agent defined above)
211
  try:
212
+ agent = gaia_agent
213
  except Exception as e:
214
  print(f"Error instantiating agent: {e}")
215
  return f"Error initializing agent: {e}", None
216
+
217
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "unknown"
 
218
 
219
  # 2. Fetch Questions
220
  print(f"Fetching questions from: {questions_url}")
 
223
  response.raise_for_status()
224
  questions_data = response.json()
225
  if not questions_data:
226
+ print("Fetched questions list is empty.")
227
+ return "Fetched questions list is empty or invalid format.", None
228
  print(f"Fetched {len(questions_data)} questions.")
229
  except requests.exceptions.RequestException as e:
230
  print(f"Error fetching questions: {e}")
231
  return f"Error fetching questions: {e}", None
 
 
 
 
232
  except Exception as e:
233
  print(f"An unexpected error occurred fetching questions: {e}")
234
  return f"An unexpected error occurred fetching questions: {e}", None
235
 
236
+ # 3. Run agent on each question
237
  results_log = []
238
  answers_payload = []
239
  print(f"Running agent on {len(questions_data)} questions...")
 
244
  print(f"Skipping item with missing task_id or question: {item}")
245
  continue
246
  try:
247
+ submitted_answer = agent.run(question_text)
248
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
249
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
250
  except Exception as e:
251
+ print(f"Error running agent on task {task_id}: {e}")
252
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
253
 
254
  if not answers_payload:
255
  print("Agent did not produce any answers to submit.")
256
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
257
 
258
+ # 4. Prepare submission
259
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
260
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
261
  print(status_update)
 
281
  try:
282
  error_json = e.response.json()
283
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
284
+ except Exception:
285
  error_detail += f" Response: {e.response.text[:500]}"
286
  status_message = f"Submission Failed: {error_detail}"
287
  print(status_message)
 
303
  results_df = pd.DataFrame(results_log)
304
  return status_message, results_df
305
 
306
+ # -------------------------
307
+ # Gradio UI
308
+ # -------------------------
309
  with gr.Blocks() as demo:
310
+ gr.Markdown("# Minimal GAIA Agent Runner")
311
  gr.Markdown(
312
  """
313
+ Instructions:
314
+ 1. Log in to your Hugging Face account with the button below.
315
+ 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, submit answers and get the score.
 
 
 
 
 
 
 
316
  """
317
  )
318
 
319
  gr.LoginButton()
320
 
321
  run_button = gr.Button("Run Evaluation & Submit All Answers")
 
322
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
323
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
324
 
325
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
326
 
327
if __name__ == "__main__":
    # Startup banner: report the Space environment variables, then launch the UI.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST normally already ends in ".hf.space"; only append the
        # suffix when it is missing so the URL is not doubled.
        if space_host_startup.endswith(".hf.space"):
            runtime_host = space_host_startup
        else:
            runtime_host = f"{space_host_startup}.hf.space"
        print(f" Runtime URL should be: https://{runtime_host}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)