MasterOfHugs committed on
Commit
f6f9436
·
verified ·
1 Parent(s): 4b7798e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -170
app.py CHANGED
@@ -1,13 +1,11 @@
1
  #!/usr/bin/env python3
2
- # app.py - Minimal GAIA-ready agent + runner (Gradio UI)
 
3
  import os
4
  import re
5
  import json
6
  import ast
7
  import operator
8
- import subprocess
9
- import sys
10
- import importlib
11
  import datetime
12
  import pytz
13
  import yaml
@@ -15,25 +13,16 @@ import requests
15
  import pandas as pd
16
  import gradio as gr
17
 
18
- # Ensure import helper (optional; mostly for local dev if you want auto-install)
19
- def ensure_import(pkg_name, import_name=None):
20
- import_name = import_name or pkg_name
21
- try:
22
- return importlib.import_module(import_name)
23
- except ImportError:
24
- # don't auto-install in production spaces; keep simple behavior
25
- raise
26
-
27
- # ---- smolagents & local tool imports ----
28
- # Assumes smolagents and tools.final_answer are available in your environment
29
- from smolagents import CodeAgent, HfApiModel, tool, DuckDuckGoSearchTool
30
  from tools.final_answer import FinalAnswerTool
31
 
32
  # -------------------------
33
- # Minimal toolset for GAIA
34
  # -------------------------
35
 
36
- # Safe calculator using ast (no eval)
37
  _allowed_ops = {
38
  ast.Add: operator.add, ast.Sub: operator.sub, ast.Mult: operator.mul,
39
  ast.Div: operator.truediv, ast.Pow: operator.pow, ast.USub: operator.neg,
@@ -41,15 +30,15 @@ _allowed_ops = {
41
  }
42
 
43
  def _eval_node(node):
44
- if isinstance(node, ast.Constant): # Python 3.8+: ast.Num replaced by ast.Constant
45
  return node.value
46
- if isinstance(node, ast.Num): # fallback for some versions
47
  return node.n
48
  if isinstance(node, ast.UnaryOp) and type(node.op) in _allowed_ops:
49
  return _allowed_ops[type(node.op)](_eval_node(node.operand))
50
  if isinstance(node, ast.BinOp) and type(node.op) in _allowed_ops:
51
  return _allowed_ops[type(node.op)](_eval_node(node.left), _eval_node(node.right))
52
- raise ValueError("Unsupported or unsafe expression")
53
 
54
  def safe_calc(expr: str):
55
  tree = ast.parse(expr, mode='eval')
@@ -57,18 +46,14 @@ def safe_calc(expr: str):
57
 
58
  @tool
59
  def calculator(expr: str) -> str:
60
- """Compute numeric expressions safely (no eval). Returns JSON string."""
61
  try:
62
- # clean expression (allow digits, operators, parentheses, spaces)
63
- expr_clean = expr.strip()
64
- val = safe_calc(expr_clean)
65
- return json.dumps({"expression": expr_clean, "result": float(val)})
66
  except Exception as e:
67
  return json.dumps({"error": f"Calc error: {e}"})
68
 
69
  @tool
70
  def get_current_time_in_timezone(timezone: str) -> str:
71
- """Return current time in timezone as JSON string."""
72
  try:
73
  tz = pytz.timezone(timezone)
74
  local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
@@ -76,10 +61,10 @@ def get_current_time_in_timezone(timezone: str) -> str:
76
  except Exception as e:
77
  return json.dumps({"error": f"Timezone error: {e}"})
78
 
79
- # FinalAnswerTool (exists in repo)
80
  final_answer = FinalAnswerTool()
81
 
82
- # Load prompts if present (optional)
83
  prompt_templates = None
84
  try:
85
  with open("prompts.yaml", "r") as fh:
@@ -87,36 +72,27 @@ try:
87
  except Exception:
88
  prompt_templates = None
89
 
90
- # Instantiate a compact CodeAgent (adjust model_id if needed)
 
 
 
 
91
  code_agent = CodeAgent(
92
- model=HfApiModel(
93
- max_tokens=1024,
94
- temperature=0.15,
95
- model_id='Qwen/Qwen2.5-Coder-32B-Instruct' # change if overloaded or unavailable
96
- ),
97
  tools=[final_answer, calculator, get_current_time_in_timezone],
98
  max_steps=6,
99
  verbosity_level=0,
100
  prompt_templates=prompt_templates
101
  )
102
 
103
- # Try to create a DuckDuckGo search tool for web fallback (may fail depending on smolagents version)
104
- search_tool = None
105
- try:
106
- search_tool = DuckDuckGoSearchTool()
107
- except Exception:
108
- search_tool = None # fallback gracefully if unavailable
109
-
110
  # -------------------------
111
- # Minimal GaiaAgent wrapper
112
  # -------------------------
113
  class GaiaAgentMinimal:
114
- def __init__(self, code_agent, search_tool=None):
115
  self.code_agent = code_agent
116
- self.search_tool = search_tool
117
 
118
  def _is_calc(self, q: str) -> bool:
119
- # heuristics: contains digits and math operators
120
  return bool(re.search(r'[\d]', q)) and any(op in q for op in ['+', '-', '*', '/', '%', '^'])
121
 
122
  def _is_time(self, q: str) -> bool:
@@ -124,144 +100,87 @@ class GaiaAgentMinimal:
124
  return "time" in ql or "heure" in ql or "quelle heure" in ql or "what time" in ql
125
 
126
  def run(self, question: str) -> str:
127
- """Return a string (or JSON string) answer for the GAIA runner."""
128
  try:
129
  q = question.strip()
130
 
131
- # 1) calculator
132
  if self._is_calc(q):
133
- # extract the first expression-like substring
134
  m = re.search(r'([0-9\.\s\+\-\*\/\^\%\(\)]+)', q)
135
  expr = m.group(1) if m else q
136
  return calculator(expr)
137
 
138
- # 2) time requests
139
  if self._is_time(q):
140
- # quick timezone guess
141
  if "paris" in q.lower() or "france" in q.lower():
142
  tz = "Europe/Paris"
143
  else:
144
- # try to extract "in X" patterns (e.g., in London)
145
- m = re.search(r'in\s+([A-Za-z_\/]+)', q, re.I)
146
- tz = m.group(1) if m else "UTC"
147
  return get_current_time_in_timezone(tz)
148
 
149
- # 3) quick web search fallback (if available)
150
- if self.search_tool:
151
- try:
152
- if hasattr(self.search_tool, "search"):
153
- res = self.search_tool.search(q, top_k=1)
154
- elif hasattr(self.search_tool, "run"):
155
- res = self.search_tool.run(q)
156
- else:
157
- res = self.search_tool(q)
158
- # Return as JSON string to be stable for scoring parsing
159
- return json.dumps({"source": "web_search", "snippet": str(res)})
160
- except Exception:
161
- pass
162
-
163
- # 4) fallback to LLM CodeAgent
164
- try:
165
- resp = None
166
- # Try typical call signatures (some smolagents versions use call directly)
167
- if hasattr(self.code_agent, "run"):
168
- resp = self.code_agent.run(q)
169
- else:
170
- resp = self.code_agent(q)
171
- # Normalize response
172
- if isinstance(resp, dict):
173
- for key in ("final_answer", "answer", "result", "output"):
174
- if key in resp:
175
- return str(resp[key])
176
- return json.dumps(resp)
177
- return str(resp)
178
- except Exception as e:
179
- # last-resort fallback
180
- return json.dumps({"error": f"LLM error: {e}"})
181
  except Exception as e:
182
  return json.dumps({"error": f"Agent internal error: {e}"})
183
 
184
- # Instantiate gaia_agent for the runner to use
185
- gaia_agent = GaiaAgentMinimal(code_agent, search_tool)
186
 
187
  # -------------------------
188
- # GAIA runner (unchanged logic, uses gaia_agent)
189
  # -------------------------
190
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
191
 
192
  def run_and_submit_all(profile: gr.OAuthProfile | None):
193
- """
194
- Fetches all questions, runs gaia_agent on them, submits all answers,
195
- and returns status and results table.
196
- """
197
  space_id = os.getenv("SPACE_ID")
198
 
199
  if profile:
200
  username = f"{profile.username}"
201
- print(f"User logged in: {username}")
202
  else:
203
- print("User not logged in.")
204
  return "Please Login to Hugging Face with the button.", None
205
 
206
  api_url = DEFAULT_API_URL
207
  questions_url = f"{api_url}/questions"
208
  submit_url = f"{api_url}/submit"
209
 
210
- # 1. Agent is ready (we use gaia_agent defined above)
211
- try:
212
- agent = gaia_agent
213
- except Exception as e:
214
- print(f"Error instantiating agent: {e}")
215
- return f"Error initializing agent: {e}", None
216
-
217
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "unknown"
218
 
219
- # 2. Fetch Questions
220
- print(f"Fetching questions from: {questions_url}")
221
  try:
222
  response = requests.get(questions_url, timeout=15)
223
  response.raise_for_status()
224
  questions_data = response.json()
225
  if not questions_data:
226
- print("Fetched questions list is empty.")
227
  return "Fetched questions list is empty or invalid format.", None
228
- print(f"Fetched {len(questions_data)} questions.")
229
- except requests.exceptions.RequestException as e:
230
- print(f"Error fetching questions: {e}")
231
- return f"Error fetching questions: {e}", None
232
  except Exception as e:
233
- print(f"An unexpected error occurred fetching questions: {e}")
234
- return f"An unexpected error occurred fetching questions: {e}", None
235
 
236
- # 3. Run agent on each question
237
  results_log = []
238
  answers_payload = []
239
- print(f"Running agent on {len(questions_data)} questions...")
240
  for item in questions_data:
241
  task_id = item.get("task_id")
242
  question_text = item.get("question")
243
  if not task_id or question_text is None:
244
- print(f"Skipping item with missing task_id or question: {item}")
245
  continue
246
  try:
247
- submitted_answer = agent.run(question_text)
248
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
249
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
250
  except Exception as e:
251
- print(f"Error running agent on task {task_id}: {e}")
252
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
253
 
254
  if not answers_payload:
255
- print("Agent did not produce any answers to submit.")
256
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
257
 
258
- # 4. Prepare submission
259
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
260
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
261
- print(status_update)
262
 
263
- # 5. Submit
264
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
265
  try:
266
  response = requests.post(submit_url, json=submission_data, timeout=60)
267
  response.raise_for_status()
@@ -273,35 +192,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
273
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
274
  f"Message: {result_data.get('message', 'No message received.')}"
275
  )
276
- print("Submission successful.")
277
  results_df = pd.DataFrame(results_log)
278
  return final_status, results_df
279
- except requests.exceptions.HTTPError as e:
280
- error_detail = f"Server responded with status {e.response.status_code}."
281
- try:
282
- error_json = e.response.json()
283
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
284
- except Exception:
285
- error_detail += f" Response: {e.response.text[:500]}"
286
- status_message = f"Submission Failed: {error_detail}"
287
- print(status_message)
288
- results_df = pd.DataFrame(results_log)
289
- return status_message, results_df
290
- except requests.exceptions.Timeout:
291
- status_message = "Submission Failed: The request timed out."
292
- print(status_message)
293
- results_df = pd.DataFrame(results_log)
294
- return status_message, results_df
295
- except requests.exceptions.RequestException as e:
296
- status_message = f"Submission Failed: Network error - {e}"
297
- print(status_message)
298
- results_df = pd.DataFrame(results_log)
299
- return status_message, results_df
300
  except Exception as e:
301
- status_message = f"An unexpected error occurred during submission: {e}"
302
- print(status_message)
303
  results_df = pd.DataFrame(results_log)
304
- return status_message, results_df
305
 
306
  # -------------------------
307
  # Gradio UI
@@ -309,15 +204,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
309
  with gr.Blocks() as demo:
310
  gr.Markdown("# Minimal GAIA Agent Runner")
311
  gr.Markdown(
312
- """
313
- Instructions:
314
- 1. Log in to your Hugging Face account with the button below.
315
- 2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, submit answers and get the score.
316
- """
317
  )
318
-
319
  gr.LoginButton()
320
-
321
  run_button = gr.Button("Run Evaluation & Submit All Answers")
322
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
323
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
@@ -325,20 +214,4 @@ with gr.Blocks() as demo:
325
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
326
 
327
  if __name__ == "__main__":
328
- print("\n" + "-"*30 + " App Starting " + "-"*30)
329
- space_host_startup = os.getenv("SPACE_HOST")
330
- space_id_startup = os.getenv("SPACE_ID")
331
- if space_host_startup:
332
- print(f"✅ SPACE_HOST found: {space_host_startup}")
333
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
334
- else:
335
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
336
- if space_id_startup:
337
- print(f"✅ SPACE_ID found: {space_id_startup}")
338
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
339
- else:
340
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
341
- print("-"*(60 + len(" App Starting ")) + "\n")
342
-
343
- print("Launching Gradio Interface for Basic Agent Evaluation...")
344
  demo.launch(debug=True, share=False)
 
1
  #!/usr/bin/env python3
2
+ # Minimal GAIA agent app.py (compatible with all smolagents versions)
3
+
4
  import os
5
  import re
6
  import json
7
  import ast
8
  import operator
 
 
 
9
  import datetime
10
  import pytz
11
  import yaml
 
13
  import pandas as pd
14
  import gradio as gr
15
 
16
+ # -------------------------
17
+ # smolagents imports
18
+ # -------------------------
19
+ from smolagents import CodeAgent, tool
 
 
 
 
 
 
 
 
20
  from tools.final_answer import FinalAnswerTool
21
 
22
  # -------------------------
23
+ # Minimal tools
24
  # -------------------------
25
 
 
26
  _allowed_ops = {
27
  ast.Add: operator.add, ast.Sub: operator.sub, ast.Mult: operator.mul,
28
  ast.Div: operator.truediv, ast.Pow: operator.pow, ast.USub: operator.neg,
 
30
  }
31
 
32
  def _eval_node(node):
33
+ if isinstance(node, ast.Constant):
34
  return node.value
35
+ if isinstance(node, ast.Num):
36
  return node.n
37
  if isinstance(node, ast.UnaryOp) and type(node.op) in _allowed_ops:
38
  return _allowed_ops[type(node.op)](_eval_node(node.operand))
39
  if isinstance(node, ast.BinOp) and type(node.op) in _allowed_ops:
40
  return _allowed_ops[type(node.op)](_eval_node(node.left), _eval_node(node.right))
41
+ raise ValueError("Unsupported expression")
42
 
43
  def safe_calc(expr: str):
44
  tree = ast.parse(expr, mode='eval')
 
46
 
47
  @tool
48
  def calculator(expr: str) -> str:
 
49
  try:
50
+ val = safe_calc(expr)
51
+ return json.dumps({"expression": expr, "result": float(val)})
 
 
52
  except Exception as e:
53
  return json.dumps({"error": f"Calc error: {e}"})
54
 
55
  @tool
56
  def get_current_time_in_timezone(timezone: str) -> str:
 
57
  try:
58
  tz = pytz.timezone(timezone)
59
  local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
 
61
  except Exception as e:
62
  return json.dumps({"error": f"Timezone error: {e}"})
63
 
64
+ # FinalAnswerTool
65
  final_answer = FinalAnswerTool()
66
 
67
+ # Load prompts.yaml if exists
68
  prompt_templates = None
69
  try:
70
  with open("prompts.yaml", "r") as fh:
 
72
  except Exception:
73
  prompt_templates = None
74
 
75
+ # -------------------------
76
+ # CodeAgent minimal
77
+ # -------------------------
78
MODEL_ID = 'Qwen/Qwen2.5-Coder-32B-Instruct'

# CodeAgent needs a model object it can ask for completions; a bare model-id
# string cannot do that. The inference wrapper class has gone by different
# names across smolagents releases, so try the newer name first and fall back
# to the older one.
try:
    from smolagents import InferenceClientModel as _InferenceModel
except ImportError:
    from smolagents import HfApiModel as _InferenceModel

code_agent = CodeAgent(
    model=_InferenceModel(model_id=MODEL_ID),
    tools=[final_answer, calculator, get_current_time_in_timezone],
    max_steps=6,
    verbosity_level=0,
    prompt_templates=prompt_templates,
)
87
 
 
 
 
 
 
 
 
88
  # -------------------------
89
+ # GAIA Agent wrapper
90
  # -------------------------
91
  class GaiaAgentMinimal:
92
+ def __init__(self, code_agent):
93
  self.code_agent = code_agent
 
94
 
95
  def _is_calc(self, q: str) -> bool:
 
96
  return bool(re.search(r'[\d]', q)) and any(op in q for op in ['+', '-', '*', '/', '%', '^'])
97
 
98
  def _is_time(self, q: str) -> bool:
 
100
  return "time" in ql or "heure" in ql or "quelle heure" in ql or "what time" in ql
101
 
102
  def run(self, question: str) -> str:
 
103
  try:
104
  q = question.strip()
105
 
106
+ # 1) Calculator
107
  if self._is_calc(q):
 
108
  m = re.search(r'([0-9\.\s\+\-\*\/\^\%\(\)]+)', q)
109
  expr = m.group(1) if m else q
110
  return calculator(expr)
111
 
112
+ # 2) Time queries
113
  if self._is_time(q):
 
114
  if "paris" in q.lower() or "france" in q.lower():
115
  tz = "Europe/Paris"
116
  else:
117
+ tz = "UTC"
 
 
118
  return get_current_time_in_timezone(tz)
119
 
120
+ # 3) fallback LLM
121
+ resp = self.code_agent.run(q)
122
+ if isinstance(resp, dict):
123
+ for key in ("final_answer", "answer", "result", "output"):
124
+ if key in resp:
125
+ return str(resp[key])
126
+ return json.dumps(resp)
127
+ return str(resp)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  except Exception as e:
129
  return json.dumps({"error": f"Agent internal error: {e}"})
130
 
131
+ # instantiate
132
+ gaia_agent = GaiaAgentMinimal(code_agent)
133
 
134
  # -------------------------
135
+ # GAIA runner (unchanged)
136
  # -------------------------
137
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
138
 
139
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
140
  space_id = os.getenv("SPACE_ID")
141
 
142
  if profile:
143
  username = f"{profile.username}"
 
144
  else:
 
145
  return "Please Login to Hugging Face with the button.", None
146
 
147
  api_url = DEFAULT_API_URL
148
  questions_url = f"{api_url}/questions"
149
  submit_url = f"{api_url}/submit"
150
 
 
 
 
 
 
 
 
151
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "unknown"
152
 
153
+ # Fetch questions
 
154
  try:
155
  response = requests.get(questions_url, timeout=15)
156
  response.raise_for_status()
157
  questions_data = response.json()
158
  if not questions_data:
 
159
  return "Fetched questions list is empty or invalid format.", None
 
 
 
 
160
  except Exception as e:
161
+ return f"Error fetching questions: {e}", None
 
162
 
163
+ # Run agent
164
  results_log = []
165
  answers_payload = []
 
166
  for item in questions_data:
167
  task_id = item.get("task_id")
168
  question_text = item.get("question")
169
  if not task_id or question_text is None:
 
170
  continue
171
  try:
172
+ submitted_answer = gaia_agent.run(question_text)
173
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
174
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
175
  except Exception as e:
 
176
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
177
 
178
  if not answers_payload:
 
179
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
180
 
 
181
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
182
 
183
+ # Submit
 
184
  try:
185
  response = requests.post(submit_url, json=submission_data, timeout=60)
186
  response.raise_for_status()
 
192
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
193
  f"Message: {result_data.get('message', 'No message received.')}"
194
  )
 
195
  results_df = pd.DataFrame(results_log)
196
  return final_status, results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  except Exception as e:
 
 
198
  results_df = pd.DataFrame(results_log)
199
+ return f"Submission failed: {e}", results_df
200
 
201
  # -------------------------
202
  # Gradio UI
 
204
  with gr.Blocks() as demo:
205
  gr.Markdown("# Minimal GAIA Agent Runner")
206
  gr.Markdown(
207
+ "Log in to Hugging Face, click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and submit answers."
 
 
 
 
208
  )
 
209
  gr.LoginButton()
 
210
  run_button = gr.Button("Run Evaluation & Submit All Answers")
211
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
212
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
214
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
215
 
216
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  demo.launch(debug=True, share=False)