MasterOfHugs committed on
Commit
d717247
·
verified ·
1 Parent(s): dfad6bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -95
app.py CHANGED
@@ -1,4 +1,5 @@
1
  #!/usr/bin/env python3
 
2
  import os
3
  import ast
4
  import operator
@@ -9,12 +10,11 @@ import re
9
  import requests
10
  import pandas as pd
11
  import gradio as gr
12
- import yaml
13
 
14
- from smolagents import CodeAgent, HfApiModel, tool
15
 
16
  # -------------------------
17
- # Minimal tools (safe)
18
  # -------------------------
19
  _allowed_ops = {
20
  ast.Add: operator.add, ast.Sub: operator.sub, ast.Mult: operator.mul,
@@ -34,43 +34,25 @@ def _eval_node(node):
34
  raise ValueError("Unsupported expression")
35
 
36
  def safe_calc(expr: str):
37
- # expr must already be validated (only allowed chars)
38
  tree = ast.parse(expr, mode='eval')
39
  return _eval_node(tree.body)
40
 
41
-
42
  @tool
43
  def calculator(expr: str) -> str:
44
  """
45
  Safely evaluate a mathematical expression.
46
 
47
  Args:
48
- expr (str): Mathematical expression to evaluate, e.g. "2 + 2 * 3".
49
- Allowed characters: digits, spaces, parentheses, + - * / % ^ .
50
 
51
  Returns:
52
- str: JSON string {"expression": expr, "result": value} or {"error": "..."} on failure.
53
  """
54
  try:
55
- if expr is None:
56
- return json.dumps({"error": "No expression provided"})
57
- # sanitize: remove newlines, tabs and leading/trailing whitespace
58
- expr_clean = str(expr).replace('\n', ' ').replace('\r', ' ').replace('\t', ' ').strip()
59
- # allow caret ^ as exponent -> convert to **
60
- expr_clean = expr_clean.replace('^', '**')
61
- # validate chars: only digits, operators, parentheses, dot and spaces
62
- if not re.fullmatch(r"[0-9\.\s\+\-\*\/\%\(\)\*]+", expr_clean):
63
- return json.dumps({"error": "Expression contains invalid characters or is not a simple math expression", "original": expr})
64
- # extra safety: prevent huge exponentiation etc (limit length)
65
- if len(expr_clean) > 200:
66
- return json.dumps({"error": "Expression too long"})
67
- # parse & evaluate safely
68
- val = safe_calc(expr_clean)
69
- return json.dumps({"expression": expr_clean, "result": float(val)})
70
- except (SyntaxError, ValueError, IndentationError) as e:
71
- return json.dumps({"error": f"Calc parse error: {str(e)}", "original": expr})
72
  except Exception as e:
73
- return json.dumps({"error": f"Calc error: {str(e)}", "original": expr})
74
 
75
 
76
  @tool
@@ -79,19 +61,17 @@ def get_current_time_in_timezone(timezone: str) -> str:
79
  Get the current local time in a specified timezone.
80
 
81
  Args:
82
- timezone (str): A valid timezone string (e.g., "Europe/Paris").
83
 
84
  Returns:
85
- str: JSON string with {"timezone": timezone, "local_time": "..."} or {"error": "..."} on failure.
86
  """
87
  try:
88
- if not timezone:
89
- timezone = "UTC"
90
  tz = pytz.timezone(timezone)
91
  local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
92
  return json.dumps({"timezone": timezone, "local_time": local_time})
93
  except Exception as e:
94
- return json.dumps({"error": f"Timezone error: {e}", "timezone": timezone})
95
 
96
 
97
  # -------------------------
@@ -99,27 +79,17 @@ def get_current_time_in_timezone(timezone: str) -> str:
99
  # -------------------------
100
  prompt_templates = None
101
  try:
 
102
  with open("prompts.yaml", "r") as fh:
103
  prompt_templates = yaml.safe_load(fh)
104
  except Exception:
105
  prompt_templates = None
106
 
 
107
  # -------------------------
108
- # HfApiModel + CodeAgent
109
  # -------------------------
110
- # IMPORTANT: set HF_API_TOKEN secret in your Space settings (or export locally)
111
- # HF will often provide token internally in Spaces; otherwise add secret HF_API_TOKEN.
112
- hf_token = os.getenv("HF_API_TOKEN")
113
- if hf_token:
114
- print("HF_API_TOKEN found in environment.")
115
- else:
116
- print("Warning: HF_API_TOKEN not set. HfApiModel may fail if token required by environment.")
117
-
118
- model = HfApiModel(
119
- model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
120
- max_tokens=2048,
121
- temperature=0.5
122
- )
123
 
124
  code_agent = CodeAgent(
125
  model=model,
@@ -129,95 +99,104 @@ code_agent = CodeAgent(
129
  prompt_templates=prompt_templates
130
  )
131
 
 
132
  # -------------------------
133
- # GAIA Agent wrapper (fixed)
134
  # -------------------------
135
  class GaiaAgentMinimal:
136
  def __init__(self, code_agent):
137
  self.code_agent = code_agent
138
 
139
  def _is_calc(self, q: str) -> bool:
140
- # strict heuristic: require an explicit operator or explicit math intent
141
- if q is None:
142
  return False
143
  ql = q.lower()
144
- # common trigger words indicating calculation
145
- triggers = ["calculate", "compute", "what is", "how many", "evaluate"]
146
- if any(tr in ql for tr in triggers) and re.search(r"\d", ql):
147
  return True
148
- # or presence of arithmetic operators near digits
149
  if re.search(r"\d\s*[\+\-\*\/\%\^]\s*\d", q):
150
  return True
151
  return False
152
 
153
  def _is_time(self, q: str) -> bool:
154
- if q is None:
155
- return False
156
  ql = q.lower()
157
- return any(tok in ql for tok in ["time", "heure", "quelle heure", "what time", "current time", "local time"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
  def run(self, question: str) -> str:
160
  try:
161
  q = question.strip() if question else ""
162
- print(f"[gaia run] question preview: {q[:120]}")
163
 
164
- # 1) Calculator: strict
165
  if self._is_calc(q):
166
- # try to extract the math subexpression (first match)
167
  m = re.search(r'([0-9\.\s\+\-\*\/\^\%\(\)]+)', q)
168
- expr = m.group(1) if m else q
 
 
 
 
 
169
  return calculator(expr)
170
 
171
- # 2) Time queries
172
  if self._is_time(q):
173
  tz = "Europe/Paris" if "paris" in q.lower() or "france" in q.lower() else "UTC"
174
  return get_current_time_in_timezone(tz)
175
 
176
- # 3) LLM fallback via HfApiModel (wrapped)
177
- try:
178
- resp = self.code_agent.run(q)
179
- except Exception as e:
180
- # return structured error so GAIA runner sees it
181
- return json.dumps({"error": f"LLM runtime error: {str(e)}"})
182
-
183
- # Normalize responses: allow string, dict, number
184
- if resp is None:
185
- return json.dumps({"error": "LLM returned no output"})
186
- if isinstance(resp, dict):
187
- # prefer common keys
188
- for key in ("final_answer", "answer", "result", "output"):
189
- if key in resp:
190
- return str(resp[key])
191
- return json.dumps(resp)
192
- # primitives (int/float) -> convert to string
193
- if isinstance(resp, (int, float)):
194
- return str(resp)
195
- # otherwise assume string
196
- s = str(resp).strip()
197
- if s == "":
198
- return json.dumps({"error": "LLM returned empty string"})
199
- return s
200
  except Exception as e:
201
  return json.dumps({"error": f"Agent internal error: {str(e)}"})
202
 
203
- # instantiate agent
 
204
  gaia_agent = GaiaAgentMinimal(code_agent)
205
 
 
206
  # -------------------------
207
- # GAIA runner (unchanged behavior)
208
  # -------------------------
209
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
210
 
211
  def run_and_submit_all(profile: gr.OAuthProfile | None):
212
  space_id = os.getenv("SPACE_ID")
213
- if not profile:
 
 
 
214
  return "Please Login to Hugging Face with the button.", None
215
- username = profile.username.strip()
216
 
217
- questions_url = f"{DEFAULT_API_URL}/questions"
218
- submit_url = f"{DEFAULT_API_URL}/submit"
 
 
219
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "unknown"
220
 
 
221
  try:
222
  response = requests.get(questions_url, timeout=15)
223
  response.raise_for_status()
@@ -227,6 +206,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
227
  except Exception as e:
228
  return f"Error fetching questions: {e}", None
229
 
 
230
  results_log = []
231
  answers_payload = []
232
  for item in questions_data:
@@ -244,32 +224,42 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
244
  if not answers_payload:
245
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
246
 
247
- submission_data = {"username": username, "agent_code": agent_code, "answers": answers_payload}
 
 
248
  try:
249
  response = requests.post(submit_url, json=submission_data, timeout=60)
250
  response.raise_for_status()
251
  result_data = response.json()
252
  final_status = (
253
- f"Submission Successful!\nUser: {result_data.get('username')}\n"
 
254
  f"Overall Score: {result_data.get('score', 'N/A')}% "
255
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
256
  f"Message: {result_data.get('message', 'No message received.')}"
257
  )
258
- return final_status, pd.DataFrame(results_log)
 
259
  except Exception as e:
260
- return f"Submission failed: {e}", pd.DataFrame(results_log)
 
 
261
 
262
  # -------------------------
263
  # Gradio UI
264
  # -------------------------
265
  with gr.Blocks() as demo:
266
  gr.Markdown("# Minimal GAIA Agent Runner")
267
- gr.Markdown("Log in to Hugging Face, click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and submit answers.")
 
 
268
  gr.LoginButton()
269
  run_button = gr.Button("Run Evaluation & Submit All Answers")
270
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
271
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
272
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
273
 
 
274
  if __name__ == "__main__":
275
  demo.launch(debug=True, share=False)
 
1
  #!/usr/bin/env python3
2
+
3
  import os
4
  import ast
5
  import operator
 
10
  import requests
11
  import pandas as pd
12
  import gradio as gr
 
13
 
14
+ from smolagents import CodeAgent, TransformersModel, tool
15
 
16
  # -------------------------
17
+ # Minimal tools
18
  # -------------------------
19
  _allowed_ops = {
20
  ast.Add: operator.add, ast.Sub: operator.sub, ast.Mult: operator.mul,
 
34
  raise ValueError("Unsupported expression")
35
 
36
def safe_calc(expr: str):
    """Parse *expr* in eval mode and reduce it through the whitelisted-operator evaluator."""
    parsed = ast.parse(expr, mode='eval')
    return _eval_node(parsed.body)
39
 
 
40
@tool
def calculator(expr: str) -> str:
    """
    Safely evaluate a mathematical expression.

    Args:
        expr: A string containing a math expression like "2 + 2 * 3".
              A caret ``^`` is accepted as the exponent operator.

    Returns:
        JSON string with {"expression": expr, "result": value} or {"error": "..."} on failure.
    """
    try:
        # Normalize: accept caret as exponent, strip stray whitespace/newlines.
        cleaned = str(expr).replace('\n', ' ').replace('\r', ' ').replace('^', '**').strip()
        # Cap length so pathological expressions never reach the evaluator.
        if len(cleaned) > 200:
            return json.dumps({"error": "Expression too long"})
        # Whitelist characters so arbitrary code never reaches ast.parse.
        if not re.fullmatch(r"[0-9\.\s\+\-\*\/\%\(\)]+", cleaned):
            return json.dumps({"error": "Expression contains invalid characters or is not a simple math expression", "original": expr})
        val = safe_calc(cleaned)
        return json.dumps({"expression": cleaned, "result": float(val)})
    except Exception as e:
        return json.dumps({"error": f"Calc error: {e}"})
56
 
57
 
58
  @tool
 
61
  Get the current local time in a specified timezone.
62
 
63
  Args:
64
+ timezone: A valid timezone string (e.g., "Europe/Paris").
65
 
66
  Returns:
67
+ JSON string with {"timezone": timezone, "local_time": "..."} or {"error": "..."} on failure.
68
  """
69
  try:
 
 
70
  tz = pytz.timezone(timezone)
71
  local_time = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
72
  return json.dumps({"timezone": timezone, "local_time": local_time})
73
  except Exception as e:
74
+ return json.dumps({"error": f"Timezone error: {e}"})
75
 
76
 
77
  # -------------------------
 
79
  # -------------------------
80
# Load optional prompt templates; fall back to None when the file or PyYAML is absent.
prompt_templates = None
try:
    import yaml  # imported lazily: only needed when prompts.yaml exists
    with open("prompts.yaml", "r") as handle:
        prompt_templates = yaml.safe_load(handle)
except Exception:
    prompt_templates = None
87
 
88
+
89
  # -------------------------
90
+ # TransformersModel + CodeAgent minimal
91
  # -------------------------
92
+ model = TransformersModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct")
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  code_agent = CodeAgent(
95
  model=model,
 
99
  prompt_templates=prompt_templates
100
  )
101
 
102
+
103
  # -------------------------
104
+ # GAIA Agent wrapper
105
  # -------------------------
106
class GaiaAgentMinimal:
    """Minimal GAIA agent wrapper.

    Routes each question to one of three handlers, in priority order:
    the safe calculator tool, the timezone/time tool, and finally the
    wrapped CodeAgent LLM. Always returns a string (a plain answer or a
    JSON-encoded {"error": ...} payload) so the GAIA runner never sees
    a raised exception.
    """

    def __init__(self, code_agent):
        # smolagents CodeAgent used as the LLM fallback.
        self.code_agent = code_agent

    def _is_calc(self, q: str) -> bool:
        """Return True only for explicit calculation requests, not 'how many' type questions."""
        if not q:
            return False
        ql = q.lower()
        calc_triggers = ["calculate", "compute", "evaluate", "what is", "what's"]
        # A trigger word plus at least one digit...
        if any(tr in ql for tr in calc_triggers) and re.search(r"\d", ql):
            return True
        # ...or an arithmetic operator sandwiched between digits.
        if re.search(r"\d\s*[\+\-\*\/\%\^]\s*\d", q):
            return True
        return False

    def _is_time(self, q: str) -> bool:
        """Detect time-of-day questions (English or French).

        Uses word-boundary matches so words such as 'times', 'timeline'
        or 'sometimes' do not falsely trigger the time tool.
        """
        if not q:
            return False
        ql = q.lower()
        return bool(re.search(r"\b(time|heure)\b", ql))

    def _call_llm(self, q: str) -> str:
        """Wrapper to call LLM and return result or proper error if token/API missing."""
        try:
            resp = self.code_agent.run(q)
        except Exception as e:
            msg = str(e)
            if "api_key" in msg.lower() or "auth" in msg.lower():
                return json.dumps({"error": "LLM error: missing HF API token. Set HF_API_TOKEN secret or login with HF."})
            return json.dumps({"error": f"LLM runtime error: {msg}"})
        if resp is None:
            return json.dumps({"error": "LLM returned no output"})
        if isinstance(resp, dict):
            # Prefer the conventional answer keys before dumping the whole dict.
            for key in ("final_answer", "answer", "result", "output"):
                if key in resp:
                    return str(resp[key])
            return json.dumps(resp)
        if isinstance(resp, (int, float)):
            return str(resp)
        s = str(resp).strip()
        if s == "":
            return json.dumps({"error": "LLM returned empty string"})
        return s

    def run(self, question: str) -> str:
        """Answer *question*; never raises — failures come back as JSON error strings."""
        try:
            q = question.strip() if question else ""

            # 1) Calculator
            if self._is_calc(q):
                m = re.search(r'([0-9\.\s\+\-\*\/\^\%\(\)]+)', q)
                expr = m.group(1).strip() if m else ""
                # No operator found -> not really arithmetic; let the LLM handle it.
                if not expr or not re.search(r'[\+\-\*\/\%\^]', expr):
                    return self._call_llm(q)
                expr = expr.replace('^', '**').replace('\n', ' ').strip()
                # Cap length so pathological expressions never reach the evaluator.
                if len(expr) > 200:
                    return json.dumps({"error": "Expression too long"})
                if not re.fullmatch(r"[0-9\.\s\+\-\*\/\%\(\)\*]+", expr):
                    return json.dumps({"error": "Expression contains invalid characters or is not a simple math expression", "original": expr})
                return calculator(expr)

            # 2) Time
            if self._is_time(q):
                tz = "Europe/Paris" if "paris" in q.lower() or "france" in q.lower() else "UTC"
                return get_current_time_in_timezone(tz)

            # 3) fallback LLM
            return self._call_llm(q)

        except Exception as e:
            return json.dumps({"error": f"Agent internal error: {str(e)}"})
174
 
175
+
176
+ # instantiate GAIA agent
177
  gaia_agent = GaiaAgentMinimal(code_agent)
178
 
179
+
180
  # -------------------------
181
+ # GAIA runner
182
  # -------------------------
183
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
184
 
185
  def run_and_submit_all(profile: gr.OAuthProfile | None):
186
  space_id = os.getenv("SPACE_ID")
187
+
188
+ if profile:
189
+ username = f"{profile.username}"
190
+ else:
191
  return "Please Login to Hugging Face with the button.", None
 
192
 
193
+ api_url = DEFAULT_API_URL
194
+ questions_url = f"{api_url}/questions"
195
+ submit_url = f"{api_url}/submit"
196
+
197
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "unknown"
198
 
199
+ # Fetch questions
200
  try:
201
  response = requests.get(questions_url, timeout=15)
202
  response.raise_for_status()
 
206
  except Exception as e:
207
  return f"Error fetching questions: {e}", None
208
 
209
+ # Run agent
210
  results_log = []
211
  answers_payload = []
212
  for item in questions_data:
 
224
  if not answers_payload:
225
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
226
 
227
+ submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
228
+
229
+ # Submit
230
  try:
231
  response = requests.post(submit_url, json=submission_data, timeout=60)
232
  response.raise_for_status()
233
  result_data = response.json()
234
  final_status = (
235
+ f"Submission Successful!\n"
236
+ f"User: {result_data.get('username')}\n"
237
  f"Overall Score: {result_data.get('score', 'N/A')}% "
238
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
239
  f"Message: {result_data.get('message', 'No message received.')}"
240
  )
241
+ results_df = pd.DataFrame(results_log)
242
+ return final_status, results_df
243
  except Exception as e:
244
+ results_df = pd.DataFrame(results_log)
245
+ return f"Submission failed: {e}", results_df
246
+
247
 
248
  # -------------------------
249
  # Gradio UI
250
  # -------------------------
251
  with gr.Blocks() as demo:
252
  gr.Markdown("# Minimal GAIA Agent Runner")
253
+ gr.Markdown(
254
+ "Log in to Hugging Face, click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and submit answers."
255
+ )
256
  gr.LoginButton()
257
  run_button = gr.Button("Run Evaluation & Submit All Answers")
258
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
259
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
260
+
261
  run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
262
 
263
+
264
  if __name__ == "__main__":
265
  demo.launch(debug=True, share=False)