#!/usr/bin/env python3
import os
import ast
import operator
import datetime
import pytz
import json
import re
import requests
import pandas as pd
import gradio as gr
from smolagents import CodeAgent, TransformersModel, tool
# -------------------------
# Minimal tools
# -------------------------
# Whitelist of AST operator nodes -> arithmetic functions. Anything not in
# this map (names, calls, attributes, ...) is rejected by _eval_node, so
# arbitrary code embedded in the expression string can never execute.
_allowed_ops = {
    ast.Add: operator.add, ast.Sub: operator.sub, ast.Mult: operator.mul,
    ast.Div: operator.truediv, ast.Pow: operator.pow, ast.USub: operator.neg,
    ast.Mod: operator.mod,
}


def _eval_node(node):
    """Recursively evaluate one parsed expression node.

    Accepts only real numeric literals plus the whitelisted unary/binary
    operators; raises ValueError for anything else.
    """
    if isinstance(node, ast.Constant):
        # Only plain numbers make sense in a calculator; rejecting strings,
        # booleans and None here keeps the result well-defined for callers
        # that convert it with float().
        if isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
            return node.value
        raise ValueError("Unsupported expression")
    # NOTE: the former `ast.Num` branch was removed — it was dead code,
    # since ast.Constant (checked above) matches all numeric literals on
    # Python 3.8+, and ast.Num itself is deprecated.
    if isinstance(node, ast.UnaryOp) and type(node.op) in _allowed_ops:
        return _allowed_ops[type(node.op)](_eval_node(node.operand))
    if isinstance(node, ast.BinOp) and type(node.op) in _allowed_ops:
        return _allowed_ops[type(node.op)](_eval_node(node.left), _eval_node(node.right))
    raise ValueError("Unsupported expression")


def safe_calc(expr: str):
    """Parse *expr* in eval mode and evaluate it with _eval_node.

    Raises SyntaxError on malformed input and ValueError on disallowed
    constructs; callers (e.g. the calculator tool) catch these.
    """
    tree = ast.parse(expr, mode='eval')
    return _eval_node(tree.body)
@tool
def calculator(expr: str) -> str:
    """
    Safely evaluate a mathematical expression.

    Args:
        expr: A string containing a math expression like "2 + 2 * 3".

    Returns:
        JSON string with {"expression": expr, "result": value} or {"error": "..."} on failure.
    """
    # Everything stays inside the try: parse errors, disallowed constructs
    # and float() conversion failures all collapse into one error payload.
    try:
        return json.dumps({"expression": expr, "result": float(safe_calc(expr))})
    except Exception as exc:
        return json.dumps({"error": f"Calc error: {exc}"})
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """
    Get the current local time in a specified timezone.

    Args:
        timezone: A valid timezone string (e.g., "Europe/Paris").

    Returns:
        JSON string with {"timezone": timezone, "local_time": "..."} or {"error": "..."} on failure.
    """
    try:
        zone = pytz.timezone(timezone)  # raises UnknownTimeZoneError on bad input
        now = datetime.datetime.now(zone)
        return json.dumps({"timezone": timezone, "local_time": now.strftime("%Y-%m-%d %H:%M:%S")})
    except Exception as exc:
        return json.dumps({"error": f"Timezone error: {exc}"})
# -------------------------
# Load prompts.yaml if exists
# -------------------------
# Best-effort: the agent runs with library-default prompts when the file
# (or PyYAML itself) is missing, so any failure simply leaves this as None.
prompt_templates = None
try:
    import yaml

    # Explicit encoding: the platform default is not guaranteed to be UTF-8.
    with open("prompts.yaml", "r", encoding="utf-8") as fh:
        prompt_templates = yaml.safe_load(fh)
except Exception:
    prompt_templates = None
# -------------------------
# TransformersModel + CodeAgent minimal
# -------------------------
# Small instruct model loaded locally via transformers (no inference API).
model = TransformersModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct")
# CodeAgent drives the two tools above; max_steps bounds runaway loops and
# verbosity_level=0 keeps Space logs quiet.
code_agent = CodeAgent(
    model=model,
    tools=[calculator, get_current_time_in_timezone],
    max_steps=6,
    verbosity_level=0,
    prompt_templates=prompt_templates  # None -> smolagents defaults
)
# -------------------------
# GAIA Agent wrapper
# -------------------------
class GaiaAgentMinimal:
    """Routes each question to the calculator, the time tool, or the LLM."""

    def __init__(self, code_agent):
        self.code_agent = code_agent

    def _is_calc(self, q: str) -> bool:
        """Return True only for explicit calculation requests, not 'how many' type questions."""
        if not q:
            return False
        lowered = q.lower()
        wants_math = any(
            trigger in lowered
            for trigger in ("calculate", "compute", "evaluate", "what is", "what's")
        )
        if wants_math and re.search(r"\d", lowered):
            return True
        # Bare arithmetic such as "3 + 4" also counts, without a trigger word.
        return re.search(r"\d\s*[\+\-\*\/\%\^]\s*\d", q) is not None

    def _is_time(self, q: str) -> bool:
        """Heuristic: does the question ask for the current time (EN or FR)?"""
        lowered = q.lower()
        return any(
            marker in lowered
            for marker in ("time", "heure", "quelle heure", "what time")
        )

    def _call_llm(self, q: str) -> str:
        """Wrapper to call LLM and return result or proper error if token/API missing."""
        try:
            output = self.code_agent.run(q)
        except Exception as exc:
            detail = str(exc)
            if "api_key" in detail.lower() or "auth" in detail.lower():
                return json.dumps({"error": "LLM error: missing HF API token. Set HF_API_TOKEN secret or login with HF."})
            return json.dumps({"error": f"LLM runtime error: {detail}"})
        if output is None:
            return json.dumps({"error": "LLM returned no output"})
        if isinstance(output, dict):
            # Prefer well-known answer keys; otherwise dump the whole dict.
            for key in ("final_answer", "answer", "result", "output"):
                if key in output:
                    return str(output[key])
            return json.dumps(output)
        if isinstance(output, (int, float)):
            return str(output)
        text = str(output).strip()
        if text == "":
            return json.dumps({"error": "LLM returned empty string"})
        return text

    def run(self, question: str) -> str:
        try:
            q = question.strip() if question else ""
            # 1) Calculator path
            if self._is_calc(q):
                match = re.search(r'([0-9\.\s\+\-\*\/\^\%\(\)]+)', q)
                candidate = match.group(1).strip() if match else ""
                # No extractable expression with an operator -> let the LLM try.
                if not candidate or not re.search(r'[\+\-\*\/\%\^]', candidate):
                    return self._call_llm(q)
                candidate = candidate.replace('^', '**').replace('\n', ' ').strip()
                if not re.fullmatch(r"[0-9\.\s\+\-\*\/\%\(\)\*]+", candidate):
                    return json.dumps({"error": "Expression contains invalid characters or is not a simple math expression", "original": candidate})
                return calculator(candidate)
            # 2) Time path
            if self._is_time(q):
                lowered = q.lower()
                tz = "Europe/Paris" if "paris" in lowered or "france" in lowered else "UTC"
                return get_current_time_in_timezone(tz)
            # 3) Fallback: LLM
            return self._call_llm(q)
        except Exception as exc:
            return json.dumps({"error": f"Agent internal error: {str(exc)}"})
# instantiate GAIA agent
# Single shared instance used by the evaluation runner below.
gaia_agent = GaiaAgentMinimal(code_agent)
# -------------------------
# GAIA runner
# -------------------------
# Scoring service for the HF Agents course (unit 4); exposes /questions and /submit.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch all GAIA questions, answer each with the agent, submit the lot.

    Returns a (status message, results DataFrame-or-None) pair for the UI.
    """
    space_id = os.getenv("SPACE_ID")
    # Guard clause: submission requires an authenticated HF user.
    if not profile:
        return "Please Login to Hugging Face with the button.", None
    username = f"{profile.username}"

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "unknown"

    # Fetch questions
    try:
        resp = requests.get(questions_url, timeout=15)
        resp.raise_for_status()
        questions_data = resp.json()
        if not questions_data:
            return "Fetched questions list is empty or invalid format.", None
    except Exception as exc:
        return f"Error fetching questions: {exc}", None

    # Run the agent over every question, logging each outcome either way.
    results_log = []
    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            answer = gaia_agent.run(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": answer})
        except Exception as exc:
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {exc}"})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}

    # Submit
    try:
        resp = requests.post(submit_url, json=submission_data, timeout=60)
        resp.raise_for_status()
        result_data = resp.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        return final_status, pd.DataFrame(results_log)
    except Exception as exc:
        return f"Submission failed: {exc}", pd.DataFrame(results_log)
# -------------------------
# Gradio UI
# -------------------------
with gr.Blocks() as demo:
    gr.Markdown("# Minimal GAIA Agent Runner")
    gr.Markdown(
        "Log in to Hugging Face, click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and submit answers."
    )
    # Login is required: run_and_submit_all refuses to run without a profile.
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
    # NOTE(review): no inputs= listed — presumably Gradio injects the OAuth
    # profile into fn from its gr.OAuthProfile annotation; confirm with the
    # Gradio login/OAuth docs.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)