Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,163 +8,213 @@ import pandas as pd
|
|
| 8 |
# --- Constants ---
|
| 9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 10 |
|
| 11 |
-
# --- Basic Agent Definition ---
|
| 12 |
-
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
| 13 |
-
# --- Try import smolagents components (fail gracefully with helpful error) ---
|
| 14 |
_import_error_msgs = []
|
| 15 |
try:
|
|
|
|
| 16 |
from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, tool
|
| 17 |
except Exception as e:
|
| 18 |
-
# Keep placeholders to raise clearer errors only when instantiating
|
| 19 |
CodeAgent = None
|
| 20 |
DuckDuckGoSearchTool = None
|
| 21 |
InferenceClientModel = None
|
| 22 |
tool = None
|
| 23 |
-
_import_error_msgs.append(
|
| 24 |
-
|
| 25 |
-
"and the environment has network access if needed. Import error: " + repr(e)
|
| 26 |
-
)
|
| 27 |
|
| 28 |
-
# ---
|
| 29 |
-
def _clean_answer(raw:
|
| 30 |
"""
|
| 31 |
-
Heuristic cleaning
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
if raw is None:
|
| 39 |
return ""
|
| 40 |
text = str(raw)
|
| 41 |
|
| 42 |
-
# Normalize line endings, split, take last non-empty line
|
| 43 |
lines = [ln.strip() for ln in text.replace("\r", "").split("\n") if ln.strip() != ""]
|
| 44 |
if not lines:
|
| 45 |
candidate = text.strip()
|
| 46 |
else:
|
| 47 |
candidate = lines[-1]
|
| 48 |
|
| 49 |
-
# Remove common labels
|
| 50 |
candidate = re.sub(r'^(final answer[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
|
| 51 |
candidate = re.sub(r'^(answer[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
|
| 52 |
candidate = re.sub(r'^(the answer is[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
|
| 53 |
|
| 54 |
-
# Strip surrounding quotes and whitespace
|
| 55 |
candidate = candidate.strip().strip('\'"')
|
| 56 |
-
|
| 57 |
-
# Collapse internal multiple spaces to single space (helps formatting mismatches)
|
| 58 |
candidate = re.sub(r'\s+', ' ', candidate)
|
| 59 |
-
|
| 60 |
return candidate
|
| 61 |
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
if tool is not None:
|
| 64 |
@tool
|
| 65 |
def download_gaia_file(task_id: str) -> str:
|
| 66 |
"""
|
| 67 |
-
Download the file associated with a GAIA task
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
"""
|
| 69 |
try:
|
| 70 |
url = f"{DEFAULT_API_URL}/files/{task_id}"
|
| 71 |
resp = requests.get(url, timeout=20)
|
| 72 |
resp.raise_for_status()
|
| 73 |
-
#
|
| 74 |
if isinstance(resp.content, (bytes, bytearray)):
|
| 75 |
-
|
| 76 |
-
return resp.content.decode(resp.encoding or "utf-8", errors="replace")
|
| 77 |
-
except Exception:
|
| 78 |
-
return resp.text
|
| 79 |
return resp.text
|
| 80 |
except Exception as e:
|
| 81 |
-
# Return a short diagnostic string as tool observation (agent can handle)
|
| 82 |
return f"ERROR_DOWNLOADING_FILE: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
else:
|
| 84 |
-
#
|
| 85 |
def download_gaia_file(task_id: str) -> str:
|
| 86 |
-
raise RuntimeError("smolagents.tool
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
|
| 90 |
-
# --- Leaderboard-grade Agent (
|
| 91 |
class BasicAgent:
|
| 92 |
def __init__(self):
|
| 93 |
if CodeAgent is None or InferenceClientModel is None or DuckDuckGoSearchTool is None:
|
| 94 |
-
# Raise a clear runtime error with the original import messages
|
| 95 |
raise RuntimeError(
|
| 96 |
-
"smolagents
|
| 97 |
-
"Please ensure 'smolagents' is installed and included in requirements.txt. "
|
| 98 |
"Import details: " + "; ".join(_import_error_msgs)
|
| 99 |
)
|
| 100 |
|
| 101 |
print("Initializing GAIA leaderboard-grade agent (CodeAgent)...")
|
| 102 |
|
| 103 |
-
# Model selection: allow overriding via env var HF_MODEL_ID
|
| 104 |
model_id = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
|
| 105 |
-
# Temperature explicitly 0 for determinism
|
| 106 |
try:
|
| 107 |
self.model = InferenceClientModel(
|
| 108 |
model_id=model_id,
|
| 109 |
temperature=0.0
|
| 110 |
)
|
| 111 |
except Exception as e:
|
| 112 |
-
|
| 113 |
-
raise RuntimeError(f"Failed to initialize InferenceClientModel for '{model_id}': {e}")
|
| 114 |
|
| 115 |
-
#
|
| 116 |
try:
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
download_gaia_file
|
| 120 |
-
]
|
| 121 |
except Exception as e:
|
| 122 |
-
raise RuntimeError(f"Failed to
|
| 123 |
|
| 124 |
-
#
|
| 125 |
-
# we will supply a short instruction prefix when calling agent.run below.
|
| 126 |
self.system_instructions = (
|
| 127 |
-
"You are solving GAIA benchmark questions. "
|
| 128 |
-
"
|
| 129 |
-
"
|
| 130 |
-
"no 'FINAL ANSWER', no explanations. Keep output to a single line if possible."
|
| 131 |
)
|
| 132 |
|
| 133 |
-
# Initialize CodeAgent
|
| 134 |
try:
|
| 135 |
-
# CodeAgent signature and options may vary by smolagents version; keep minimal.
|
| 136 |
self.agent = CodeAgent(
|
| 137 |
tools=self.tools,
|
| 138 |
model=self.model
|
| 139 |
)
|
| 140 |
except TypeError:
|
| 141 |
-
# Try alternate ordering if smolagents version expects different arguments
|
| 142 |
self.agent = CodeAgent(self.model, self.tools)
|
| 143 |
|
| 144 |
def __call__(self, question: str) -> str:
|
| 145 |
"""
|
| 146 |
-
Run the
|
| 147 |
-
We prefix the question with system instructions to bias towards exact-match outputs.
|
| 148 |
"""
|
| 149 |
try:
|
| 150 |
prompt = f"{self.system_instructions}\n\nQUESTION:\n{question}\n\nAnswer:"
|
| 151 |
-
|
| 152 |
-
# Many smolagents agent.run implementations accept either a string or a dict; support both.
|
| 153 |
try:
|
| 154 |
-
|
| 155 |
except TypeError:
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
# Convert to string and clean
|
| 159 |
-
cleaned = _clean_answer(raw_result)
|
| 160 |
-
print("Raw result preview:", str(raw_result)[:300])
|
| 161 |
-
print("Cleaned final answer:", cleaned)
|
| 162 |
return cleaned
|
| 163 |
except Exception as e:
|
| 164 |
tb = traceback.format_exc()
|
| 165 |
print("Agent runtime error:", e, tb)
|
| 166 |
-
|
| 167 |
-
return f"AGENT_ERROR: {str(e)}"
|
| 168 |
|
| 169 |
|
| 170 |
|
|
|
|
| 8 |
# --- Constants ---
|
| 9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
# Import failure details, collected here so they can be surfaced later in the
# RuntimeError messages raised by BasicAgent.__init__ and the fallback tool stubs.
_import_error_msgs = []
try:
    # Agent class, search tool, model wrapper and the tool decorator all come from smolagents.
    from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, tool
except Exception as e:
    # Keep the module importable: bind None placeholders so later code can test
    # for availability, and remember why the import failed.
    CodeAgent = None
    DuckDuckGoSearchTool = None
    InferenceClientModel = None
    tool = None
    _import_error_msgs.append(repr(e))
|
| 21 |
+
|
|
|
|
|
|
|
| 22 |
|
| 23 |
+
# --- Utilities ---
|
| 24 |
+
def _clean_answer(raw: Any) -> str:
|
| 25 |
"""
|
| 26 |
+
Heuristic cleaning to produce a single-line exact-match-friendly answer.
|
| 27 |
+
|
| 28 |
+
- Keep the last non-empty line of output.
|
| 29 |
+
- Remove common labels like "Answer:", "Final answer:".
|
| 30 |
+
- Strip surrounding quotes and whitespace.
|
| 31 |
+
- Collapse internal whitespace to single spaces.
|
| 32 |
+
|
| 33 |
+
Args:
|
| 34 |
+
raw (Any): Raw agent output to clean.
|
| 35 |
+
|
| 36 |
+
Returns:
|
| 37 |
+
str: Cleaned single-line answer string.
|
| 38 |
"""
|
| 39 |
if raw is None:
|
| 40 |
return ""
|
| 41 |
text = str(raw)
|
| 42 |
|
|
|
|
| 43 |
lines = [ln.strip() for ln in text.replace("\r", "").split("\n") if ln.strip() != ""]
|
| 44 |
if not lines:
|
| 45 |
candidate = text.strip()
|
| 46 |
else:
|
| 47 |
candidate = lines[-1]
|
| 48 |
|
|
|
|
| 49 |
candidate = re.sub(r'^(final answer[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
|
| 50 |
candidate = re.sub(r'^(answer[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
|
| 51 |
candidate = re.sub(r'^(the answer is[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
|
| 52 |
|
|
|
|
| 53 |
candidate = candidate.strip().strip('\'"')
|
|
|
|
|
|
|
| 54 |
candidate = re.sub(r'\s+', ' ', candidate)
|
|
|
|
| 55 |
return candidate
|
| 56 |
|
| 57 |
+
|
| 58 |
+
# --- Safe small arithmetic evaluator tool ---
|
| 59 |
+
def _safe_eval_arith(expr: str) -> str:
|
| 60 |
+
"""
|
| 61 |
+
Safely evaluate simple arithmetic expressions using ast.
|
| 62 |
+
|
| 63 |
+
Supports: + - * / ** % unary ops and parentheses, numeric literals.
|
| 64 |
+
Rejects names, attribute access, calls, comprehensions, etc.
|
| 65 |
+
"""
|
| 66 |
+
try:
|
| 67 |
+
node = ast.parse(expr, mode="eval")
|
| 68 |
+
|
| 69 |
+
# Define allowed node types
|
| 70 |
+
allowed_nodes = (
|
| 71 |
+
ast.Expression, ast.BinOp, ast.UnaryOp, ast.Num, ast.Constant,
|
| 72 |
+
ast.Add, ast.Sub, ast.Mult, ast.Div, ast.Pow, ast.Mod,
|
| 73 |
+
ast.UAdd, ast.USub, ast.Load, ast.Tuple, ast.List, ast.Expr,
|
| 74 |
+
ast.Subscript, ast.Index, ast.Slice, ast.Tuple
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
# Walk the AST and ensure nodes are permitted
|
| 78 |
+
for n in ast.walk(node):
|
| 79 |
+
if not isinstance(n, allowed_nodes):
|
| 80 |
+
# numeric constants in Python 3.8+ are ast.Constant
|
| 81 |
+
# allow parentheses (they are represented by grouping nodes)
|
| 82 |
+
raise ValueError(f"Disallowed expression element: {type(n).__name__}")
|
| 83 |
+
|
| 84 |
+
# Evaluate in a restricted namespace
|
| 85 |
+
result = eval(compile(node, filename="<ast>", mode="eval"), {"__builtins__": {}}, {})
|
| 86 |
+
return str(result)
|
| 87 |
+
except Exception as e:
|
| 88 |
+
return f"ERROR_EVAL: {e}"
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# --- Tools (must have good docstrings for smolagents) ---
|
| 92 |
# Tool definitions. When the smolagents `tool` decorator was imported, the real
# tools are defined and decorated; otherwise same-named stubs are defined that
# raise a RuntimeError carrying the recorded import errors.
# NOTE(review): smolagents' @tool presumably derives each tool's description and
# argument schema from the docstring below — confirm before rewording them.
if tool is not None:
    @tool
    def download_gaia_file(task_id: str) -> str:
        """
        Download the text content of the file associated with a GAIA task ID.

        Args:
            task_id (str): The task identifier for which the file should be downloaded. This
                value comes from the GAIA questions endpoint and is used to fetch the file via
                the /files/{task_id} route.

        Returns:
            str: The textual content of the downloaded file, or an error string beginning with
                'ERROR_DOWNLOADING_FILE:' in case of failure.
        """
        try:
            url = f"{DEFAULT_API_URL}/files/{task_id}"
            resp = requests.get(url, timeout=20)
            # Non-2xx responses become exceptions and are reported via the except branch.
            resp.raise_for_status()
            # Decode the raw bytes with the response's declared encoding (UTF-8 when
            # none is declared); undecodable bytes are replaced rather than raising.
            if isinstance(resp.content, (bytes, bytearray)):
                return resp.content.decode(resp.encoding or "utf-8", errors="replace")
            # Fallback used only when the content is not a bytes-like object.
            return resp.text
        except Exception as e:
            # Errors are returned as text rather than raised.
            return f"ERROR_DOWNLOADING_FILE: {e}"

    @tool
    def web_search(query: str) -> str:
        """
        Execute a web search using DuckDuckGoSearchTool (wrapped) and return the combined results.

        Args:
            query (str): A natural-language query describing the information to find.

        Returns:
            str: Search results or a short error string beginning with 'ERROR_SEARCH:'.
        """
        try:
            # A new DuckDuckGoSearchTool is instantiated on every call and invoked
            # directly with the query; failures are reported as text, not raised.
            return DuckDuckGoSearchTool()(query)
        except Exception as e:
            return f"ERROR_SEARCH: {e}"

    @tool
    def simple_calc(expression: str) -> str:
        """
        Compute a simple arithmetic expression safely.

        Args:
            expression (str): A mathematical expression like '2 + 3 * (4 - 1)'.

        Returns:
            str: The numeric result as a string, or an error string beginning with 'ERROR_EVAL:'.
        """
        # Thin wrapper: all parsing and evaluation happens in _safe_eval_arith.
        return _safe_eval_arith(expression)
else:
    # The decorator is unavailable: define undecorated stand-ins with the same names
    # and signatures that fail loudly, including the recorded import errors.
    def download_gaia_file(task_id: str) -> str:
        raise RuntimeError("smolagents.tool decorator unavailable. Install smolagents and redeploy. Import errors: " + "; ".join(_import_error_msgs))
    def web_search(query: str) -> str:
        raise RuntimeError("smolagents.tool decorator unavailable. Install smolagents and redeploy. Import errors: " + "; ".join(_import_error_msgs))
    def simple_calc(expression: str) -> str:
        raise RuntimeError("smolagents.tool decorator unavailable. Install smolagents and redeploy. Import errors: " + "; ".join(_import_error_msgs))
|
| 156 |
|
| 157 |
|
| 158 |
+
# --- Leaderboard-grade Agent (CodeAgent) ---
|
| 159 |
class BasicAgent:
    """
    Callable wrapper around a smolagents CodeAgent for answering GAIA questions.

    Construction builds the model, the tool list and the agent; calling the
    instance with a question returns a cleaned single-line answer string.
    """

    def __init__(self):
        # Refuse to construct when any smolagents component failed to import.
        required_components = (CodeAgent, InferenceClientModel, DuckDuckGoSearchTool)
        if any(component is None for component in required_components):
            import_details = "; ".join(_import_error_msgs)
            raise RuntimeError(
                "smolagents imports failed. Ensure 'smolagents' is in requirements.txt and redeploy. "
                "Import details: " + import_details
            )

        print("Initializing GAIA leaderboard-grade agent (CodeAgent)...")

        # The model id can be overridden through the HF_MODEL_ID environment variable.
        chosen_model = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
        try:
            self.model = InferenceClientModel(model_id=chosen_model, temperature=0.0)
        except Exception as exc:
            raise RuntimeError(f"Failed to init InferenceClientModel({chosen_model}): {exc}")

        # Tool list: a search tool instance plus the module-level file and calculator tools.
        try:
            search_tool = DuckDuckGoSearchTool()
            self.tools = [search_tool, download_gaia_file, simple_calc]
        except Exception as exc:
            raise RuntimeError(f"Failed to init tools: {exc}")

        # Instruction prefix prepended to every question in __call__.
        self.system_instructions = " ".join(
            [
                "You are solving GAIA benchmark questions.",
                "Use available tools when needed.",
                "If a file is referenced, download and read it.",
                "Do NOT reveal your chain-of-thought or reasoning.",
                "The final output MUST be exactly the answer only (one short line).",
                "No extra commentary, no 'FINAL ANSWER'.",
            ]
        )

        # Keyword construction first; on TypeError retry positionally as (model, tools).
        try:
            self.agent = CodeAgent(tools=self.tools, model=self.model)
        except TypeError:
            self.agent = CodeAgent(self.model, self.tools)

    def __call__(self, question: str) -> str:
        """
        Run the agent on ``question`` and return the cleaned answer.

        Any exception raised while running or cleaning is printed with its
        traceback and reported as a string starting with 'AGENT_ERROR:'.
        """
        try:
            prompt = "\n\n".join(
                (self.system_instructions, f"QUESTION:\n{question}", "Answer:")
            )
            # String input first; a TypeError triggers a retry with a dict payload.
            try:
                raw = self.agent.run(prompt)
            except TypeError:
                raw = self.agent.run({"input": prompt})
            return _clean_answer(raw)
        except Exception as e:
            print("Agent runtime error:", e, traceback.format_exc())
            return f"AGENT_ERROR: {e}"
|
|
|
|
| 218 |
|
| 219 |
|
| 220 |
|