TuNan52 committed on
Commit
8f07707
·
verified ·
1 Parent(s): 1db7e12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -69
app.py CHANGED
@@ -8,163 +8,213 @@ import pandas as pd
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- # --- Try import smolagents components (fail gracefully with helpful error) ---
14
  _import_error_msgs = []
15
  try:
 
16
  from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, tool
17
  except Exception as e:
18
- # Keep placeholders to raise clearer errors only when instantiating
19
  CodeAgent = None
20
  DuckDuckGoSearchTool = None
21
  InferenceClientModel = None
22
  tool = None
23
- _import_error_msgs.append(
24
- "Failed to import smolagents components. Ensure 'smolagents' is installed in requirements.txt "
25
- "and the environment has network access if needed. Import error: " + repr(e)
26
- )
27
 
28
- # --- Utility: clean agent output for exact-match grading ---
29
- def _clean_answer(raw: str) -> str:
30
  """
31
- Heuristic cleaning:
32
- - strip surrounding whitespace and quotes
33
- - drop lines before the final non-empty line
34
- - remove common prefixes like 'Answer:', 'Final answer:', 'The answer is'
35
- - return that single-line string
36
- Note: this is intentionally conservative but helps avoid trivial format mismatches.
 
 
 
 
 
 
37
  """
38
  if raw is None:
39
  return ""
40
  text = str(raw)
41
 
42
- # Normalize line endings, split, take last non-empty line
43
  lines = [ln.strip() for ln in text.replace("\r", "").split("\n") if ln.strip() != ""]
44
  if not lines:
45
  candidate = text.strip()
46
  else:
47
  candidate = lines[-1]
48
 
49
- # Remove common labels
50
  candidate = re.sub(r'^(final answer[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
51
  candidate = re.sub(r'^(answer[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
52
  candidate = re.sub(r'^(the answer is[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
53
 
54
- # Strip surrounding quotes and whitespace
55
  candidate = candidate.strip().strip('\'"')
56
-
57
- # Collapse internal multiple spaces to single space (helps formatting mismatches)
58
  candidate = re.sub(r'\s+', ' ', candidate)
59
-
60
  return candidate
61
 
62
- # --- Tool: GAIA file downloader ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  if tool is not None:
64
  @tool
65
  def download_gaia_file(task_id: str) -> str:
66
  """
67
- Download the file associated with a GAIA task and return its text content.
 
 
 
 
 
 
 
 
 
68
  """
69
  try:
70
  url = f"{DEFAULT_API_URL}/files/{task_id}"
71
  resp = requests.get(url, timeout=20)
72
  resp.raise_for_status()
73
- # If content-type is binary, this might return bytes; convert to text defensively
74
  if isinstance(resp.content, (bytes, bytearray)):
75
- try:
76
- return resp.content.decode(resp.encoding or "utf-8", errors="replace")
77
- except Exception:
78
- return resp.text
79
  return resp.text
80
  except Exception as e:
81
- # Return a short diagnostic string as tool observation (agent can handle)
82
  return f"ERROR_DOWNLOADING_FILE: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  else:
84
- # Placeholder to fail fast with clear message if tool decorator missing
85
  def download_gaia_file(task_id: str) -> str:
86
- raise RuntimeError("smolagents.tool is not available; check smolagents installation. "
87
- "Original import error(s): " + "; ".join(_import_error_msgs))
 
 
 
88
 
89
 
90
- # --- Leaderboard-grade Agent (uses CodeAgent under smolagents) ---
91
  class BasicAgent:
92
  def __init__(self):
93
  if CodeAgent is None or InferenceClientModel is None or DuckDuckGoSearchTool is None:
94
- # Raise a clear runtime error with the original import messages
95
  raise RuntimeError(
96
- "smolagents components are not available in this environment. "
97
- "Please ensure 'smolagents' is installed and included in requirements.txt. "
98
  "Import details: " + "; ".join(_import_error_msgs)
99
  )
100
 
101
  print("Initializing GAIA leaderboard-grade agent (CodeAgent)...")
102
 
103
- # Model selection: allow overriding via env var HF_MODEL_ID
104
  model_id = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
105
- # Temperature explicitly 0 for determinism
106
  try:
107
  self.model = InferenceClientModel(
108
  model_id=model_id,
109
  temperature=0.0
110
  )
111
  except Exception as e:
112
- # Provide a friendly message if model instantiation fails
113
- raise RuntimeError(f"Failed to initialize InferenceClientModel for '{model_id}': {e}")
114
 
115
- # Tools: search + file download
116
  try:
117
- self.tools = [
118
- DuckDuckGoSearchTool(),
119
- download_gaia_file
120
- ]
121
  except Exception as e:
122
- raise RuntimeError(f"Failed to initialize tools: {e}")
123
 
124
- # A concise system prompt guiding strict outputs. CodeAgent may accept prompts via run input;
125
- # we will supply a short instruction prefix when calling agent.run below.
126
  self.system_instructions = (
127
- "You are solving GAIA benchmark questions. "
128
- "Rules: use tools when needed. If a file is available, read it. "
129
- "Do NOT output reasoning. The final output MUST be exactly the answer only—no extra words, "
130
- "no 'FINAL ANSWER', no explanations. Keep output to a single line if possible."
131
  )
132
 
133
- # Initialize CodeAgent
134
  try:
135
- # CodeAgent signature and options may vary by smolagents version; keep minimal.
136
  self.agent = CodeAgent(
137
  tools=self.tools,
138
  model=self.model
139
  )
140
  except TypeError:
141
- # Try alternate ordering if smolagents version expects different arguments
142
  self.agent = CodeAgent(self.model, self.tools)
143
 
144
  def __call__(self, question: str) -> str:
145
  """
146
- Run the agent on the given question and return a cleaned answer string.
147
- We prefix the question with system instructions to bias towards exact-match outputs.
148
  """
149
  try:
150
  prompt = f"{self.system_instructions}\n\nQUESTION:\n{question}\n\nAnswer:"
151
- print("Running agent on question (preview):", (question[:200] + "...") if len(question) > 200 else question)
152
- # Many smolagents agent.run implementations accept either a string or a dict; support both.
153
  try:
154
- raw_result = self.agent.run(prompt)
155
  except TypeError:
156
- # fallback if run signature is different
157
- raw_result = self.agent.run({"input": prompt})
158
- # Convert to string and clean
159
- cleaned = _clean_answer(raw_result)
160
- print("Raw result preview:", str(raw_result)[:300])
161
- print("Cleaned final answer:", cleaned)
162
  return cleaned
163
  except Exception as e:
164
  tb = traceback.format_exc()
165
  print("Agent runtime error:", e, tb)
166
- # Return a short sentinel so submission still proceeds (tooling will show errors per-item)
167
- return f"AGENT_ERROR: {str(e)}"
168
 
169
 
170
 
 
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
 
 
 
11
  _import_error_msgs = []
12
  try:
13
+ # Use CodeAgent (stable export), DuckDuckGoSearchTool, InferenceClientModel, and tool decorator
14
  from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel, tool
15
  except Exception as e:
 
16
  CodeAgent = None
17
  DuckDuckGoSearchTool = None
18
  InferenceClientModel = None
19
  tool = None
20
+ _import_error_msgs.append(repr(e))
21
+
 
 
22
 
23
+ # --- Utilities ---
24
+ def _clean_answer(raw: Any) -> str:
25
  """
26
+ Heuristic cleaning to produce a single-line exact-match-friendly answer.
27
+
28
+ - Keep the last non-empty line of output.
29
+ - Remove common labels like "Answer:", "Final answer:".
30
+ - Strip surrounding quotes and whitespace.
31
+ - Collapse internal whitespace to single spaces.
32
+
33
+ Args:
34
+ raw (Any): Raw agent output to clean.
35
+
36
+ Returns:
37
+ str: Cleaned single-line answer string.
38
  """
39
  if raw is None:
40
  return ""
41
  text = str(raw)
42
 
 
43
  lines = [ln.strip() for ln in text.replace("\r", "").split("\n") if ln.strip() != ""]
44
  if not lines:
45
  candidate = text.strip()
46
  else:
47
  candidate = lines[-1]
48
 
 
49
  candidate = re.sub(r'^(final answer[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
50
  candidate = re.sub(r'^(answer[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
51
  candidate = re.sub(r'^(the answer is[:\-\s]*)', '', candidate, flags=re.IGNORECASE)
52
 
 
53
  candidate = candidate.strip().strip('\'"')
 
 
54
  candidate = re.sub(r'\s+', ' ', candidate)
 
55
  return candidate
56
 
57
+
58
+ # --- Safe small arithmetic evaluator tool ---
59
+ def _safe_eval_arith(expr: str) -> str:
60
+ """
61
+ Safely evaluate simple arithmetic expressions using ast.
62
+
63
+ Supports: + - * / ** % unary ops and parentheses, numeric literals.
64
+ Rejects names, attribute access, calls, comprehensions, etc.
65
+ """
66
+ try:
67
+ node = ast.parse(expr, mode="eval")
68
+
69
+ # Define allowed node types
70
+ allowed_nodes = (
71
+ ast.Expression, ast.BinOp, ast.UnaryOp, ast.Num, ast.Constant,
72
+ ast.Add, ast.Sub, ast.Mult, ast.Div, ast.Pow, ast.Mod,
73
+ ast.UAdd, ast.USub, ast.Load, ast.Tuple, ast.List, ast.Expr,
74
+ ast.Subscript, ast.Index, ast.Slice, ast.Tuple
75
+ )
76
+
77
+ # Walk the AST and ensure nodes are permitted
78
+ for n in ast.walk(node):
79
+ if not isinstance(n, allowed_nodes):
80
+ # numeric constants in Python 3.8+ are ast.Constant
81
+ # allow parentheses (they are represented by grouping nodes)
82
+ raise ValueError(f"Disallowed expression element: {type(n).__name__}")
83
+
84
+ # Evaluate in a restricted namespace
85
+ result = eval(compile(node, filename="<ast>", mode="eval"), {"__builtins__": {}}, {})
86
+ return str(result)
87
+ except Exception as e:
88
+ return f"ERROR_EVAL: {e}"
89
+
90
+
91
+ # --- Tools (must have good docstrings for smolagents) ---
92
if tool is not None:
    @tool
    def download_gaia_file(task_id: str) -> str:
        """
        Download the text content of the file associated with a GAIA task ID.

        Args:
            task_id (str): The task identifier for which the file should be downloaded. This
                value comes from the GAIA questions endpoint and is used to fetch the file via
                the /files/{task_id} route.

        Returns:
            str: The textual content of the downloaded file, or an error string beginning with
                'ERROR_DOWNLOADING_FILE:' in case of failure.
        """
        from urllib.parse import quote

        try:
            # task_id is supplied by the model, so escape it before placing it in the URL path.
            url = f"{DEFAULT_API_URL}/files/{quote(str(task_id), safe='')}"
            resp = requests.get(url, timeout=20)
            resp.raise_for_status()
            # resp.content is always bytes; decode with the declared charset, falling back to
            # UTF-8, and replace undecodable bytes instead of failing.
            return resp.content.decode(resp.encoding or "utf-8", errors="replace")
        except Exception as e:
            # Tool boundary: report the failure as an observation string rather than raising.
            return f"ERROR_DOWNLOADING_FILE: {e}"

    @tool
    def web_search(query: str) -> str:
        """
        Execute a web search using DuckDuckGoSearchTool (wrapped) and return the combined results.

        Args:
            query (str): A natural-language query describing the information to find.

        Returns:
            str: Search results or a short error string beginning with 'ERROR_SEARCH:'.
        """
        try:
            # A fresh DuckDuckGoSearchTool is built for each call; this wrapper does not
            # share the instance that the agent creates for its own tool list.
            return DuckDuckGoSearchTool()(query)
        except Exception as e:
            return f"ERROR_SEARCH: {e}"

    @tool
    def simple_calc(expression: str) -> str:
        """
        Compute a simple arithmetic expression safely.

        Args:
            expression (str): A mathematical expression like '2 + 3 * (4 - 1)'.

        Returns:
            str: The numeric result as a string, or an error string beginning with 'ERROR_EVAL:'.
        """
        return _safe_eval_arith(expression)
else:
    # smolagents.tool is unavailable: keep the same names defined so the module still
    # imports, but make every call fail with the recorded import errors.
    def _raise_tool_unavailable():
        """Raise the shared 'tool decorator unavailable' error, including the import errors."""
        raise RuntimeError(
            "smolagents.tool decorator unavailable. Install smolagents and redeploy. Import errors: "
            + "; ".join(_import_error_msgs)
        )

    def download_gaia_file(task_id: str) -> str:
        """Fallback stub; always raises RuntimeError because smolagents is missing."""
        _raise_tool_unavailable()

    def web_search(query: str) -> str:
        """Fallback stub; always raises RuntimeError because smolagents is missing."""
        _raise_tool_unavailable()

    def simple_calc(expression: str) -> str:
        """Fallback stub; always raises RuntimeError because smolagents is missing."""
        _raise_tool_unavailable()
156
 
157
 
158
+ # --- Leaderboard-grade Agent (CodeAgent) ---
159
class BasicAgent:
    """
    Question-answering agent that wraps a smolagents CodeAgent.

    Instances are callable: calling one with a question string runs the agent and
    returns a cleaned single-line answer (or an 'AGENT_ERROR: ...' sentinel).
    """

    def __init__(self):
        """
        Build the model, the tool list and the CodeAgent.

        The model id is read from the HF_MODEL_ID environment variable and defaults
        to 'Qwen/Qwen2.5-72B-Instruct'.

        Raises:
            RuntimeError: If the smolagents imports failed, or if the model or the
                tools cannot be created. The original exception is chained as the cause.
        """
        if CodeAgent is None or InferenceClientModel is None or DuckDuckGoSearchTool is None:
            raise RuntimeError(
                "smolagents imports failed. Ensure 'smolagents' is in requirements.txt and redeploy. "
                "Import details: " + "; ".join(_import_error_msgs)
            )

        print("Initializing GAIA leaderboard-grade agent (CodeAgent)...")

        model_id = os.getenv("HF_MODEL_ID", "Qwen/Qwen2.5-72B-Instruct")
        try:
            self.model = InferenceClientModel(
                model_id=model_id,
                temperature=0.0,
            )
        except Exception as e:
            # Chain the cause so the underlying traceback is not lost.
            raise RuntimeError(f"Failed to init InferenceClientModel({model_id}): {e}") from e

        # Only the search tool needs constructing; the other tools are module-level objects.
        try:
            ddg = DuckDuckGoSearchTool()
        except Exception as e:
            raise RuntimeError(f"Failed to init tools: {e}") from e
        self.tools = [ddg, download_gaia_file, simple_calc]

        # Instructions prepended to every question to bias towards answer-only output.
        self.system_instructions = (
            "You are solving GAIA benchmark questions. Use available tools when needed. "
            "If a file is referenced, download and read it. Do NOT reveal your chain-of-thought or reasoning. "
            "The final output MUST be exactly the answer only (one short line). No extra commentary, no 'FINAL ANSWER'."
        )

        # NOTE(review): the positional fallback assumes some smolagents versions take
        # (model, tools) positionally -- confirm against the pinned version. A TypeError
        # raised for any other reason inside the constructor also triggers this retry.
        try:
            self.agent = CodeAgent(
                tools=self.tools,
                model=self.model,
            )
        except TypeError:
            self.agent = CodeAgent(self.model, self.tools)

    def __call__(self, question: str) -> str:
        """
        Run the CodeAgent on the provided question and return a cleaned single-line answer.

        Args:
            question (str): The question text to answer.

        Returns:
            str: The cleaned answer, or 'AGENT_ERROR: <message>' if anything raised
            while running the agent or cleaning its output (the traceback is printed).
        """
        try:
            prompt = f"{self.system_instructions}\n\nQUESTION:\n{question}\n\nAnswer:"
            # NOTE(review): the dict retry assumes some smolagents versions accept
            # {"input": ...} -- confirm. A TypeError raised from inside the run itself
            # also lands here and causes a second run.
            try:
                raw = self.agent.run(prompt)
            except TypeError:
                raw = self.agent.run({"input": prompt})
            return _clean_answer(raw)
        except Exception as e:
            # Boundary of a single question: log and return a sentinel so the caller
            # can continue with the remaining questions.
            tb = traceback.format_exc()
            print("Agent runtime error:", e, tb)
            return f"AGENT_ERROR: {e}"
 
218
 
219
 
220