MainStreet123 committed on
Commit
00d93b9
·
verified ·
1 Parent(s): 2543503

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -19
app.py CHANGED
@@ -14,7 +14,11 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
  HF_INFERENCE_URL = "https://api-inference.huggingface.co/models"
15
  ROUTER_MODEL = "HuggingFaceH4/zephyr-7b-beta"
16
  EVALUATOR_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 
 
17
  MAX_MANAGER_ITERATIONS = 5
 
 
18
 
19
  # --- Tools (used by agents) ---
20
 
@@ -136,6 +140,50 @@ def final_answer_tool(answer: str) -> str:
136
  return answer.strip()
137
 
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # --- Code Agent (has Python interpreter tool) ---
140
 
141
  def _extract_python_code(text: str) -> str:
@@ -170,9 +218,10 @@ class CodeAgent:
170
  prompt = (
171
  f"Question: {question}\n\n"
172
  "Write a single Python code block to answer this. Use a variable 'result' for the final answer. "
 
173
  "Only output valid Python code, no explanation."
174
  )
175
- code = _llm_call(prompt, ROUTER_MODEL, max_new_tokens=400)
176
  if not code:
177
  code = _heuristic_code_from_question(question)
178
  code = _extract_python_code(code)
@@ -183,34 +232,52 @@ class CodeAgent:
183
 
184
  # --- Web Search Agent (DuckDuckGo + visit web page tools) ---
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  class WebSearchAgent:
187
  def __init__(self):
188
  print("WebSearchAgent initialized.")
189
 
190
  def __call__(self, question: str) -> str:
191
  print(f"WebSearchAgent received (first 50 chars): {question[:50]}...")
192
- snippets = duckduckgo_search_tool(question, max_results=5)
193
- if not snippets or "No search results" in snippets:
194
- return "No search results found."
195
- first_url = None
196
- for line in snippets.split("\n"):
197
- m = re.search(r"\((https?://[^)]+)\)", line)
198
- if m:
199
- first_url = m.group(1)
 
 
 
 
 
 
 
200
  break
201
- if first_url:
202
- page_text = visit_web_page_tool(first_url, max_chars=4000)
203
- if "Visit error" not in page_text:
204
- snippets = snippets + "\n\n--- Page content ---\n" + page_text[:3000]
205
  prompt = (
206
- f"Question: {question}\n\nRelevant information:\n{snippets[:6000]}\n\n"
207
- "Provide a concise, direct answer (string or number). No preamble."
208
  )
209
  answer = _llm_call(prompt, EVALUATOR_MODEL, max_new_tokens=200)
210
  if answer:
211
  return answer.strip()
212
- blocks = [b.strip() for b in snippets.split("\n\n") if len(b.strip()) > 20]
213
- return blocks[0][:500] if blocks else snippets[:500]
214
 
215
 
216
  # --- Manager Agent (user input = question; routes code/web; evaluates accuracy; final answer or retry) ---
@@ -246,8 +313,9 @@ class ManagerAgent:
246
  if reply and "Error:" not in reply[:100] and "Could not" not in reply[:100]:
247
  best_answer = reply
248
  if evaluate_accuracy_tool(question, reply):
249
- return final_answer_tool(reply)
250
- return final_answer_tool(best_answer) if best_answer else "I could not determine a reliable answer."
 
251
 
252
  def run_and_submit_all( profile: gr.OAuthProfile | None):
253
  """
 
14
  HF_INFERENCE_URL = "https://api-inference.huggingface.co/models"
15
  ROUTER_MODEL = "HuggingFaceH4/zephyr-7b-beta"
16
  EVALUATOR_MODEL = "HuggingFaceH4/zephyr-7b-beta"
17
+ CODE_MODEL = "HuggingFaceH4/zephyr-7b-beta"
18
+ EXTRACTOR_MODEL = "HuggingFaceH4/zephyr-7b-beta"
19
  MAX_MANAGER_ITERATIONS = 5
20
+ MAX_WEB_PAGES_TO_VISIT = 3
21
+ MAX_WEB_SEARCH_ROUNDS = 2
22
 
23
  # --- Tools (used by agents) ---
24
 
 
140
  return answer.strip()
141
 
142
 
143
+ def _looks_like_number(s: str) -> bool:
144
+ s = s.strip().rstrip("%")
145
+ try:
146
+ float(s.replace(",", ""))
147
+ return True
148
+ except ValueError:
149
+ return False
150
+
151
+
def normalize_to_gaia_answer(question: str, raw_answer: str) -> str:
    """Extract a short, GAIA-style answer: one word, number, or short comma-separated list.

    Strategy, in order:

    1. Scan the non-empty lines of ``raw_answer`` from last to first and return
       the first one that looks like a final answer (short list, number, or a
       phrase of at most eight words). Lines that mention an error, start with
       ``(`` or contain a URL are skipped unless they start with a digit.
    2. Otherwise return the last number (optionally a percentage) found
       anywhere in the text.
    3. Otherwise ask the extractor model for a terse answer.
    4. Otherwise return the first short sentence-like segment, or the first
       200 characters as a last resort.

    Args:
        question: The original question; only used in the extractor prompt.
        raw_answer: The raw reply produced by an agent. May be empty or None.

    Returns:
        The normalized answer, or ``""`` when ``raw_answer`` is empty/blank.
    """
    raw = raw_answer.strip() if raw_answer else ""
    if not raw:
        return ""

    lines = [ln.strip() for ln in raw.split("\n") if ln.strip()]
    for candidate in reversed(lines):
        if len(candidate) > 120 or "Error" in candidate or "Could not" in candidate:
            continue
        # Parenthesised notes and URL-bearing lines are citations, not answers,
        # unless the line leads with a digit.
        if not candidate[0].isdigit() and (
            candidate.startswith("(") or "http" in candidate.lower()
        ):
            continue
        if "," in candidate and len(candidate) < 80:
            return candidate
        if candidate.isdigit() or _looks_like_number(candidate):
            return candidate
        if len(candidate.split()) <= 8:
            return candidate

    # `(?:%|\b)` rather than `%?\b`: there is no word boundary between "%" and
    # a following space/end-of-string, so `%?\b` always dropped the percent sign.
    numbers = re.findall(r"\b\d+(?:\.\d+)?(?:%|\b)", raw)
    if numbers:
        return numbers[-1]

    prompt = (
        f"Question: {question}\n\nLong answer or context:\n{raw[:1000]}\n\n"
        "Output ONLY the final answer: one word, one number, or a short comma-separated list (no explanation, no period at end). "
        "Example: Paris | 42 | apple, banana"
    )
    # Other call sites treat a falsy result from _llm_call as "no answer", so
    # guard against None before stripping.
    out = (_llm_call(prompt, EXTRACTOR_MODEL, max_new_tokens=50) or "").strip()
    if out:
        out = out.rstrip(".")
        if len(out) <= 150:
            return out

    for seg in re.split(r"[\n.!?]", raw):
        seg = seg.strip()
        if 1 <= len(seg) <= 100 and "Error" not in seg:
            return seg
    return raw[:200].strip()
185
+
186
+
187
  # --- Code Agent (has Python interpreter tool) ---
188
 
189
  def _extract_python_code(text: str) -> str:
 
218
  prompt = (
219
  f"Question: {question}\n\n"
220
  "Write a single Python code block to answer this. Use a variable 'result' for the final answer. "
221
+ "The value of 'result' must be a single number, one word, or a short phrase (GAIA format: no long explanation). "
222
  "Only output valid Python code, no explanation."
223
  )
224
+ code = _llm_call(prompt, CODE_MODEL, max_new_tokens=400)
225
  if not code:
226
  code = _heuristic_code_from_question(question)
227
  code = _extract_python_code(code)
 
232
 
233
  # --- Web Search Agent (DuckDuckGo + visit web page tools) ---
234
 
235
+ def _urls_from_snippets(snippets: str, max_urls: int = 5) -> list:
236
+ urls = []
237
+ for line in snippets.split("\n"):
238
+ m = re.search(r"\((https?://[^)]+)\)", line)
239
+ if m:
240
+ u = m.group(1)
241
+ if u not in urls:
242
+ urls.append(u)
243
+ if len(urls) >= max_urls:
244
+ break
245
+ return urls
246
+
247
+
class WebSearchAgent:
    """Answer a question from DuckDuckGo search results and the pages they link to.

    Calling the agent runs up to ``MAX_WEB_SEARCH_ROUNDS`` searches, visits up
    to ``MAX_WEB_PAGES_TO_VISIT`` result pages per round, and asks the
    evaluator model for a terse answer based on the gathered text.
    """

    def __init__(self):
        print("WebSearchAgent initialized.")

    def __call__(self, question: str) -> str:
        """Return a short answer for ``question``.

        Returns ``"No search results found."`` when the first search yields
        nothing. If the model gives no answer, falls back to the first
        substantial block of gathered text (at most 400 characters).
        """
        print(f"WebSearchAgent received (first 50 chars): {question[:50]}...")
        combined = ""  # prompt context, with section headers
        bodies = []    # the same content without headers, for the fallback answer
        for round_num in range(MAX_WEB_SEARCH_ROUNDS):
            # Later rounds nudge the search engine with a slightly different query.
            query = question if round_num == 0 else f"{question} answer"
            snippets = duckduckgo_search_tool(query, max_results=6)
            if not snippets or "No search results" in snippets:
                if round_num == 0:
                    return "No search results found."
                break
            combined += "\n\n--- Search round {} ---\n{}".format(round_num + 1, snippets)
            bodies.append(snippets)
            for url in _urls_from_snippets(snippets, max_urls=MAX_WEB_PAGES_TO_VISIT):
                page_text = visit_web_page_tool(url, max_chars=3500)
                # Skip failed or empty visits so they add neither an exception
                # nor an empty "--- Page ---" section.
                if page_text and "Visit error" not in page_text:
                    excerpt = page_text[:3000]
                    combined += "\n\n--- Page ---\n" + excerpt
                    bodies.append(excerpt)
            # The first round already produced enough context; skip further rounds.
            if round_num == 0 and len(combined) > 500:
                break
        if not combined:
            return "No search results found."

        prompt = (
            f"Question: {question}\n\nRelevant information:\n{combined[:7000]}\n\n"
            "Provide ONLY the final answer in GAIA format: one word, one number, or a short comma-separated list. No preamble, no explanation, no period at end."
        )
        answer = _llm_call(prompt, EVALUATOR_MODEL, max_new_tokens=200)
        if answer:
            return answer.strip()

        # Fallback: use the header-free bodies. Splitting `combined` always put
        # the internal "--- Search round 1 ---" marker at the start of the answer.
        blocks = [
            block.strip()
            for body in bodies
            for block in body.split("\n\n")
            if len(block.strip()) > 20
        ]
        if blocks:
            return blocks[0][:400]
        return "\n\n".join(bodies).strip()[:400]
281
 
282
 
283
  # --- Manager Agent (user input = question; routes code/web; evaluates accuracy; final answer or retry) ---
 
313
  if reply and "Error:" not in reply[:100] and "Could not" not in reply[:100]:
314
  best_answer = reply
315
  if evaluate_accuracy_tool(question, reply):
316
+ return normalize_to_gaia_answer(question, final_answer_tool(reply))
317
+ out = final_answer_tool(best_answer) if best_answer else "I could not determine a reliable answer."
318
+ return normalize_to_gaia_answer(question, out)
319
 
320
  def run_and_submit_all( profile: gr.OAuthProfile | None):
321
  """