Final_Assignment_Template

Sleeping

App Files Files Community

MainStreet123 committed 30 days ago

Commit

00d93b9

verified ·

1 Parent(s): 2543503

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -19

app.py CHANGED Viewed

@@ -14,7 +14,11 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 HF_INFERENCE_URL = "https://api-inference.huggingface.co/models"
 ROUTER_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 EVALUATOR_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 MAX_MANAGER_ITERATIONS = 5
 # --- Tools (used by agents) ---
@@ -136,6 +140,50 @@ def final_answer_tool(answer: str) -> str:
     return answer.strip()
 # --- Code Agent (has Python interpreter tool) ---
 def _extract_python_code(text: str) -> str:
@@ -170,9 +218,10 @@ class CodeAgent:
         prompt = (
             f"Question: {question}\n\n"
             "Write a single Python code block to answer this. Use a variable 'result' for the final answer. "
             "Only output valid Python code, no explanation."
         )
-        code = _llm_call(prompt, ROUTER_MODEL, max_new_tokens=400)
         if not code:
             code = _heuristic_code_from_question(question)
         code = _extract_python_code(code)
@@ -183,34 +232,52 @@ class CodeAgent:
 # --- Web Search Agent (DuckDuckGo + visit web page tools) ---
 class WebSearchAgent:
     def __init__(self):
         print("WebSearchAgent initialized.")
     def __call__(self, question: str) -> str:
         print(f"WebSearchAgent received (first 50 chars): {question[:50]}...")
-        snippets = duckduckgo_search_tool(question, max_results=5)
-        if not snippets or "No search results" in snippets:
-            return "No search results found."
-        first_url = None
-        for line in snippets.split("\n"):
-            m = re.search(r"\((https?://[^)]+)\)", line)
-            if m:
-                first_url = m.group(1)
                 break
-        if first_url:
-            page_text = visit_web_page_tool(first_url, max_chars=4000)
-            if "Visit error" not in page_text:
-                snippets = snippets + "\n\n--- Page content ---\n" + page_text[:3000]
         prompt = (
-            f"Question: {question}\n\nRelevant information:\n{snippets[:6000]}\n\n"
-            "Provide a concise, direct answer (string or number). No preamble."
         )
         answer = _llm_call(prompt, EVALUATOR_MODEL, max_new_tokens=200)
         if answer:
             return answer.strip()
-        blocks = [b.strip() for b in snippets.split("\n\n") if len(b.strip()) > 20]
-        return blocks[0][:500] if blocks else snippets[:500]
 # --- Manager Agent (user input = question; routes code/web; evaluates accuracy; final answer or retry) ---
@@ -246,8 +313,9 @@ class ManagerAgent:
             if reply and "Error:" not in reply[:100] and "Could not" not in reply[:100]:
                 best_answer = reply
             if evaluate_accuracy_tool(question, reply):
-                return final_answer_tool(reply)
-        return final_answer_tool(best_answer) if best_answer else "I could not determine a reliable answer."
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """

 HF_INFERENCE_URL = "https://api-inference.huggingface.co/models"
 ROUTER_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 EVALUATOR_MODEL = "HuggingFaceH4/zephyr-7b-beta"
+CODE_MODEL = "HuggingFaceH4/zephyr-7b-beta"
+EXTRACTOR_MODEL = "HuggingFaceH4/zephyr-7b-beta"
 MAX_MANAGER_ITERATIONS = 5
+MAX_WEB_PAGES_TO_VISIT = 3
+MAX_WEB_SEARCH_ROUNDS = 2
 # --- Tools (used by agents) ---
     return answer.strip()
+def _looks_like_number(s: str) -> bool:
+    s = s.strip().rstrip("%")
+    try:
+        float(s.replace(",", ""))
+        return True
+    except ValueError:
+        return False
def normalize_to_gaia_answer(question: str, raw_answer: str) -> str:
    """Extract a short, GAIA-style answer: one word, number, or short comma-separated list.

    The strategies are tried in order and the first hit wins:

    1. Scan the non-empty lines from last to first and return the first one
       that is short, is not an error message, and looks like an answer
       (a short comma-separated list, a number, or at most eight words).
    2. Return the last number found anywhere in the text.
    3. Ask the extractor model to condense the text.
    4. Return the first short sentence-like segment, or the first 200
       characters as a last resort.

    Args:
        question: The original question; only used in the extractor prompt.
        raw_answer: The unprocessed answer text (may be empty or None).

    Returns:
        The normalized answer, or "" when *raw_answer* is empty or blank.
    """
    if not raw_answer or not raw_answer.strip():
        return ""
    raw = raw_answer.strip()
    lines = [ln.strip() for ln in raw.split("\n") if ln.strip()]
    for candidate in reversed(lines):
        # Skip long lines and lines that look like error reports.
        if len(candidate) > 120 or "Error" in candidate or "Could not" in candidate:
            continue
        # Unless the line starts with a digit, skip parenthesised notes and
        # anything containing a link.
        if not candidate[0].isdigit() and (
            candidate.startswith("(") or "http" in candidate.lower()
        ):
            continue
        if "," in candidate and len(candidate) < 80:
            return candidate
        if candidate.isdigit() or _looks_like_number(candidate):
            return candidate
        if len(candidate.split()) <= 8:
            return candidate
    # A number ends either with "%" or at a word boundary. The previous
    # pattern ("%?" followed by "\b") could never keep a trailing percent
    # sign, and it split "1,234" into "1" and "234".
    numbers = re.findall(r"\b\d+(?:,\d{3})*(?:\.\d+)?(?:%|\b)", raw)
    if numbers:
        return numbers[-1]
    prompt = (
        f"Question: {question}\n\nLong answer or context:\n{raw[:1000]}\n\n"
        "Output ONLY the final answer: one word, one number, or a short comma-separated list (no explanation, no period at end). "
        "Example: Paris | 42 | apple, banana"
    )
    # Other call sites test the result of _llm_call for falsiness, so guard
    # against a None reply here as well (assumption: it may return None on
    # failure).
    out = (_llm_call(prompt, EXTRACTOR_MODEL, max_new_tokens=50) or "").strip()
    if out:
        out = out.rstrip(".")
        if len(out) <= 150:
            return out
    for seg in re.split(r"[\n.!?]", raw):
        seg = seg.strip()
        if 1 <= len(seg) <= 100 and "Error" not in seg:
            return seg
    return raw[:200].strip()
 # --- Code Agent (has Python interpreter tool) ---
 def _extract_python_code(text: str) -> str:
         prompt = (
             f"Question: {question}\n\n"
             "Write a single Python code block to answer this. Use a variable 'result' for the final answer. "
+            "The value of 'result' must be a single number, one word, or a short phrase (GAIA format: no long explanation). "
             "Only output valid Python code, no explanation."
         )
+        code = _llm_call(prompt, CODE_MODEL, max_new_tokens=400)
         if not code:
             code = _heuristic_code_from_question(question)
         code = _extract_python_code(code)
 # --- Web Search Agent (DuckDuckGo + visit web page tools) ---
+def _urls_from_snippets(snippets: str, max_urls: int = 5) -> list:
+    urls = []
+    for line in snippets.split("\n"):
+        m = re.search(r"\((https?://[^)]+)\)", line)
+        if m:
+            u = m.group(1)
+            if u not in urls:
+                urls.append(u)
+                if len(urls) >= max_urls:
+                    break
+    return urls
 class WebSearchAgent:
     """Answer a question from web search snippets and the text of result pages.

     Calling the agent runs up to MAX_WEB_SEARCH_ROUNDS searches, visits up
     to MAX_WEB_PAGES_TO_VISIT result pages per round, and asks the
     evaluator model for a short answer based on the gathered text.
     """

     def __init__(self):
         print("WebSearchAgent initialized.")

     def __call__(self, question: str) -> str:
         """Return a short answer to *question*, or a fallback excerpt.

         Args:
             question: The question to research.

         Returns:
             The model's answer with surrounding whitespace removed. If the
             model gives no answer, the first substantial block of gathered
             text (at most 400 characters). "No search results found." when
             the first search yields nothing.
         """
         print(f"WebSearchAgent received (first 50 chars): {question[:50]}...")
         combined = ""  # prompt context, including section markers
         evidence = []  # the same text without markers, used for the fallback
         visited = set()  # URLs already fetched, so a later round does not repeat them
         for round_num in range(MAX_WEB_SEARCH_ROUNDS):
             query = question if round_num == 0 else f"{question} answer"
             snippets = duckduckgo_search_tool(query, max_results=6)
             if not snippets or "No search results" in snippets:
                 if round_num == 0:
                     return "No search results found."
                 break
             combined += "\n\n--- Search round {} ---\n{}".format(round_num + 1, snippets)
             evidence.append(snippets)
             for url in _urls_from_snippets(snippets, max_urls=MAX_WEB_PAGES_TO_VISIT):
                 if url in visited:
                     continue
                 visited.add(url)
                 page_text = visit_web_page_tool(url, max_chars=3500)
                 if "Visit error" not in page_text:
                     combined += "\n\n--- Page ---\n" + page_text[:3000]
                     evidence.append(page_text[:3000])
             # The first round usually gathers enough context; only search
             # again when it produced very little text.
             if round_num == 0 and len(combined) > 500:
                 break
         if not combined:
             return "No search results found."
         prompt = (
             f"Question: {question}\n\nRelevant information:\n{combined[:7000]}\n\n"
             "Provide ONLY the final answer in GAIA format: one word, one number, or a short comma-separated list. No preamble, no explanation, no period at end."
         )
         answer = _llm_call(prompt, EVALUATOR_MODEL, max_new_tokens=200)
         if answer:
             return answer.strip()
         # Fall back to the gathered text itself. Use the marker-free copy so
         # the returned excerpt does not begin with "--- Search round 1 ---".
         blocks = [
             b.strip()
             for text in evidence
             for b in text.split("\n\n")
             if len(b.strip()) > 20
         ]
         if blocks:
             return blocks[0][:400]
         return "\n\n".join(evidence).strip()[:400]
 # --- Manager Agent (user input = question; routes code/web; evaluates accuracy; final answer or retry) ---
             if reply and "Error:" not in reply[:100] and "Could not" not in reply[:100]:
                 best_answer = reply
             if evaluate_accuracy_tool(question, reply):
+                return normalize_to_gaia_answer(question, final_answer_tool(reply))
+        out = final_answer_tool(best_answer) if best_answer else "I could not determine a reliable answer."
+        return normalize_to_gaia_answer(question, out)
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """