Final_Assignment_Template

Sleeping

App Files Files Community

Raj989898 committed 26 days ago

Commit

d5a51fc

verified ·

1 Parent(s): 67eace3

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -47

app.py CHANGED Viewed

@@ -1,11 +1,10 @@
-# app.py — safe GAIA runner (paste entire file, replace existing)
 import os
 import time
 import requests
 import pandas as pd
 import gradio as gr
-# ddgs (DuckDuckGo search) — safe, lightweight
 try:
     from ddgs import DDGS
 except Exception:
@@ -14,15 +13,13 @@ except Exception:
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # -------------------------
-# LLM (Groq) caller (safe)
 # -------------------------
 _last_call = 0
-def call_groq(api_key, prompt, max_tokens=128):
-    """
-    Call Groq API. Raises on network/HTTP failure.
-    """
     global _last_call
-    # rate limit tiny delay
     if time.time() - _last_call < 1.5:
         time.sleep(1.5)
     _last_call = time.time()
@@ -41,27 +38,37 @@ def call_groq(api_key, prompt, max_tokens=128):
     r = requests.post(url, headers=headers, json=body, timeout=60)
     r.raise_for_status()
     data = r.json()
-    # defensive
-    return data["choices"][0]["message"]["content"].strip()
 # -------------------------
-# Clean / normalise answers
 # -------------------------
 def clean_answer(text: str) -> str:
     if text is None:
         return ""
     text = str(text).strip()
     prefixes = [
-        "FINAL ANSWER:", "Final Answer:", "Answer:", "The answer is", "Result:"
     ]
     for p in prefixes:
         if text.lower().startswith(p.lower()):
-            text = text[len(p):].strip()
     # only first line
     text = text.splitlines()[0].strip()
-    # strip quotes/asterisks
     return text.strip('"').strip("'").strip("*").strip()
 # -------------------------
 # Web search (ddgs)
 # -------------------------
@@ -72,31 +79,25 @@ def web_search_snippets(query: str, max_results: int = 5) -> str:
     try:
         with DDGS() as ddgs:
             for i, r in enumerate(ddgs.text(query, max_results=max_results)):
-                # r typically contains 'title' and 'body'
                 title = r.get("title", "")
                 body = r.get("body", "")
                 snippets.append(f"{title} — {body}")
-                if i+1 >= max_results:
                     break
     except Exception:
-        # swallow search errors
         return ""
     return "\n".join(snippets)
 # -------------------------
 # Download task file helper
 # -------------------------
 def download_task_file(task_id: str):
-    """
-    Returns (local_path, filename) or (None, None) if not found.
-    Saves into /tmp and returns path.
-    """
     try:
         url = f"{DEFAULT_API_URL}/files/{task_id}/download"
         r = requests.get(url, timeout=20)
         if r.status_code != 200:
             return None, None
-        # try to derive filename
         cd = r.headers.get("content-disposition", "")
         filename = ""
         if "filename=" in cd:
@@ -112,14 +113,13 @@ def download_task_file(task_id: str):
     except Exception:
         return None, None
 # -------------------------
-# BasicAgent (safe, retry)
 # -------------------------
 class BasicAgent:
     def __init__(self):
-        # pick up key if available
         self.key = os.getenv("GROQ_API_KEY", "").strip() or None
-        # quick status printed to logs
         print("BasicAgent initializing. GROQ key present:", bool(self.key), "DDGS available:", DDGS is not None)
     def ask_llm(self, prompt: str, max_tokens: int = 128) -> str:
@@ -140,25 +140,20 @@ class BasicAgent:
         return ""
     def fallback_from_search(self, question: str) -> str:
-        # If no key or LLM fails, return the first useful snippet from web search
         snippets = web_search_snippets(question, max_results=4)
         if not snippets:
             return ""
-        # pick first non-empty line and clean
         for line in snippets.splitlines():
             s = line.strip()
             if len(s) > 3:
-                # take first sentence-like chunk
                 sentence = s.split(".")[0].strip()
                 return clean_answer(sentence)
         return ""
     def __call__(self, question: str, task_id: str = "") -> str:
         print("Received question:", question[:200])
-        # prepare short context (search + file)
         context_parts = []
-        # file if present
         if task_id:
             lp, fn = download_task_file(task_id)
             if lp and fn:
@@ -167,17 +162,14 @@ class BasicAgent:
                         txt = f.read(4000)
                     context_parts.append(f"File {fn} contents (truncated):\n{txt}")
                 except Exception:
-                    # binary file or not readable; ignore
                     context_parts.append(f"File {fn} exists but not included in context.")
-        # web snippets
         search_snip = web_search_snippets(question, max_results=4)
         if search_snip:
             context_parts.append("Web snippets:\n" + search_snip[:3000])
         context = "\n\n".join(context_parts).strip()
-        # construct LLM prompt
         prompt = f"""You are solving a GAIA benchmark question. Return ONLY the final answer, nothing else.
 Question:
@@ -187,12 +179,10 @@ Context:
 {context}
 Return ONLY the final answer."""
-        # try LLM if key present
         if self.key:
             ans = self.solve_with_retries(prompt, attempts=3)
             if ans:
                 return ans
-            # fallback try one more time shorter prompt
             try:
                 ans2 = self.ask_llm("Extract the single final short answer only:\n" + prompt, max_tokens=48)
                 ans2 = clean_answer(ans2)
@@ -201,13 +191,12 @@ Return ONLY the final answer."""
             except Exception as e:
                 print("LLM final fallback failed:", e)
-        # final fallback from web search
         fb = self.fallback_from_search(question)
         if fb:
             return fb
-        # last resort: empty string (the grader tolerates empties)
         return ""
 # -------------------------
 # Evaluation runner used by UI
 # -------------------------
@@ -215,9 +204,17 @@ def run_and_submit_all(profile):
     if not profile:
         return "Please login first", None
-    username = profile.username
-    print("User:", username)
     agent = BasicAgent()
     try:
@@ -230,15 +227,15 @@ def run_and_submit_all(profile):
     for q in questions:
         task_id = q.get("task_id")
         question = q.get("question", "")
-        ans = agent(question, task_id)
         answers.append({"task_id": task_id, "submitted_answer": ans})
         logs.append({"task_id": task_id, "question": question, "answer": ans})
-    payload = {
-        "username": username,
-        "agent_code": "",  # optional: your space repo link
-        "answers": answers
-    }
     try:
         resp = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=30)
         resp.raise_for_status()
@@ -248,6 +245,7 @@ def run_and_submit_all(profile):
     except Exception as e:
         return f"Submission failed: {e}", pd.DataFrame(logs)
 # -------------------------
 # UI (minimal)
 # -------------------------
@@ -256,9 +254,9 @@ with gr.Blocks() as demo:
     gr.Markdown("Make sure you added `GROQ_API_KEY` in Settings → Secrets for best results.")
     gr.LoginButton()
     run_btn = gr.Button("Run Evaluation")
-    status = gr.Textbox(label="Run status", lines=4)
     table = gr.DataFrame(label="Logs")
-    run_btn.click(run_and_submit_all, outputs=[status, table])
 if __name__ == "__main__":
     demo.launch()

 import os
 import time
 import requests
 import pandas as pd
 import gradio as gr
+# optional ddgs (duckduckgo) search
 try:
     from ddgs import DDGS
 except Exception:
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # -------------------------
+# GROQ / LLM caller (safe)
 # -------------------------
 _last_call = 0
+def call_groq(api_key: str, prompt: str, max_tokens: int = 128) -> str:
     global _last_call
     if time.time() - _last_call < 1.5:
         time.sleep(1.5)
     _last_call = time.time()
     r = requests.post(url, headers=headers, json=body, timeout=60)
     r.raise_for_status()
     data = r.json()
+    # defensive access
+    choice = data.get("choices") and data["choices"][0]
+    if not choice:
+        return ""
+    msg = choice.get("message") or {}
+    return msg.get("content", "").strip()
 # -------------------------
+# Clean / normalize answers
 # -------------------------
 def clean_answer(text: str) -> str:
     """Normalize a raw model/search answer into a single short line.

     Steps, in order:
       1. ``None`` becomes ``""``; anything else is converted with ``str()``
          and stripped of surrounding whitespace.
       2. Each known lead-in ("FINAL ANSWER:", "Answer:", ...) is removed
          when the text starts with it (case-insensitive). The prefixes are
          tried one after another, so stacked lead-ins are all removed.
       3. Only the first line of what remains is kept.
       4. Surrounding double quotes, single quotes and asterisks are
          stripped, in that order.

     Args:
         text: The raw answer; may be ``None`` or a non-string value.

     Returns:
         The cleaned answer, or ``""`` when nothing is left after cleaning.
     """
     if text is None:
         return ""
     text = str(text).strip()
     prefixes = (
         "FINAL ANSWER:",
         "Final Answer:",
         "Answer:",
         "The answer is",
         "Result:",
     )
     for p in prefixes:
         if text.lower().startswith(p.lower()):
             text = text[len(p):].strip()
     # "".splitlines() is an empty list, so indexing [0] would raise
     # IndexError for empty input or input that was only a prefix.
     if not text:
         return ""
     # only first line
     first_line = text.splitlines()[0].strip()
     # strip common quoting characters
     return first_line.strip('"').strip("'").strip("*").strip()
 # -------------------------
 # Web search (ddgs)
 # -------------------------
     try:
         with DDGS() as ddgs:
             for i, r in enumerate(ddgs.text(query, max_results=max_results)):
                 title = r.get("title", "")
                 body = r.get("body", "")
                 snippets.append(f"{title} — {body}")
+                if i + 1 >= max_results:
                     break
     except Exception:
         return ""
     return "\n".join(snippets)
 # -------------------------
 # Download task file helper
 # -------------------------
 def download_task_file(task_id: str):
     try:
         url = f"{DEFAULT_API_URL}/files/{task_id}/download"
         r = requests.get(url, timeout=20)
         if r.status_code != 200:
             return None, None
         cd = r.headers.get("content-disposition", "")
         filename = ""
         if "filename=" in cd:
     except Exception:
         return None, None
 # -------------------------
+# BasicAgent with retries and fallback
 # -------------------------
 class BasicAgent:
     def __init__(self):
         self.key = os.getenv("GROQ_API_KEY", "").strip() or None
         print("BasicAgent initializing. GROQ key present:", bool(self.key), "DDGS available:", DDGS is not None)
     def ask_llm(self, prompt: str, max_tokens: int = 128) -> str:
         return ""
     def fallback_from_search(self, question: str) -> str:
         """Derive a best-effort answer from web search results alone.

         Runs ``web_search_snippets`` for the question (up to 4 results),
         takes the first result line longer than 3 characters after
         stripping, keeps the text before its first period, and passes that
         through ``clean_answer``.

         Returns:
             The cleaned fragment, or ``""`` when the search yields nothing
             usable.
         """
         found = web_search_snippets(question, max_results=4)
         if not found:
             return ""
         stripped_lines = (raw.strip() for raw in found.splitlines())
         # First line with more than 3 characters, if any.
         first_useful = next((c for c in stripped_lines if len(c) > 3), None)
         if first_useful is None:
             return ""
         # Everything before the first "." approximates the first sentence.
         head = first_useful.partition(".")[0]
         return clean_answer(head.strip())
     def __call__(self, question: str, task_id: str = "") -> str:
         print("Received question:", question[:200])
         context_parts = []
         if task_id:
             lp, fn = download_task_file(task_id)
             if lp and fn:
                         txt = f.read(4000)
                     context_parts.append(f"File {fn} contents (truncated):\n{txt}")
                 except Exception:
                     context_parts.append(f"File {fn} exists but not included in context.")
         search_snip = web_search_snippets(question, max_results=4)
         if search_snip:
             context_parts.append("Web snippets:\n" + search_snip[:3000])
         context = "\n\n".join(context_parts).strip()
         prompt = f"""You are solving a GAIA benchmark question. Return ONLY the final answer, nothing else.
 Question:
 {context}
 Return ONLY the final answer."""
         if self.key:
             ans = self.solve_with_retries(prompt, attempts=3)
             if ans:
                 return ans
             try:
                 ans2 = self.ask_llm("Extract the single final short answer only:\n" + prompt, max_tokens=48)
                 ans2 = clean_answer(ans2)
             except Exception as e:
                 print("LLM final fallback failed:", e)
         fb = self.fallback_from_search(question)
         if fb:
             return fb
         return ""
 # -------------------------
 # Evaluation runner used by UI
 # -------------------------
     if not profile:
         return "Please login first", None
+    username = getattr(profile, "username", None) or profile.get("username") if isinstance(profile, dict) else None
+    if not username:
+        # sometimes gradio returns OAuthProfile object; fallback
+        try:
+            username = profile.username
+        except Exception:
+            username = None
+    if not username:
+        return "Unable to get username from profile. Please try logging out and back in.", None
+    print("User:", username)
     agent = BasicAgent()
     try:
     for q in questions:
         task_id = q.get("task_id")
         question = q.get("question", "")
+        try:
+            ans = agent(question, task_id)
+        except Exception as e:
+            print("Agent execution error:", e)
+            ans = ""
         answers.append({"task_id": task_id, "submitted_answer": ans})
         logs.append({"task_id": task_id, "question": question, "answer": ans})
+    payload = {"username": username, "agent_code": "", "answers": answers}
     try:
         resp = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=30)
         resp.raise_for_status()
     except Exception as e:
         return f"Submission failed: {e}", pd.DataFrame(logs)
 # -------------------------
 # UI (minimal)
 # -------------------------
     gr.Markdown("Make sure you added `GROQ_API_KEY` in Settings → Secrets for best results.")
     gr.LoginButton()
     run_btn = gr.Button("Run Evaluation")
+    status = gr.Textbox(label="Run status", lines=6)
     table = gr.DataFrame(label="Logs")
+    run_btn.click(run_and_submit_all, inputs=gr.OAuthProfile(), outputs=[status, table])
 if __name__ == "__main__":
     demo.launch()