Final_Assignment_Template

Sleeping

App Files Files Community

Raj989898 committed on Mar 6

Commit

8497d3d

verified ·

1 Parent(s): 833c9ef

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -185

app.py CHANGED Viewed

@@ -9,13 +9,11 @@ import sys
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- File helpers ---
 def download_task_file(task_id: str):
     url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
         resp = requests.get(url, timeout=30)
         if resp.status_code != 200:
-            print(f"No file for {task_id}: HTTP {resp.status_code}")
             return None, None
         cd = resp.headers.get("content-disposition", "")
         fname = "task_file"
@@ -25,13 +23,13 @@ def download_task_file(task_id: str):
         tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
         tmp.write(resp.content)
         tmp.close()
-        print(f"Downloaded: {fname} ({len(resp.content)} bytes) -> {tmp.name}")
         return tmp.name, fname
     except Exception as e:
         print(f"File download error: {e}")
         return None, None
-def read_file_contents(local_path: str, fname: str) -> str:
     ext = os.path.splitext(fname)[-1].lower()
     try:
         if ext in (".xlsx", ".xls"):
@@ -48,50 +46,39 @@ def read_file_contents(local_path: str, fname: str) -> str:
                 with open(local_path) as f:
                     return f.read()
             except:
-                return f"Binary file: {fname}"
     except Exception as e:
-        return f"Error reading: {e}"
-def run_python_file(local_path: str) -> str:
     try:
-        result = subprocess.run(
-            [sys.executable, local_path],
-            capture_output=True, text=True, timeout=15
-        )
-        output = (result.stdout + result.stderr).strip()
-        print(f"Python output: '{output[:200]}'")
-        return output if output else "No output."
-    except subprocess.TimeoutExpired:
-        return "Timed out."
     except Exception as e:
         return f"Error: {e}"
-def clean_answer(text: str) -> str:
     text = text.strip()
-    for prefix in ["FINAL ANSWER:", "Final Answer:", "Answer:",
-                   "The answer is:", "The answer is",
-                   "**Answer:**", "**Final Answer:**"]:
-        if text.lower().startswith(prefix.lower()):
-            text = text[len(prefix):].strip()
     return text.split("\n")[0].strip().strip('"').strip("'").strip("*").strip()
-# --- Groq API ---
-def call_groq(api_key: str, prompt: str, system: str = "", max_tokens: int = 512) -> str:
     url = "https://api.groq.com/openai/v1/chat/completions"
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
-    messages = []
-    if system:
-        messages.append({"role": "system", "content": system})
-    messages.append({"role": "user", "content": prompt})
     body = {"model": "llama-3.3-70b-versatile", "messages": messages,
             "temperature": 0.0, "max_tokens": max_tokens}
     resp = requests.post(url, headers=headers, json=body, timeout=60)
     if resp.status_code != 200:
-        raise Exception(f"Groq error {resp.status_code}: {resp.text[:200]}")
     return resp.json()["choices"][0]["message"]["content"].strip()
-# --- Web search ---
-def search_web(query: str, max_results: int = 6) -> str:
     try:
         from duckduckgo_search import DDGS
         with DDGS() as ddgs:
@@ -100,8 +87,7 @@ def search_web(query: str, max_results: int = 6) -> str:
             return "No results."
         return "\n\n".join(
             f"Title: {r.get('title','')}\nSnippet: {r.get('body','')}\nURL: {r.get('href','')}"
-            for r in results
-        )
     except Exception as e:
         return f"Search error: {e}"
@@ -110,44 +96,45 @@ def test_api():
     if not key:
         return "❌ GROQ_API_KEY not set!"
     try:
-        ans = call_groq(key, "What is 2+2?", "Reply with only the number.")
         return f"✅ Groq working! Test: '{ans}'"
     except Exception as e:
         return f"❌ {e}"
-SYSTEM_PROMPT = """You are a GAIA benchmark agent. Exact match grading is used — precision is everything.
-RULES:
-1. Reply with ONLY the final answer. No explanation, no prefix, no "The answer is".
-2. Numbers: use digits unless words are asked. No $ or , in numbers unless format is asked.
-3. Names: exact format as requested (first name only if asked for first name).
-4. Lists: comma-separated, alphabetical if asked.
-5. Think carefully — wrong format = wrong answer even if content is right.
-"""
 class BasicAgent:
     def __init__(self):
-        self.api_key = os.getenv("GROQ_API_KEY", "")
-        if not self.api_key:
-            raise RuntimeError("GROQ_API_KEY not set! Add it in Space Settings → Secrets.")
-        print(f"Agent ready. Key: {self.api_key[:8]}...")
-    def _multi_search(self, question: str) -> str:
-        """Do up to 2 targeted searches for better results."""
-        # First search: full question
-        r1 = search_web(question[:200])
-        # Second search: extract key entities for a more focused query
-        try:
-            focused = call_groq(
-                self.api_key,
-                f"Write a short 5-8 word web search query to find the answer to:\n{question}",
-                "Reply with only the search query. No quotes.",
-                max_tokens=30
-            )
-            r2 = search_web(focused)
-            return r1 + "\n\n---\n\n" + r2
-        except:
-            return r1
     def __call__(self, question: str) -> str:
         task_id = ""
@@ -158,116 +145,61 @@ class BasicAgent:
         print(f"\n{'='*50}\nTask: {task_id}\nQ: {question[:200]}")
-        file_context = ""
-        is_python = False
-        is_image = False
-        # 1. Download file
         if task_id:
-            local_path, fname = download_task_file(task_id)
-            if local_path and fname:
-                ext = os.path.splitext(fname)[-1].lower()
                 if ext == ".py":
-                    is_python = True
-                    code = read_file_contents(local_path, fname)
-                    output = run_python_file(local_path)
-                    file_context = (
-                        f"\n\n[Python file: {fname}]\n"
-                        f"CODE:\n{code}\n\n"
-                        f"EXECUTION OUTPUT: {output}\n"
-                        f"[End]\n"
-                    )
                 elif ext in (".xlsx", ".xls", ".csv"):
-                    contents = read_file_contents(local_path, fname)
-                    file_context = f"\n\n[Data file: {fname}]\n{contents[:6000]}\n[End]\n"
-                elif ext in (".png", ".jpg", ".jpeg", ".gif"):
-                    is_image = True
-                    file_context = f"\n\n[Image file '{fname}' attached — use question context and your knowledge.]\n"
                 else:
-                    contents = read_file_contents(local_path, fname)
-                    file_context = f"\n\n[File: {fname}]\n{contents[:4000]}\n[End]\n"
-        # 2. Handle reversed text question
-        q_for_search = question
-        if "rewsna" in question or "dnatsrednu" in question:
-            reversed_q = question[::-1]
-            print(f"Reversed: {reversed_q}")
-            q_for_search = reversed_q
-            file_context += f"\n\n[Note: The question above is written in reverse. Reversed it reads: {reversed_q}]\n"
-        # 3. Web search (skip if python file — we have the output)
-        search_context = ""
-        if not is_python:
-            print("Searching...")
-            results = self._multi_search(q_for_search)
-            if results and "error" not in results.lower():
-                search_context = f"\n\n[Web search results]\n{results[:4000]}\n[End search]\n"
-        # 4. Build prompt with strong format guidance
-        format_hint = self._get_format_hint(question)
-        prompt = (
-            f"Question: {q_for_search}"
-            f"{file_context}"
-            f"{search_context}"
-            f"\n\n{format_hint}"
-            "\nProvide ONLY the final answer. No explanation."
-        )
         try:
-            answer = call_groq(self.api_key, prompt, SYSTEM_PROMPT, max_tokens=128)
-            print(f"Raw: '{answer}'")
-            if len(answer.split()) > 30:
-                answer = call_groq(
-                    self.api_key,
-                    f"Extract only the shortest final answer from:\n\n{answer}",
-                    "Reply with only the bare answer.",
-                    max_tokens=64
-                )
-            answer = clean_answer(answer)
             print(f"Final: '{answer}'")
             return answer
         except Exception as e:
             print(f"Error: {e}")
             return ""
-    def _get_format_hint(self, question: str) -> str:
-        q = question.lower()
-        if "first name" in q:
-            return "Format: Reply with first name only."
-        if "surname" in q or "last name" in q:
-            return "Format: Reply with surname/last name only."
-        if "how many" in q:
-            return "Format: Reply with a number only (digits, no words)."
-        if "studio album" in q:
-            return "Format: Reply with a number only. Count only STUDIO albums (not live, compilation, or collaborative)."
-        if "country" in q and "olympic" in q:
-            return "Format: Reply with country name only."
-        if "excel" in q or "sales" in q or "total" in q:
-            return "Format: Plain number only, no $ or commas (e.g. 12345.67 not $12,345.67)."
-        if "chess" in q:
-            return "Format: Chess move in standard notation (e.g. Qd8, e5, Nf3)."
-        if "at bat" in q or "at-bat" in q:
-            return "Format: Reply with a number only."
-        if "video" in q and "youtube" in q:
-            return "Format: Reply with the exact quote or short phrase only."
-        if "wikipedia" in q and "nominat" in q:
-            return "Format: Reply with the username only."
-        if "pitcher" in q:
-            return "Format: Two last names separated by comma (e.g. Smith, Jones), in jersey number order."
-        if "grocery" in q or "shopping" in q or "ingredients" in q:
-            return "Format: Comma-separated list, alphabetical order, all lowercase."
-        return "Format: Reply with the shortest possible correct answer."
-# --- Submit ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if not profile:
-        return "Please Login to Hugging Face with the button.", None
-    username = f"{profile.username}"
     try:
         agent = BasicAgent()
     except RuntimeError as e:
@@ -276,15 +208,14 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
-        response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
-        response.raise_for_status()
-        questions_data = response.json()
         print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
         return f"Error fetching questions: {e}", None
-    results_log = []
-    answers_payload = []
     for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
@@ -293,44 +224,35 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             continue
         print(f"\n[{i+1}/{len(questions_data)}]")
         try:
-            submitted_answer = agent(f"[TASK_ID:{task_id}] {question_text}")
         except Exception as e:
-            submitted_answer = ""
             print(f"Error: {e}")
-        answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
         results_log.append({
             "Task ID": task_id,
             "Question": question_text[:100] + ("..." if len(question_text) > 100 else ""),
-            "Submitted Answer": submitted_answer
         })
     if not answers_payload:
-        return "No answers produced.", pd.DataFrame(results_log)
     try:
-        response = requests.post(
-            f"{DEFAULT_API_URL}/submit",
             json={"username": username.strip(), "agent_code": agent_code, "answers": answers_payload},
-            timeout=60
-        )
-        response.raise_for_status()
-        r = response.json()
-        status = (
-            f"Submission Successful!\n"
-            f"User: {r.get('username')}\n"
-            f"Overall Score: {r.get('score', 'N/A')}% "
-            f"({r.get('correct_count', '?')}/{r.get('total_attempted', '?')} correct)\n"
-            f"Message: {r.get('message', '')}"
-        )
-        return status, pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)
-# --- UI ---
 with gr.Blocks() as demo:
     gr.Markdown("# Basic Agent Evaluation Runner")
-    gr.Markdown("**Setup:** Add `GROQ_API_KEY` in Space Settings → Secrets. Free key at [console.groq.com](https://console.groq.com)")
     gr.LoginButton()
     with gr.Row():
         test_btn = gr.Button("🔬 Test Groq API", variant="secondary")

 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 def download_task_file(task_id: str):
     url = f"{DEFAULT_API_URL}/files/{task_id}"
     try:
         resp = requests.get(url, timeout=30)
         if resp.status_code != 200:
             return None, None
         cd = resp.headers.get("content-disposition", "")
         fname = "task_file"
         tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
         tmp.write(resp.content)
         tmp.close()
+        print(f"Downloaded: {fname} ({len(resp.content)} bytes)")
         return tmp.name, fname
     except Exception as e:
         print(f"File download error: {e}")
         return None, None
+def read_file_contents(local_path, fname):
     ext = os.path.splitext(fname)[-1].lower()
     try:
         if ext in (".xlsx", ".xls"):
                 with open(local_path) as f:
                     return f.read()
             except:
+                return f"Binary: {fname}"
     except Exception as e:
+        return f"Error: {e}"
+def run_python_file(local_path):
     try:
+        result = subprocess.run([sys.executable, local_path],
+                                capture_output=True, text=True, timeout=15)
+        out = (result.stdout + result.stderr).strip()
+        print(f"Python output: '{out[:200]}'")
+        return out or "No output."
     except Exception as e:
         return f"Error: {e}"
+def clean_answer(text):
     text = text.strip()
+    for p in ["FINAL ANSWER:", "Final Answer:", "Answer:", "The answer is:", "The answer is",
+              "**Answer:**", "**Final Answer:**"]:
+        if text.lower().startswith(p.lower()):
+            text = text[len(p):].strip()
     return text.split("\n")[0].strip().strip('"').strip("'").strip("*").strip()
+def call_groq(api_key, messages, max_tokens=512):
     url = "https://api.groq.com/openai/v1/chat/completions"
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
     body = {"model": "llama-3.3-70b-versatile", "messages": messages,
             "temperature": 0.0, "max_tokens": max_tokens}
     resp = requests.post(url, headers=headers, json=body, timeout=60)
     if resp.status_code != 200:
+        raise Exception(f"Groq {resp.status_code}: {resp.text[:200]}")
     return resp.json()["choices"][0]["message"]["content"].strip()
+def search_web(query, max_results=6):
     try:
         from duckduckgo_search import DDGS
         with DDGS() as ddgs:
             return "No results."
         return "\n\n".join(
             f"Title: {r.get('title','')}\nSnippet: {r.get('body','')}\nURL: {r.get('href','')}"
+            for r in results)
     except Exception as e:
         return f"Search error: {e}"
     if not key:
         return "❌ GROQ_API_KEY not set!"
     try:
+        ans = call_groq(key, [{"role":"user","content":"What is 2+2?"}], max_tokens=10)
         return f"✅ Groq working! Test: '{ans}'"
     except Exception as e:
         return f"❌ {e}"
+SYSTEM = """You are a GAIA benchmark agent. Exact match grading — precision is critical.
+Reply with ONLY the final answer. No explanation. No prefix. No "The answer is".
+Just the bare answer: a name, number, word, or short phrase."""
 class BasicAgent:
     def __init__(self):
+        self.key = os.getenv("GROQ_API_KEY", "")
+        if not self.key:
+            raise RuntimeError("GROQ_API_KEY not set! Add in Space Settings → Secrets.")
+        print(f"Agent ready. Key: {self.key[:8]}...")
+    def ask(self, user_msg, max_tokens=256):
+        return call_groq(self.key, [
+            {"role": "system", "content": SYSTEM},
+            {"role": "user", "content": user_msg}
+        ], max_tokens)
+    def think_then_answer(self, question, context=""):
+        """Two-step: reason first, then extract bare answer."""
+        # Step 1: reason
+        reasoning = call_groq(self.key, [
+            {"role": "system", "content": "You are a careful researcher. Think step by step to find the correct answer. Show your reasoning."},
+            {"role": "user", "content": f"Question: {question}\n\n{context}\n\nThink carefully and find the answer."}
+        ], max_tokens=1024)
+        print(f"Reasoning: {reasoning[:300]}...")
+        # Step 2: extract
+        answer = call_groq(self.key, [
+            {"role": "system", "content": SYSTEM},
+            {"role": "user", "content":
+             f"Question: {question}\n\nReasoning and research:\n{reasoning}\n\n"
+             f"Based on the above reasoning, give ONLY the final bare answer. Nothing else."}
+        ], max_tokens=64)
+        return clean_answer(answer)
     def __call__(self, question: str) -> str:
         task_id = ""
         print(f"\n{'='*50}\nTask: {task_id}\nQ: {question[:200]}")
+        # Handle reversed text
+        if "rewsna" in question or "dnatsrednu" in question:
+            question = question[::-1]
+            print(f"Reversed: {question}")
+        file_ctx = ""
+        is_py = False
+        # Download file
         if task_id:
+            lp, fn = download_task_file(task_id)
+            if lp and fn:
+                ext = os.path.splitext(fn)[-1].lower()
                 if ext == ".py":
+                    is_py = True
+                    code = read_file_contents(lp, fn)
+                    out = run_python_file(lp)
+                    file_ctx = f"\n[Python file: {fn}]\nCODE:\n{code}\n\nEXECUTION OUTPUT:\n{out}\n"
                 elif ext in (".xlsx", ".xls", ".csv"):
+                    contents = read_file_contents(lp, fn)
+                    file_ctx = f"\n[Data file: {fn}]\n{contents[:6000]}\n"
+                elif ext in (".png", ".jpg", ".jpeg"):
+                    file_ctx = f"\n[Image attached: {fn} — use your knowledge based on the question.]\n"
                 else:
+                    contents = read_file_contents(lp, fn)
+                    file_ctx = f"\n[File: {fn}]\n{contents[:4000]}\n"
+        # Web search (multiple targeted queries)
+        search_ctx = ""
+        if not is_py:
+            # Ask the model for a good search query
+            sq = call_groq(self.key, [
+                {"role": "user", "content":
+                 f"Write a precise 5-8 word web search query to find: {question}\nReply with ONLY the search query."}
+            ], max_tokens=30).strip().strip('"')
+            print(f"Search query: {sq}")
+            r1 = search_web(question[:200])
+            r2 = search_web(sq)
+            search_ctx = f"\n[Search 1]\n{r1[:2500]}\n\n[Search 2]\n{r2[:2500]}\n"
+        context = file_ctx + search_ctx
         try:
+            answer = self.think_then_answer(question, context)
             print(f"Final: '{answer}'")
             return answer
         except Exception as e:
             print(f"Error: {e}")
             return ""
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
     if not profile:
+        return "Please Login to Hugging Face.", None
+    username = profile.username
     try:
         agent = BasicAgent()
     except RuntimeError as e:
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
+        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
+        resp.raise_for_status()
+        questions_data = resp.json()
         print(f"Fetched {len(questions_data)} questions.")
     except Exception as e:
         return f"Error fetching questions: {e}", None
+    results_log, answers_payload = [], []
     for i, item in enumerate(questions_data):
         task_id = item.get("task_id")
             continue
         print(f"\n[{i+1}/{len(questions_data)}]")
         try:
+            ans = agent(f"[TASK_ID:{task_id}] {question_text}")
         except Exception as e:
+            ans = ""
             print(f"Error: {e}")
+        answers_payload.append({"task_id": task_id, "submitted_answer": ans})
         results_log.append({
             "Task ID": task_id,
             "Question": question_text[:100] + ("..." if len(question_text) > 100 else ""),
+            "Submitted Answer": ans
         })
     if not answers_payload:
+        return "No answers.", pd.DataFrame(results_log)
     try:
+        resp = requests.post(f"{DEFAULT_API_URL}/submit",
             json={"username": username.strip(), "agent_code": agent_code, "answers": answers_payload},
+            timeout=60)
+        resp.raise_for_status()
+        r = resp.json()
+        return (f"Submission Successful!\nUser: {r.get('username')}\n"
+                f"Score: {r.get('score')}% ({r.get('correct_count')}/{r.get('total_attempted')} correct)\n"
+                f"Message: {r.get('message')}"), pd.DataFrame(results_log)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)
 with gr.Blocks() as demo:
     gr.Markdown("# Basic Agent Evaluation Runner")
+    gr.Markdown("**Setup:** `GROQ_API_KEY` in Space Settings → Secrets. Free at [console.groq.com](https://console.groq.com)")
     gr.LoginButton()
     with gr.Row():
         test_btn = gr.Button("🔬 Test Groq API", variant="secondary")