Final_Assignment_Template

Sleeping

App Files Files Community

Raj989898 committed on Mar 6

Commit

1758136

verified ·

1 Parent(s): 4790a7a

Update app.py

Browse files

Files changed (1) hide show

app.py +200 -280

app.py CHANGED Viewed

@@ -1,309 +1,229 @@
 import os
 import time
-import gradio as gr
 import requests
 import pandas as pd
-import tempfile
-import subprocess
-import sys
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# Track API calls for rate limiting
-_last_call_time = 0
-def rate_limited_groq(api_key, prompt, system="", max_tokens=128):
-    """Call Groq with rate limiting — max 25 req/min to stay safe."""
-    global _last_call_time
-    # Ensure at least 2.5 seconds between calls (= 24/min, safely under 30 limit)
-    elapsed = time.time() - _last_call_time
-    if elapsed < 2.5:
-        time.sleep(2.5 - elapsed)
-    _last_call_time = time.time()
     url = "https://api.groq.com/openai/v1/chat/completions"
-    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
-    msgs = []
-    if system:
-        msgs.append({"role": "system", "content": system})
-    msgs.append({"role": "user", "content": prompt})
-    body = {"model": "llama-3.3-70b-versatile", "messages": msgs,
-            "temperature": 0.0, "max_tokens": max_tokens}
-    resp = requests.post(url, headers=headers, json=body, timeout=60)
-    if resp.status_code == 429:
-        print("Rate limited! Waiting 60s...")
-        time.sleep(60)
-        resp = requests.post(url, headers=headers, json=body, timeout=60)
-    if resp.status_code != 200:
-        raise Exception(f"Groq {resp.status_code}: {resp.text[:200]}")
-    return resp.json()["choices"][0]["message"]["content"].strip()
-def download_task_file(task_id):
-    url = f"{DEFAULT_API_URL}/files/{task_id}"
-    try:
-        resp = requests.get(url, timeout=30)
-        print(f"  File request: HTTP {resp.status_code}, size={len(resp.content)}, "
-              f"content-type={resp.headers.get('content-type','?')}")
-        if resp.status_code != 200 or len(resp.content) == 0:
-            return None, None
-        cd = resp.headers.get("content-disposition", "")
-        ct = resp.headers.get("content-type", "")
-        fname = "task_file"
-        if "filename=" in cd:
-            fname = cd.split("filename=")[-1].strip().strip('"').strip("'")
-        ext = os.path.splitext(fname)[-1]
-        if not ext:
-            if "python" in ct: ext = ".py"
-            elif "excel" in ct or "spreadsheet" in ct: ext = ".xlsx"
-            elif "csv" in ct: ext = ".csv"
-            elif "image" in ct: ext = ".png"
-            else: ext = ".bin"
-            fname += ext
-        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext, prefix="gaia_")
-        tmp.write(resp.content)
-        tmp.close()
-        print(f"  Saved: {fname} -> {tmp.name}")
-        return tmp.name, fname
-    except Exception as e:
-        print(f"  Download error: {e}")
-        return None, None
-def read_file_contents(local_path, fname):
-    ext = os.path.splitext(fname)[-1].lower()
-    try:
-        if ext in (".xlsx", ".xls"):
-            df = pd.read_excel(local_path)
-            return f"Excel shape={df.shape}\nColumns={list(df.columns)}\n\n{df.to_string()}"
-        elif ext == ".csv":
-            df = pd.read_csv(local_path)
-            return f"CSV shape={df.shape}\nColumns={list(df.columns)}\n\n{df.to_string()}"
-        elif ext in (".py", ".txt", ".md", ".json"):
-            with open(local_path, "r", errors="replace") as f:
-                return f.read()
-        else:
-            try:
-                with open(local_path, "r", errors="replace") as f:
-                    c = f.read()
-                    if c.strip(): return c
-            except: pass
-            return f"Binary: {fname}"
-    except Exception as e:
-        return f"Error: {e}"
-def run_python_file(local_path):
-    try:
-        r = subprocess.run([sys.executable, local_path],
-                           capture_output=True, text=True, timeout=15)
-        out = (r.stdout + r.stderr).strip()
-        print(f"  Python output: '{out[:200]}'")
-        return out if out else "No output."
-    except Exception as e:
-        return f"Error: {e}"
 def clean_answer(text):
     text = text.strip()
-    for p in ["FINAL ANSWER:", "Final Answer:", "Answer:", "The answer is:", "The answer is",
-              "**Answer:**", "**Final Answer:**"]:
         if text.lower().startswith(p.lower()):
             text = text[len(p):].strip()
-    return text.split("\n")[0].strip().strip('"').strip("'").strip("*").strip()
-def search_web(query, max_results=6):
-    try:
-        from duckduckgo_search import DDGS
-        with DDGS() as ddgs:
-            results = list(ddgs.text(query, max_results=max_results))
-        if not results:
-            return "No results."
-        return "\n\n".join(
-            f"Title: {r.get('title','')}\nSnippet: {r.get('body','')}\nURL: {r.get('href','')}"
-            for r in results)
-    except Exception as e:
-        return f"Search error: {e}"
-def test_api():
-    key = os.getenv("GROQ_API_KEY", "")
-    if not key:
-        return "❌ GROQ_API_KEY not set!"
-    try:
-        ans = rate_limited_groq(key, "What is 2+2?", "Reply with only the number.")
-        return f"✅ Groq working! Test: '{ans}'"
-    except Exception as e:
-        return f"❌ {e}"
-SYSTEM = """You are a GAIA benchmark agent. Exact match grading — your answer must match exactly.
-Reply with ONLY the final answer. No explanation. No prefix. No "The answer is".
-Give the bare answer: a name, number, word, or short phrase only."""
 class BasicAgent:
     def __init__(self):
-        self.key = os.getenv("GROQ_API_KEY", "")
         if not self.key:
-            raise RuntimeError("GROQ_API_KEY not set!")
-        print(f"Agent ready. Key: {self.key[:8]}...")
-    def ask(self, prompt, max_tokens=128):
-        return clean_answer(rate_limited_groq(self.key, prompt, SYSTEM, max_tokens))
-    def __call__(self, question: str, task_id: str = "") -> str:
-        print(f"\n{'='*50}\nTask: {task_id}\nQ: {question[:200]}")
-        # Handle reversed text
-        if "rewsna" in question or "dnatsrednu" in question:
-            question = question[::-1]
-            print(f"  Reversed: {question}")
-        file_ctx = ""
-        is_py = False
-        # Download file
-        if task_id:
-            print(f"  Attempting file download for task_id={task_id}")
-            lp, fn = download_task_file(task_id)
-            if lp and fn:
-                ext = os.path.splitext(fn)[-1].lower()
-                if ext == ".py":
-                    is_py = True
-                    code = read_file_contents(lp, fn)
-                    out = run_python_file(lp)
-                    file_ctx = f"\n[Python: {fn}]\nCODE:\n{code}\nOUTPUT:\n{out}\n"
-                elif ext in (".xlsx", ".xls", ".csv"):
-                    contents = read_file_contents(lp, fn)
-                    file_ctx = f"\n[File: {fn}]\n{contents[:6000]}\n"
-                elif ext in (".png", ".jpg", ".jpeg"):
-                    file_ctx = f"\n[Image: {fn} attached.]\n"
-                else:
-                    contents = read_file_contents(lp, fn)
-                    file_ctx = f"\n[File: {fn}]\n{contents[:4000]}\n"
-            else:
-                print(f"  No file found for this task.")
-        # Web search
-        search_ctx = ""
-        if not is_py:
-            results = search_web(question[:200])
-            if results and "error" not in results.lower():
-                search_ctx = f"\n[Search]\n{results[:3500]}\n"
-        # Format hints
-        q = question.lower()
-        fmt = ""
-        if "studio album" in q:
-            fmt = "\nCount only SOLO studio albums (exclude collaborative albums). Single integer answer."
-        elif "first name" in q:
-            fmt = "\nFirst name only."
-        elif "surname" in q or "last name" in q:
-            fmt = "\nSurname only."
-        elif "at bat" in q or "at-bat" in q:
-            fmt = "\nSingle integer only."
-        elif "how many" in q:
-            fmt = "\nSingle integer only."
-        elif "ioc" in q or ("country" in q and "olympic" in q):
-            fmt = "\nIOC country code only (3 letters, e.g. USA, GBR). If tied, alphabetically first."
-        elif "excel" in q or ("sale" in q and "food" in q):
-            fmt = "\nUSD with two decimal places (e.g. 89.50). No $ sign."
-        elif "chess" in q:
-            fmt = "\nChess move in algebraic notation only."
-        elif "pitcher" in q and "number" in q:
-            fmt = "\nTwo last names, comma-separated, pitcher with lower jersey number first."
-        elif "wikipedia" in q and "nominat" in q:
-            fmt = "\nWikipedia username only."
-        elif "grocery" in q or ("shopping" in q and "list" in q):
-            fmt = "\nComma-separated list, alphabetical order."
-        elif "youtube" in q or "video" in q:
-            fmt = "\nExact short answer only — quote, number, or name."
-        elif "grant" in q or "award number" in q:
-            fmt = "\nExact identifier only."
-        prompt = (
-            f"Question: {question}"
-            f"{file_ctx}"
-            f"{search_ctx}"
-            f"{fmt}"
-            "\n\nGive ONLY the final answer."
-        )
-        try:
-            answer = self.ask(prompt, max_tokens=64)
-            if len(answer.split()) > 20:
-                answer = clean_answer(rate_limited_groq(
                     self.key,
-                    f"Extract only the shortest final answer from:\n{answer}",
-                    "Reply with only the bare answer.", max_tokens=32))
-            print(f"  Final: '{answer}'")
-            return answer
-        except Exception as e:
-            print(f"  Error: {e}")
-            return ""
-def run_and_submit_all(profile: gr.OAuthProfile | None):
-    space_id = os.getenv("SPACE_ID")
     if not profile:
-        return "Please Login to Hugging Face.", None
     username = profile.username
-    try:
-        agent = BasicAgent()
-    except RuntimeError as e:
-        return f"❌ {e}", None
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    try:
-        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
-        resp.raise_for_status()
-        questions_data = resp.json()
-        print(f"Fetched {len(questions_data)} questions.")
-    except Exception as e:
-        return f"Error: {e}", None
-    results_log, answers_payload = [], []
-    for i, item in enumerate(questions_data):
-        task_id = item.get("task_id", "")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            continue
-        print(f"\n[{i+1}/{len(questions_data)}]")
-        try:
-            # Pass task_id directly — no injection needed
-            ans = agent(question_text, task_id=task_id)
-        except Exception as e:
-            ans = ""
-        answers_payload.append({"task_id": task_id, "submitted_answer": ans})
-        results_log.append({
-            "Task ID": task_id,
-            "Question": question_text[:100] + ("..." if len(question_text) > 100 else ""),
-            "Submitted Answer": ans
         })
-    if not answers_payload:
-        return "No answers.", pd.DataFrame(results_log)
-    try:
-        resp = requests.post(f"{DEFAULT_API_URL}/submit",
-            json={"username": username.strip(), "agent_code": agent_code, "answers": answers_payload},
-            timeout=60)
-        resp.raise_for_status()
-        r = resp.json()
-        return (f"Submission Successful!\nUser: {r.get('username')}\n"
-                f"Score: {r.get('score')}% ({r.get('correct_count')}/{r.get('total_attempted')} correct)\n"
-                f"Message: {r.get('message')}"), pd.DataFrame(results_log)
-    except Exception as e:
-        return f"Submission Failed: {e}", pd.DataFrame(results_log)
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
-    gr.Markdown("**Setup:** `GROQ_API_KEY` in Space Settings → Secrets. Free at [console.groq.com](https://console.groq.com)")
     gr.LoginButton()
-    with gr.Row():
-        test_btn = gr.Button("🔬 Test Groq API", variant="secondary")
-        test_out = gr.Textbox(label="Test Result", lines=2, interactive=False)
-    test_btn.click(fn=test_api, outputs=test_out)
-    gr.Markdown("---")
-    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
-    key = os.getenv("GROQ_API_KEY", "")
-    print(f"GROQ_API_KEY: {'SET ✅ ' + key[:8] + '...' if key else 'NOT SET ❌'}")
-    demo.launch(debug=True, share=False)

 import os
 import time
 import requests
 import pandas as pd
+import gradio as gr
+from ddgs import DDGS
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# -------------------------
+# GROQ API CALL
+# -------------------------
# Wall-clock time (time.time()) of the most recent Groq request; 0 = never called.
_last_call = 0


def call_llm(api_key, prompt, system="", max_tokens=128):
    """Send one chat-completion request to Groq and return the reply text.

    Consecutive calls are spaced at least 2.5 seconds apart. Only the
    remaining part of that interval is slept, so a call that arrives late
    is not delayed by a further full 2.5 seconds.

    Args:
        api_key: Groq API key, sent as a Bearer token.
        prompt: Content of the user message.
        system: Optional system message; omitted from the request when empty.
        max_tokens: Completion length limit forwarded to the API.

    Returns:
        The content of the first choice's message, stripped of surrounding
        whitespace.

    Raises:
        RuntimeError: The API answered with a non-200 status (after one
            retry when the first answer was HTTP 429).
        requests.RequestException: Transport failure or timeout.
    """
    global _last_call

    min_interval = 2.5
    remaining = min_interval - (time.time() - _last_call)
    if remaining > 0:
        time.sleep(remaining)
    _last_call = time.time()

    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    data = {
        "model": "llama-3.3-70b-versatile",
        "messages": messages,
        "temperature": 0,
        "max_tokens": max_tokens,
    }

    r = requests.post(url, headers=headers, json=data, timeout=60)

    if r.status_code == 429:
        # Rate limited: wait as long as the server asks (if it sent a
        # parseable retry-after header), capped at 60 s, then retry once.
        try:
            wait = float(r.headers.get("retry-after", 60))
        except ValueError:
            wait = 60.0
        time.sleep(min(max(wait, 0.0), 60.0))
        _last_call = time.time()
        r = requests.post(url, headers=headers, json=data, timeout=60)

    if r.status_code != 200:
        # Keep the message short: error bodies can be long.
        raise RuntimeError(f"Groq {r.status_code}: {r.text[:200]}")

    return r.json()["choices"][0]["message"]["content"].strip()
+# -------------------------
+# CLEAN ANSWER
+# -------------------------
 def clean_answer(text):
     text = text.strip()
+    prefixes = [
+        "FINAL ANSWER:",
+        "Final Answer:",
+        "Answer:",
+        "The answer is"
+    ]
+    for p in prefixes:
         if text.lower().startswith(p.lower()):
             text = text[len(p):].strip()
+    text = text.split("\n")[0]
+    return text.strip('"').strip("'").strip("*")
+# -------------------------
+# WEB SEARCH
+# -------------------------
def web_search(query, max_results=6):
    """Run a DuckDuckGo text search and return the hits as plain text.

    The search is best-effort: any error raised by the search client is
    printed and whatever was collected so far (possibly nothing) is
    returned, so a failed search cannot abort the caller.

    Args:
        query: Search string.
        max_results: Maximum number of hits requested from the client.

    Returns:
        One "title — body" line per hit, joined with newlines; "" when
        there are no hits or the search failed before yielding any.
    """
    results = []
    try:
        with DDGS() as ddgs:
            for r in ddgs.text(query, max_results=max_results):
                # .get() tolerates hits that lack a title or a body.
                results.append(f"{r.get('title', '')} — {r.get('body', '')}")
    except Exception as e:
        print("Search error:", e)
    return "\n".join(results)
+# -------------------------
+# AGENT
+# -------------------------
# System prompt passed to call_llm by BasicAgent.solve on every request; it
# instructs the model to reply with the bare final answer only.
SYSTEM = """
You are solving GAIA benchmark questions.
Rules:
Return ONLY the final answer.
No explanation.
Exact match grading.
"""
 class BasicAgent:
     def __init__(self):
+        self.key = os.getenv("GROQ_API_KEY")
         if not self.key:
+            raise RuntimeError("GROQ_API_KEY missing")
+        print("Agent ready")
+    # automatic retry
+    def solve(self, prompt):
+        for attempt in range(3):
+            try:
+                answer = call_llm(
                     self.key,
+                    prompt,
+                    SYSTEM,
+                    max_tokens=128
+                )
+                answer = clean_answer(answer)
+                if len(answer) > 0:
+                    return answer
+            except Exception as e:
+                print("Retry:", e)
+            time.sleep(2)
+        return ""
+    def __call__(self, question, task_id=""):
+        print("Question:", question)
+        search = web_search(question)
+        prompt = f"""
+Question:
+{question}
+Web information:
+{search}
+Return ONLY the final answer.
+"""
+        answer = self.solve(prompt)
+        print("Answer:", answer)
+        return answer
+# -------------------------
+# EVALUATION
+# -------------------------
def run_and_submit_all(profile: "gr.OAuthProfile | None"):
    """Run the agent on every question and submit the answers for scoring.

    The parameter is annotated with gr.OAuthProfile because Gradio decides
    from the type hint whether to inject the logged-in user's profile;
    the click handler passes no explicit inputs.

    Args:
        profile: Logged-in user's profile, or None when nobody is logged in.

    Returns:
        A (status message, results table) pair. The table is None when the
        run stopped before any question was attempted.
    """
    if not profile:
        return "Please login", None
    username = profile.username

    try:
        agent = BasicAgent()
    except RuntimeError as e:
        return f"Agent setup failed: {e}", None

    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
    except (requests.RequestException, ValueError) as e:
        return f"Error fetching questions: {e}", None

    answers = []
    logs = []
    for q in questions:
        task_id = q.get("task_id")
        question = q.get("question")
        if not task_id or question is None:
            continue
        # One failing question must not discard the answers already collected.
        try:
            ans = agent(question, task_id)
        except Exception as e:
            print(f"Agent failed on {task_id}: {e}")
            ans = ""
        answers.append({
            "task_id": task_id,
            "submitted_answer": ans,
        })
        logs.append({
            "Task": task_id,
            "Answer": ans,
        })

    if not answers:
        return "No answers to submit.", pd.DataFrame(logs)

    # Link to this Space's code when running on Hugging Face.
    # NOTE(review): the scoring API may reject an empty agent_code — confirm.
    space_id = os.getenv("SPACE_ID")
    agent_code = (
        f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else ""
    )

    try:
        resp = requests.post(
            f"{DEFAULT_API_URL}/submit",
            json={
                "username": username,
                "agent_code": agent_code,
                "answers": answers,
            },
            timeout=60,
        )
        resp.raise_for_status()
        result = resp.json()
    except (requests.RequestException, ValueError) as e:
        return f"Submission failed: {e}", pd.DataFrame(logs)

    # .get() keeps the summary readable even if a field is missing.
    msg = f"""
User: {result.get('username')}
Score: {result.get('score')}%
Correct: {result.get('correct_count')}
"""
    return msg, pd.DataFrame(logs)
+# -------------------------
+# UI
+# -------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# GAIA Agent")
     gr.LoginButton()
+    run_btn = gr.Button("Run Evaluation")
+    status = gr.Textbox()
+    table = gr.DataFrame()
+    run_btn.click(
+        run_and_submit_all,
+        outputs=[status, table]
+    )
 if __name__ == "__main__":
+    demo.launch()