Final_Assignment_Template

Sleeping

App Files Community

SantoshKumar1310 committed on Oct 27, 2025

Commit

3075801

verified ·

1 Parent(s): 2889cb1

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -138

app.py CHANGED Viewed

@@ -1,155 +1,138 @@
-# app.py — Final GAIA Assignment Template (Enhanced)
-import streamlit as st
-from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonREPLTool, HfApiModel
-from huggingface_hub import login
-import json
-import time
 import os
-# =========================
-# 1. Define the GAIA Agent
-# =========================
 class BasicAgent:
     def __init__(self):
-        st.write("🔧 Initializing enhanced GAIA Agent...")
-        # Core model from Hugging Face
-        self.model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")
-        # Tools for reasoning and search
-        self.tools = [
-            DuckDuckGoSearchTool(),
-            PythonREPLTool()
-        ]
-        # Create a CodeAgent instance
-        self.agent = CodeAgent(
-            tools=self.tools,
-            model=self.model,
-            name="GAIA_Level1_Agent",
-            description="Hybrid reasoning agent using web + code execution to answer GAIA L1 questions.",
-            max_steps=5
-        )
-    def sanitize(self, text: str) -> str:
-        """Clean and simplify final outputs for benchmark scoring."""
-        if not text:
-            return ""
-        text = text.strip()
-        for prefix in ["FINAL ANSWER:", "Final Answer:", "Answer:", "answer:"]:
-            if text.startswith(prefix):
-                text = text[len(prefix):].strip()
-        if text.startswith('"') and text.endswith('"'):
-            text = text[1:-1]
-        text = " ".join(text.split())
-        return text
     def __call__(self, question: str) -> str:
-        """Run the agent on a single GAIA question."""
-        st.write(f"🤖 Running agent on: {question[:80]}...")
-        prompt = (
-            "You are a concise reasoning agent. "
-            "Use your tools to find accurate answers. "
-            "Always return only the final answer (no explanations).\n\n"
-            f"Question: {question}"
-        )
-        try:
-            response = self.agent.run(prompt)
-            clean_answer = self.sanitize(response)
-            st.write(f"✅ Final Answer: {clean_answer}")
-            return clean_answer or "N/A"
-        except Exception as e:
-            st.error(f"⚠️ Agent failed: {e}")
-            return "N/A"
-# =======================================
-# 2. Streamlit UI and GAIA Leaderboard
-# =======================================
-st.set_page_config(page_title="GAIA Final Assignment", layout="centered")
-st.title("🤖 GAIA Benchmark Final Assignment")
-st.markdown(
     """
-Welcome to your **Final Assignment** for the Agents course!
-This app evaluates your custom agent on a subset of **GAIA Level 1** benchmark questions.
-To pass and earn your certificate 🏅, your agent must score **≥ 30% accuracy**.
----
-### 🧠 Steps
-1. Log in to your **Hugging Face** account.
-2. Run your **agent** on the GAIA dataset.
-3. Automatically submit your results for scoring.
----
-"""
-)
-# =========================
-# 3. Login Section
-# =========================
-hf_token = st.text_input("🔑 Enter your Hugging Face access token:", type="password")
-if st.button("Login to Hugging Face"):
     try:
-        login(token=hf_token)
-        st.success("✅ Logged in successfully!")
     except Exception as e:
-        st.error(f"Login failed: {e}")
-# =========================
-# 4. Load GAIA Questions
-# =========================
-if st.button("🧩 Load GAIA Dataset"):
-    st.info("Fetching 20 GAIA Level 1 questions...")
-    os.system("wget -q https://huggingface.co/spaces/agents-course/Final_Assignment_Template/resolve/main/questions.json -O questions.json")
-    st.success("✅ Dataset loaded!")
-# =========================
-# 5. Run Evaluation
-# =========================
-if st.button("🚀 Run Evaluation & Submit All Answers"):
-    if not os.path.exists("questions.json"):
-        st.warning("Please load the GAIA dataset first.")
-    else:
-        with open("questions.json", "r") as f:
-            data = json.load(f)
-        questions = data["questions"]
-        agent = BasicAgent()
-        results = {}
-        for i, q in enumerate(questions):
-            st.write(f"### Question {i+1}:")
-            st.write(q)
-            ans = agent(q)
-            results[q] = ans
-            time.sleep(1)
-        # Save answers
-        with open("answers.json", "w") as f:
-            json.dump(results, f, indent=2)
-        st.success("✅ All questions answered and saved as answers.json")
-        # Auto-submit via huggingface CLI (if supported)
-        st.info("📤 Submitting answers to GAIA leaderboard...")
-        os.system("python3 -m smolagents.eval_gaia submit answers.json")
-        st.success("🎉 Submission complete! Check your score on the leaderboard.")
-# =========================
-# 6. Notes
-# =========================
-st.markdown(
-    """
----
-### ℹ️ Notes
-- You can edit the agent logic inside the `BasicAgent` class to boost performance.
-- Use more reasoning, examples, or API calls for higher accuracy.
-- Make your Space **public** before submitting.
-Good luck on the GAIA leaderboard! 🌍
-"""
-)

 import os
+import gradio as gr
+import requests
+import pandas as pd
+# --- Constants ---
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Basic Agent Definition ---
+# 👉 You can customize this class with your own logic or tools
 class BasicAgent:
     def __init__(self):
+        print("✅ BasicAgent initialized.")
     def __call__(self, question: str) -> str:
+        print(f"🧠 Received question: {question[:60]}...")
+        # Default fixed answer (customize this)
+        fixed_answer = "This is a default answer."
+        print(f"💬 Returning: {fixed_answer}")
+        return fixed_answer
+def run_and_submit_all(profile: gr.OAuthProfile | None):
+    """
+    Fetch all questions, run the agent, submit answers, and show results.
     """
+    space_id = os.getenv("SPACE_ID")  # Hugging Face Space ID
+    if profile:
+        username = profile.username
+        print(f"👤 User logged in: {username}")
+    else:
+        print("❌ User not logged in.")
+        return "Please login to Hugging Face first.", None
+    api_url = DEFAULT_API_URL
+    questions_url = f"{api_url}/questions"
+    submit_url = f"{api_url}/submit"
+    # 1️⃣ Create Agent
+    try:
+        agent = BasicAgent()
+    except Exception as e:
+        return f"Agent initialization failed: {e}", None
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "Local_Run"
+    print(f"📁 Agent code link: {agent_code}")
+    # 2️⃣ Fetch Questions
     try:
+        print("📡 Fetching questions...")
+        response = requests.get(questions_url, timeout=15)
+        response.raise_for_status()
+        questions_data = response.json()
+        if not questions_data:
+            return "Fetched question list is empty or invalid.", None
+        print(f"✅ Retrieved {len(questions_data)} questions.")
     except Exception as e:
+        return f"Error fetching questions: {e}", None
+    # 3️⃣ Run Agent
+    results_log = []
+    answers_payload = []
+    for item in questions_data:
+        task_id = item.get("task_id")
+        question_text = item.get("question")
+        if not task_id or question_text is None:
+            continue
+        try:
+            submitted_answer = agent(question_text)
+            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text,
+                "Submitted Answer": submitted_answer
+            })
+        except Exception as e:
+            results_log.append({
+                "Task ID": task_id,
+                "Question": question_text,
+                "Submitted Answer": f"AGENT ERROR: {e}"
+            })
+    if not answers_payload:
+        return "No answers generated by the agent.", pd.DataFrame(results_log)
+    # 4️⃣ Submit Answers
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload
+    }
+    try:
+        print("📤 Submitting answers...")
+        response = requests.post(submit_url, json=submission_data, timeout=60)
+        response.raise_for_status()
+        result_data = response.json()
+        final_status = (
+            f"✅ Submission Successful!\n"
+            f"👤 User: {result_data.get('username')}\n"
+            f"🏁 Score: {result_data.get('score', 'N/A')}% "
+            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+            f"📝 Message: {result_data.get('message', 'No message received.')}"
+        )
+        results_df = pd.DataFrame(results_log)
+        return final_status, results_df
+    except Exception as e:
+        return f"Submission failed: {e}", pd.DataFrame(results_log)
+# --- Gradio Interface ---
+with gr.Blocks() as demo:
+    gr.Markdown("# 🤖 Basic Agent Evaluation Runner")
+    gr.Markdown(
+        """
+        ### Instructions:
+        1️⃣ Clone this space on your Hugging Face profile.
+        2️⃣ Modify the `BasicAgent` class with your logic.
+        3️⃣ Log in below and run evaluation.
+        ---
+        The process may take time (the agent answers all questions).
+        You can customize the agent with reasoning, search tools, or memory.
+        """
+    )
+    gr.LoginButton()
+    run_button = gr.Button("🚀 Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Run Status / Submission Result", lines=6, interactive=False)
+    results_table = gr.DataFrame(label="Questions and Agent Answers")
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
+if __name__ == "__main__":
+    print("🚀 Launching Gradio Interface...")
+    demo.launch(debug=True, share=False)