import json
import os
import subprocess
import sys
import time
import urllib.request

import streamlit as st
from huggingface_hub import login
from smolagents import CodeAgent, DuckDuckGoSearchTool, PythonREPLTool, HfApiModel


class BasicAgent:
    """Question-answering agent that wraps a smolagents ``CodeAgent``.

    Calling an instance with a question string runs the underlying agent and
    returns a cleaned-up answer string, or ``"N/A"`` when no answer could be
    produced.
    """

    # Lower-case lead-ins removed from the front of an answer, checked in order.
    _ANSWER_PREFIXES = ("final answer:", "answer:")

    def __init__(self):
        """Build the model, the tool list and the ``CodeAgent`` that uses them."""
        st.write("🧠 Initializing enhanced GAIA Agent...")

        self.model = HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct")

        # NOTE(review): confirm that the installed smolagents release exports
        # PythonREPLTool under this name.
        self.tools = [
            DuckDuckGoSearchTool(),
            PythonREPLTool(),
        ]

        self.agent = CodeAgent(
            tools=self.tools,
            model=self.model,
            name="GAIA_Level1_Agent",
            description="Hybrid reasoning agent using web + code execution to answer GAIA L1 questions.",
            max_steps=5,
        )

    def sanitize(self, text: object) -> str:
        """Clean and simplify final outputs for benchmark scoring.

        Args:
            text: Raw agent output. Non-string values (for example a number
                returned by the agent) are converted with ``str`` instead of
                being rejected.

        Returns:
            The answer with any leading "Final Answer:" / "Answer:" label
            removed (case-insensitively), one pair of surrounding double
            quotes dropped, and runs of whitespace collapsed to single
            spaces. ``None`` yields an empty string.
        """
        if text is None:
            return ""
        cleaned = str(text).strip()
        for prefix in self._ANSWER_PREFIXES:
            # Compare only the leading slice so the original casing of the
            # remainder is preserved.
            if cleaned[:len(prefix)].lower() == prefix:
                cleaned = cleaned[len(prefix):].strip()
        if cleaned.startswith('"') and cleaned.endswith('"'):
            cleaned = cleaned[1:-1]
        return " ".join(cleaned.split())

    def __call__(self, question: str) -> str:
        """Run the agent on a single GAIA question.

        Args:
            question: The question text; its first 80 characters are echoed
                to the page.

        Returns:
            The sanitized answer, or ``"N/A"`` if the answer is empty or the
            agent raised an exception.
        """
        st.write(f"🤖 Running agent on: {question[:80]}...")
        prompt = (
            "You are a concise reasoning agent. "
            "Use your tools to find accurate answers. "
            "Always return only the final answer (no explanations).\n\n"
            f"Question: {question}"
        )

        # Broad catch on purpose: one failing question must not abort the
        # whole evaluation run, so the error is shown and "N/A" is recorded.
        try:
            response = self.agent.run(prompt)
            clean_answer = self.sanitize(response)
            st.write(f"✅ Final Answer: {clean_answer}")
            return clean_answer or "N/A"
        except Exception as e:
            st.error(f"⚠️ Agent failed: {e}")
            return "N/A"


# Location of the question set and the local files the app reads and writes.
QUESTIONS_URL = (
    "https://huggingface.co/spaces/agents-course/Final_Assignment_Template"
    "/resolve/main/questions.json"
)
QUESTIONS_PATH = "questions.json"
ANSWERS_PATH = "answers.json"


def _download_questions(url: str, dest: str) -> None:
    """Download the question file from *url* and store it at *dest*.

    The payload is parsed as JSON before anything is written, so a failed or
    non-JSON response never replaces an existing file.

    Raises:
        OSError: The request or the file write failed.
        ValueError: The response body is not valid JSON.
    """
    with urllib.request.urlopen(url, timeout=30) as response:
        payload = response.read()
    json.loads(payload)
    with open(dest, "wb") as out:
        out.write(payload)


def _load_questions(path: str) -> list:
    """Read the question texts stored in the JSON file at *path*.

    The documented layout is an object with a ``"questions"`` list.
    NOTE(review): a bare top-level list, and items given as objects with a
    ``"question"`` key, are also accepted as a best-effort fallback - confirm
    against the real file schema.
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    items = data["questions"] if isinstance(data, dict) else data
    return [
        item["question"] if isinstance(item, dict) else item
        for item in items
    ]


def _submit_answers(path: str) -> None:
    """Run the submission command on *path* and report its real outcome."""
    st.info("📤 Submitting answers to GAIA leaderboard...")
    # NOTE(review): confirm that `smolagents.eval_gaia` exists in the
    # installed smolagents release. sys.executable keeps the call inside the
    # interpreter (and environment) that is running this app.
    completed = subprocess.run(
        [sys.executable, "-m", "smolagents.eval_gaia", "submit", path],
        capture_output=True,
        text=True,
        check=False,
    )
    if completed.returncode == 0:
        st.success("🎉 Submission complete! Check your score on the leaderboard.")
    else:
        details = completed.stderr.strip() or f"exit code {completed.returncode}"
        st.error(f"Submission failed: {details}")


def _run_evaluation() -> None:
    """Answer every loaded question, save the answers, then submit them."""
    if not os.path.exists(QUESTIONS_PATH):
        st.warning("Please load the GAIA dataset first.")
        return

    try:
        questions = _load_questions(QUESTIONS_PATH)
    except (OSError, ValueError, KeyError, TypeError) as exc:
        st.error(f"Could not read {QUESTIONS_PATH}: {exc!r}")
        return

    agent = BasicAgent()
    results = {}

    for number, question in enumerate(questions, start=1):
        st.write(f"### Question {number}:")
        st.write(question)
        results[question] = agent(question)
        time.sleep(1)  # brief pause between consecutive agent runs

    with open(ANSWERS_PATH, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    st.success("✅ All questions answered and saved as answers.json")

    _submit_answers(ANSWERS_PATH)


st.set_page_config(page_title="GAIA Final Assignment", layout="centered")

st.title("🤖 GAIA Benchmark Final Assignment")
st.markdown(
    """
Welcome to your **Final Assignment** for the Agents course!

This app evaluates your custom agent on a subset of **GAIA Level 1** benchmark questions.
To pass and earn your certificate 🏅, your agent must score **≥ 30% accuracy**.

---

### 🧭 Steps
1. Log in to your **Hugging Face** account.
2. Run your **agent** on the GAIA dataset.
3. Automatically submit your results for scoring.

---
"""
)

# --- Hugging Face login ---
hf_token = st.text_input("🔑 Enter your Hugging Face access token:", type="password")
if st.button("Login to Hugging Face"):
    if not hf_token.strip():
        # Reject an empty token up front instead of handing it to login().
        st.warning("Please enter your access token first.")
    else:
        try:
            login(token=hf_token.strip())
            st.success("✅ Logged in successfully!")
        except Exception as e:
            st.error(f"Login failed: {e}")

# --- Dataset download ---
if st.button("🧩 Load GAIA Dataset"):
    st.info("Fetching 20 GAIA Level 1 questions...")
    try:
        _download_questions(QUESTIONS_URL, QUESTIONS_PATH)
    except (OSError, ValueError) as exc:
        st.error(f"Dataset download failed: {exc}")
    else:
        st.success("✅ Dataset loaded!")

# --- Evaluation and submission ---
if st.button("🚀 Run Evaluation & Submit All Answers"):
    _run_evaluation()

st.markdown(
    """
---
### ℹ️ Notes
- You can edit the agent logic inside the `BasicAgent` class to boost performance.
- Use more reasoning, examples, or API calls for higher accuracy.
- Make your Space **public** before submitting.

Good luck on the GAIA leaderboard! 🚀
"""
)