My_Final_Assignment

Runtime error

App Files Files Community

AlexDGenu committed on Jun 30, 2025

Commit

9ec3f06

1 Parent(s): 0ab201b

Refactor run_gaia_evaluation to integrate LiteLLMModel and update agent initialization.

Browse files

Files changed (1) hide show

app.py +52 -157

app.py CHANGED Viewed

@@ -1,18 +1,11 @@
 import os
-import gradio as gr
 import requests
-import pandas as pd
 from dotenv import load_dotenv
-from smolagents import CodeAgent, OpenAIServerModel, DuckDuckGoSearchTool
-# Load environment variables (including OPENAI_API_KEY)
 load_dotenv()
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 INSTRUCTIONS = """You are a general AI assistant. I will ask you a question. Report your thoughts, and then provide your final answer.
 CRITICAL FORMATTING RULES:
@@ -24,62 +17,34 @@ CRITICAL FORMATTING RULES:
 - For strings: no extra spaces, no punctuation unless part of the answer, lowercase
 - For numbers: just the number, no units, no commas, no currency symbols
 - Provide ONLY the answer as your final response, nothing else
 You have access to a web search tool to help you find accurate information. Use it when you need to look up facts."""
-# --- Smol Agent Definition ---
-class SmolAgent:
-    def __init__(self):
-        print("Initializing SmolAgent with OpenAI model...")
-        if not OPENAI_API_KEY:
-            raise ValueError("OPENAI_API_KEY not found. Please set it in your environment.")
-        # Initialize the OpenAI-backed model
-        self.model = OpenAIServerModel(
-            model_id="gpt-4o-mini",                  # or "gpt-4", "gpt-3.5-turbo", etc.
-            api_base="https://api.openai.com/v1",
-            api_key=OPENAI_API_KEY,
-        )
-        # Initialize the agent with tools and instructions
-        self.agent = CodeAgent(
-            tools=[DuckDuckGoSearchTool()],
-            model=self.model,
-            instructions=INSTRUCTIONS,
-            max_steps=7,
-        )
-        print("SmolAgent initialized with CodeAgent and DuckDuckGoSearchTool.")
-    def __call__(self, question: str) -> str:
-        print(f"\n🪐 Running on question:\n{question}\n")
-        try:
-            answer = self.agent.run(question)
-            print(f"✅ Agent's final answer: {answer}")
-            return str(answer)
-        except Exception as e:
-            import traceback
-            traceback.print_exc()
-            error_message = f"AGENT ERROR: {e}"
-            print(f"❌ {error_message}")
-            return error_message
-def run_gaia_evaluation(username: str):
-    """Run the complete GAIA evaluation and submit results"""
-    print("🚀 GAIA Benchmark Evaluation with ChatGPT")
     print("=" * 60)
     if not username:
-        return "❌ Please provide a username"
     print(f"👤 User: {username}")
-    # Initialize the agent
-    try:
-        agent = SmolAgent()
-    except Exception as e:
-        return f"❌ Failed to initialize agent: {e}"
-    # Fetch questions
     try:
         resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
         resp.raise_for_status()
@@ -87,121 +52,51 @@ def run_gaia_evaluation(username: str):
         questions = data if isinstance(data, list) else data.get("questions", [])
         print(f"📋 Loaded {len(questions)} questions")
     except requests.RequestException as e:
-        return f"❌ Error fetching questions: {e}"
-    # Process questions
     results = []
-    progress_log = []
     for i, q in enumerate(questions):
         task_id = q["task_id"]
         text = q["question"]
-        progress_log.append(f"❓ Question {i+1}: {text}")
         print(f"\n❓ Question {i+1}: {text}")
-        try:
-            result = agent(text)
-            result_str = str(result).strip()
-            # Take the last line as the answer
-            out = result_str.splitlines()[-1] if result_str else "AGENT ERROR: No response."
-            if out.startswith("{"):
-                out = "AGENT ERROR: No final answer."
-            out = out.strip().rstrip(".")
-            results.append({"task_id": task_id, "submitted_answer": out})
-            progress_log.append(f"✅ Answer: '{out}'")
-            print(f"✅ Answer: '{out}'")
-        except Exception as e:
-            error_msg = f"AGENT ERROR: {e}"
-            results.append({"task_id": task_id, "submitted_answer": error_msg})
-            progress_log.append(f"❌ Error: {error_msg}")
-            print(f"❌ Error: {error_msg}")
-    # Submit results
     payload = {
         "username": username,
-        "agent_code": "chatgpt-gpt4o-mini-with-tools",
         "answers": results,
     }
     try:
-        print("📤 Submitting to GAIA leaderboard...")
         post = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=60)
         post.raise_for_status()
         res = post.json()
-        # Format results for display
-        result_summary = f"""
-🏆 GAIA BENCHMARK RESULTS
-{'=' * 60}
-👤 User: {res.get('username', username)}
-📊 Overall Score: {res.get('score', res.get('overall_score', 'N/A'))}%
-✅ Correct: {res.get('correct_count', res.get('num_correct', 'N/A'))}/{len(results)}
-💬 Message: {res.get('message', 'N/A')}
-{'=' * 60}
-"""
-        # Combine progress log with final results
-        full_log = "\n".join(progress_log) + "\n" + result_summary
-        return full_log
     except requests.RequestException as e:
-        error_msg = f"❌ Error submitting: {e}"
         done = sum(1 for r in results if not r["submitted_answer"].startswith("AGENT ERROR"))
-        local_summary = f"📋 Completed locally: {done}/{len(results)}"
-        return "\n".join(progress_log) + "\n" + error_msg + "\n" + local_summary
-# --- Gradio Interface ---
-def create_interface():
-    with gr.Blocks(title="GAIA Benchmark with ChatGPT", theme=gr.themes.Soft()) as demo:
-        gr.Markdown("# 🚀 GAIA Benchmark Evaluation with ChatGPT")
-        gr.Markdown("This app runs the GAIA benchmark using ChatGPT (GPT-4o-mini) with web search capabilities.")
-        with gr.Row():
-            with gr.Column(scale=1):
-                username_input = gr.Textbox(
-                    label="Hugging Face Username",
-                    placeholder="Enter your HF username",
-                    info="This will be used for the GAIA leaderboard submission"
-                )
-                run_button = gr.Button("🚀 Run GAIA Evaluation", variant="primary", size="lg")
-            with gr.Column(scale=2):
-                output_area = gr.Textbox(
-                    label="Results & Progress",
-                    lines=20,
-                    max_lines=50,
-                    interactive=False
-                )
-        # Event handler
-        run_button.click(
-            fn=run_gaia_evaluation,
-            inputs=[username_input],
-            outputs=[output_area]
-        )
-        gr.Markdown("""
-        ### How it works:
-        1. Enter your Hugging Face username
-        2. Click "Run GAIA Evaluation"
-        3. The agent will process all 20 GAIA questions using ChatGPT + web search
-        4. Results will be automatically submitted to the GAIA leaderboard
-        5. Your score will be displayed here
-        ### Requirements:
-        - Set `OPENAI_API_KEY` in your environment variables
-        - Valid Hugging Face username for leaderboard submission
-        """)
-    return demo
-# --- Main execution ---
 if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch()

 import os
 import requests
 from dotenv import load_dotenv
+from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel
 load_dotenv()
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 INSTRUCTIONS = """You are a general AI assistant. I will ask you a question. Report your thoughts, and then provide your final answer.
 CRITICAL FORMATTING RULES:
 - For strings: no extra spaces, no punctuation unless part of the answer, lowercase
 - For numbers: just the number, no units, no commas, no currency symbols
 - Provide ONLY the answer as your final response, nothing else
+- Expand abbreviations like 'St.' to 'Saint' in city names
 You have access to a web search tool to help you find accurate information. Use it when you need to look up facts."""
+def run_gaia_evaluation():
+    print("🚀 GAIA Benchmark Evaluation with Ollama")
     print("=" * 60)
+    username = os.getenv("HF_USERNAME")
     if not username:
+        print("❌ Please set HF_USERNAME environment variable")
+        return
     print(f"👤 User: {username}")
+    model = LiteLLMModel(
+        model_id="ollama_chat/gemma3",
+        api_base="http://localhost:11434",
+        num_ctx=8192,
+        temperature=0.1,  # Low temperature for more deterministic answers
+    )
+    agent = CodeAgent(
+        tools=[DuckDuckGoSearchTool()],
+        model=model,
+        instructions=INSTRUCTIONS,
+        max_steps=10,
+    )
     try:
         resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
         resp.raise_for_status()
         questions = data if isinstance(data, list) else data.get("questions", [])
         print(f"📋 Loaded {len(questions)} questions")
     except requests.RequestException as e:
+        print(f"❌ Error fetching questions: {e}")
+        return
     results = []
     for i, q in enumerate(questions):
         task_id = q["task_id"]
         text = q["question"]
         print(f"\n❓ Question {i+1}: {text}")
+        result = agent.run(text, reset=True)
+        result_str = str(result).strip()
+        # Take the last line as the answer (since agent should provide only the answer)
+        out = result_str.splitlines()[-1] if result_str else "AGENT ERROR: No response."
+        if out.startswith("{"):
+            out = "AGENT ERROR: No final answer."
+        out = out.strip().rstrip(".")
+        results.append({"task_id": task_id, "submitted_answer": out})
+        print(f"✅ Answer: '{out}'")
+        print(f"📝 Preview: {result_str[:200]}...")
+    # Submit answers automatically
     payload = {
         "username": username,
+        "agent_code": "ollama-gemma3-with-tools",
         "answers": results,
     }
     try:
         post = requests.post(f"{DEFAULT_API_URL}/submit", json=payload, timeout=60)
         post.raise_for_status()
         res = post.json()
+        print("\n" + "=" * 60)
+        print("🏆 GAIA BENCHMARK RESULTS")
+        print("=" * 60)
+        print(f"👤 User: {res.get('username', username)}")
+        print(f"📊 Overall Score: {res.get('score', res.get('overall_score', 'N/A'))}%")
+        print(f"✅ Correct: {res.get('correct_count', res.get('num_correct', 'N/A'))}/{len(results)}")
+        print(f"💬 Message: {res.get('message', 'N/A')}")
+        print("=" * 60)
     except requests.RequestException as e:
+        print(f"❌ Error submitting: {e}")
         done = sum(1 for r in results if not r["submitted_answer"].startswith("AGENT ERROR"))
+        print(f"Completed locally: {done}/{len(results)}")
 if __name__ == "__main__":
+    run_gaia_evaluation()