Final_Assignment_Template

Sleeping

App Files Community

bhotta committed 15 days ago

Commit

5ebb577

verified ·

1 Parent(s): d6e9174

Update app.py

Browse files

Files changed (1) hide show

app.py +178 -50

app.py CHANGED Viewed

@@ -2,76 +2,215 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
-from smolagents import CodeAgent, DuckDuckGoSearchTool, OpenAIServerModel
-# --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- OpenAI-Powered Agent Definition ---
 class BasicAgent:
     def __init__(self):
-        # 1. Get the key from the environment (Must be set in HF Space Secrets)
         api_key = os.getenv("OPENAI_API_KEY")
         if not api_key:
-            raise ValueError("OPENAI_API_KEY is missing! Add it to your Space Secrets.")
-        # 2. Initialize the Model (GPT-4o is recommended for GAIA tasks)
         self.model = OpenAIServerModel(
-            model_id="gpt-4o",
             api_key=api_key
         )
-        # 3. Initialize the Agent with tools
         self.agent = CodeAgent(
-            tools=[DuckDuckGoSearchTool()],
             model=self.model,
-            add_base_tools=True
         )
         print("✅ OpenAI-powered Agent initialized.")
-    def __call__(self, question: str) -> str:
         print(f"DEBUG: Agent received question: {question[:100]}...")
-        # Formatting the prompt for precise GAIA evaluation
         prompt = (
-            f"You are a helpful agent. Task: {question}\n\n"
-            "Provide ONLY the final direct answer. No explanations, no 'The answer is...', "
-            "just the value or fact requested."
         )
         try:
             result = self.agent.run(prompt)
             return str(result).strip()
         except Exception as e:
-            print(f"❌ Error during agent execution: {e}")
             return "Error finding answer."
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    """
-    Fetches GAIA questions, runs the BasicAgent, and submits to the leaderboard.
-    """
-    # 1. Check Login
     if profile:
         username = f"{profile.username}"
         print(f"Logged in as: {username}")
     else:
-        return "Please Login to Hugging Face with the button above first.", None
-    # 2. Setup URLs and Paths
-    space_id = os.getenv("SPACE_ID")
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-    # 3. Instantiate Agent
     try:
         agent = BasicAgent()
     except Exception as e:
         return f"Initialization Failed: {e}", None
-    # 4. Fetch Questions
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
@@ -79,26 +218,22 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     except Exception as e:
         return f"Error fetching questions: {e}", None
-    # 5. Run Agent on Questions
     results_log = []
     answers_payload = []
-    # NOTE: This loop can take several minutes!
     for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Answer": submitted_answer})
         except Exception as e:
-            results_log.append({"Task ID": task_id, "Question": question_text, "Answer": f"Error: {e}"})
-    # 6. Submit to Leaderboard
     submission_data = {
-        "username": username.strip(),
-        "agent_code": agent_code,
         "answers": answers_payload
     }
@@ -106,7 +241,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         res = response.json()
         status = (
             f"Submission Successful!\n"
             f"Score: {res.get('score')}% ({res.get('correct_count')}/{res.get('total_attempted')})\n"
@@ -116,21 +250,15 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)
-# --- Gradio UI ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 GAIA Agent Evaluation")
     gr.Markdown("Click Login, then Run to evaluate your agent on the GAIA dataset.")
     gr.LoginButton()
     run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")
     status_output = gr.Textbox(label="Status", lines=4)
     results_table = gr.DataFrame(label="Agent Performance Log")
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import requests
 import pandas as pd
+from smolagents import CodeAgent, OpenAIServerModel, tool
+from openai import OpenAI
+import base64
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Custom Tools ---
+@tool
+def search_web(query: str) -> str:
+    """Search the web using a query string. Returns search results as text."""
+    try:
+        from duckduckgo_search import DDGS
+        with DDGS() as ddgs:
+            results = list(ddgs.text(query, max_results=5))
+        return "\n\n".join(
+            f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body']}"
+            for r in results
+        )
+    except Exception as e:
+        return f"Search failed: {e}"
+@tool
+def visit_url(url: str) -> str:
+    """Fetch the text content of a webpage at the given URL."""
+    try:
+        headers = {"User-Agent": "Mozilla/5.0"}
+        resp = requests.get(url, headers=headers, timeout=15)
+        resp.raise_for_status()
+        # Basic HTML stripping
+        import re
+        text = re.sub(r'<[^>]+>', ' ', resp.text)
+        text = re.sub(r'\s+', ' ', text)
+        return text[:5000]  # limit to 5000 chars
+    except Exception as e:
+        return f"Failed to fetch URL: {e}"
+@tool
+def wikipedia_search(topic: str) -> str:
+    """Search Wikipedia for a topic and return a summary."""
+    try:
+        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{topic.replace(' ', '_')}"
+        resp = requests.get(url, timeout=10)
+        resp.raise_for_status()
+        data = resp.json()
+        return data.get("extract", "No summary found.")
+    except Exception as e:
+        return f"Wikipedia search failed: {e}"
+@tool
+def analyze_image_from_url(image_url: str, question: str) -> str:
+    """Analyze an image from a URL using GPT-4o vision and answer a question about it."""
+    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+    try:
+        response = client.chat.completions.create(
+            model="gpt-4o",
+            messages=[{
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": question}
+                ]
+            }],
+            max_tokens=500
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        return f"Image analysis failed: {e}"
+@tool
+def analyze_task_file(task_id: str, question: str) -> str:
+    """
+    Download and analyze a file attached to a GAIA task.
+    Returns analysis result based on the question.
+    task_id: the GAIA task ID
+    question: what to ask about the file
+    """
+    api_url = DEFAULT_API_URL
+    file_url = f"{api_url}/files/{task_id}"
+    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+    try:
+        resp = requests.get(file_url, timeout=30)
+        resp.raise_for_status()
+        content_type = resp.headers.get("content-type", "")
+        file_bytes = resp.content
+        # Image files → vision
+        if any(x in content_type for x in ["image", "png", "jpeg", "jpg", "gif", "webp"]):
+            b64 = base64.b64encode(file_bytes).decode()
+            ext = content_type.split("/")[-1]
+            data_url = f"data:{content_type};base64,{b64}"
+            response = client.chat.completions.create(
+                model="gpt-4o",
+                messages=[{
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": data_url}},
+                        {"type": "text", "text": question}
+                    ]
+                }],
+                max_tokens=500
+            )
+            return response.choices[0].message.content
+        # Text/CSV/code files
+        elif any(x in content_type for x in ["text", "csv", "json", "html"]):
+            text_content = file_bytes.decode("utf-8", errors="ignore")[:8000]
+            response = client.chat.completions.create(
+                model="gpt-4o",
+                messages=[{
+                    "role": "user",
+                    "content": f"File content:\n{text_content}\n\nQuestion: {question}"
+                }],
+                max_tokens=500
+            )
+            return response.choices[0].message.content
+        # Audio → Whisper transcription
+        elif any(x in content_type for x in ["audio", "mp3", "wav", "m4a", "ogg"]):
+            import tempfile
+            suffix = "." + content_type.split("/")[-1]
+            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
+                f.write(file_bytes)
+                f.flush()
+                transcript = client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=open(f.name, "rb")
+                )
+            return f"Transcript: {transcript.text}\n\nAnswer to '{question}': {transcript.text}"
+        else:
+            return f"File downloaded ({len(file_bytes)} bytes, type: {content_type}) but format not supported for analysis."
+    except Exception as e:
+        return f"File analysis failed: {e}"
+# --- Agent ---
 class BasicAgent:
     def __init__(self):
         api_key = os.getenv("OPENAI_API_KEY")
         if not api_key:
+            raise ValueError("OPENAI_API_KEY is missing!")
         self.model = OpenAIServerModel(
+            model_id="gpt-4o",
             api_key=api_key
         )
         self.agent = CodeAgent(
+            tools=[
+                search_web,
+                visit_url,
+                wikipedia_search,
+                analyze_image_from_url,
+                analyze_task_file,
+            ],
             model=self.model,
+            add_base_tools=True,
+            max_steps=12,
         )
         print("✅ OpenAI-powered Agent initialized.")
+    def __call__(self, question: str, task_id: str = "") -> str:
         print(f"DEBUG: Agent received question: {question[:100]}...")
         prompt = (
+            f"You are a precise research agent solving GAIA benchmark tasks.\n"
+            f"Task ID: {task_id}\n"
+            f"Task: {question}\n\n"
+            f"Instructions:\n"
+            f"- If the task mentions a file or attachment, use analyze_task_file(task_id='{task_id}', question=...) first.\n"
+            f"- If the task mentions a YouTube URL, search for information about its content.\n"
+            f"- Use wikipedia_search for factual lookups before broader web search.\n"
+            f"- Provide ONLY the final direct answer. No explanations, no 'The answer is...', "
+            f"just the exact value or fact requested."
         )
         try:
             result = self.agent.run(prompt)
             return str(result).strip()
         except Exception as e:
+            print(f"❌ Error: {e}")
             return "Error finding answer."
+# --- Gradio + Submission ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if profile:
         username = f"{profile.username}"
         print(f"Logged in as: {username}")
     else:
+        return "Please Login to Hugging Face first.", None
+    space_id = os.getenv("SPACE_ID")
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     try:
         agent = BasicAgent()
     except Exception as e:
         return f"Initialization Failed: {e}", None
     try:
         response = requests.get(questions_url, timeout=15)
         response.raise_for_status()
     except Exception as e:
         return f"Error fetching questions: {e}", None
     results_log = []
     answers_payload = []
     for item in questions_data:
+        task_id = item.get("task_id", "")
+        question_text = item.get("question", "")
         try:
+            submitted_answer = agent(question_text, task_id=task_id)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+            results_log.append({"Task ID": task_id, "Question": question_text[:80], "Answer": submitted_answer})
         except Exception as e:
+            results_log.append({"Task ID": task_id, "Question": question_text[:80], "Answer": f"Error: {e}"})
     submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
         "answers": answers_payload
     }
         response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         res = response.json()
         status = (
             f"Submission Successful!\n"
             f"Score: {res.get('score')}% ({res.get('correct_count')}/{res.get('total_attempted')})\n"
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(results_log)
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 GAIA Agent Evaluation")
     gr.Markdown("Click Login, then Run to evaluate your agent on the GAIA dataset.")
     gr.LoginButton()
     run_button = gr.Button("🚀 Run Evaluation & Submit", variant="primary")
     status_output = gr.Textbox(label="Status", lines=4)
     results_table = gr.DataFrame(label="Agent Performance Log")
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 if __name__ == "__main__":
+    demo.launch(ssr_mode=False)