Final_Assignment_Template2

Sleeping

App Files Files Community

lethaq committed on May 21

Commit

5d7f198

verified ·

1 Parent(s): 81917a3

Update app.py

Browse files

Files changed (1) hide show

app.py +247 -38

app.py CHANGED Viewed

@@ -3,25 +3,202 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-# (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
     def __init__(self):
-        print("BasicAgent initialized.")
     def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
-    Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
@@ -38,13 +215,20 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
@@ -80,9 +264,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
@@ -91,23 +278,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-    print(status_update)
-    # 5. Submit
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Overall Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-            f"Message: {result_data.get('message', 'No message received.')}"
         )
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
@@ -142,19 +323,17 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
@@ -163,13 +342,41 @@ with gr.Blocks() as demo:
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
@@ -192,5 +399,7 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

 import requests
 import inspect
 import pandas as pd
+import json
+import time
+from typing import List, Dict, Any, Optional
+from litellm import completion
+from duckduckgo_search import DDGS
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Tool Implementations ---
class DuckDuckGoSearchTool:
    """Thin wrapper around the ``DDGS`` text search.

    Attributes:
        name: Identifier reported in the ``tool_name`` field of call results.
        description: Human-readable summary of the tool.
    """

    def __init__(self):
        self.name = "duckduckgo_search"
        self.description = "Search the web using DuckDuckGo"

    def search(self, query: str, max_results: int = 5) -> list[dict[str, str]]:
        """Search the web using DuckDuckGo and return the raw results.

        Args:
            query: The search query.
            max_results: Maximum number of results to request.

        Returns:
            The list of result dictionaries yielded by ``DDGS.text``. If the
            search raises, a single placeholder entry is returned instead,
            with the error message in ``title`` and empty ``body``/``href``.
        """
        try:
            with DDGS() as ddgs:
                return list(ddgs.text(query, max_results=max_results))
        except Exception as e:
            # Deliberately best-effort: a failed search must not abort the
            # agent run, so the failure is reported as a placeholder result.
            print(f"DuckDuckGo search error: {e}")
            return [{"title": f"Search error: {e}", "body": "", "href": ""}]

    def __call__(self, query: str, max_results: int = 5) -> dict[str, Any]:
        """Execute the search and wrap the results with metadata.

        Args:
            query: The search query.
            max_results: Maximum number of results to request.

        Returns:
            Dictionary with keys ``tool_name``, ``query``, ``results``,
            ``result_count`` and ``time_taken`` (elapsed seconds).
        """
        # perf_counter is monotonic; time.time() follows the wall clock and
        # can yield negative or wildly wrong durations if the clock is adjusted.
        started = time.perf_counter()
        results = self.search(query, max_results)
        elapsed = time.perf_counter() - started
        return {
            "tool_name": self.name,
            "query": query,
            "results": results,
            "result_count": len(results),
            "time_taken": elapsed,
        }
+# --- LiteLLM Model Wrapper ---
+class LiteLLMModel:
+    def __init__(self, model_id: str, api_key: str):
+        self.model_id = model_id
+        self.api_key = api_key
+        print(f"Initialized LiteLLM with model: {model_id}")
+    def generate(self, prompt: str, system_prompt: str = None) -> str:
+        """
+        Generate text using the LiteLLM model.
+        Args:
+            prompt: The user prompt
+            system_prompt: Optional system prompt
+        Returns:
+            Generated text response
+        """
+        try:
+            messages = []
+            if system_prompt:
+                messages.append({"role": "system", "content": system_prompt})
+            messages.append({"role": "user", "content": prompt})
+            response = completion(
+                model=self.model_id,
+                messages=messages,
+                api_key=self.api_key
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            print(f"LiteLLM generation error: {e}")
+            return f"Error generating response: {str(e)}"
+# --- Advanced Agent Implementation ---
+class CodeAgent:
+    def __init__(self, tools: List[Any], model: LiteLLMModel):
+        self.tools = tools
+        self.model = model
+        self.search_tool = next((tool for tool in tools if isinstance(tool, DuckDuckGoSearchTool)), None)
+        print(f"CodeAgent initialized with {len(tools)} tools and model {model.model_id}")
+    def format_search_results(self, results: List[Dict[str, str]]) -> str:
+        """Format search results into a readable string"""
+        formatted = "Search Results:\n"
+        for i, result in enumerate(results, 1):
+            formatted += f"{i}. {result.get('title', 'No title')}\n"
+            formatted += f"   {result.get('body', 'No description')[:200]}...\n"
+            formatted += f"   URL: {result.get('href', 'No URL')}\n\n"
+        return formatted
+    def create_prompt(self, question: str, search_results: Optional[List[Dict[str, str]]] = None) -> str:
+        """Create a prompt for the model with optional search results"""
+        prompt = f"Question: {question}\n\n"
+        if search_results:
+            prompt += self.format_search_results(search_results)
+        prompt += "\nPlease provide a concise, factual answer to the question. "
+        prompt += "Your answer should be direct and to the point, without any explanations or reasoning. "
+        prompt += "For example, if asked 'What is the capital of France?', just answer 'Paris'. "
+        prompt += "If asked for a numerical value, provide only the number. "
+        prompt += "If asked for a list, provide comma-separated values without numbering. "
+        prompt += "If you don't know the answer, respond with 'Unknown' rather than speculating.\n\n"
+        prompt += "Answer: "
+        return prompt
+    def create_system_prompt(self) -> str:
+        """Create a system prompt for the model"""
+        return (
+            "You are a helpful AI assistant specialized in answering factual questions. "
+            "You always provide direct, concise answers without explanations or reasoning. "
+            "Your answers are factual, accurate, and to the point. "
+            "For questions requiring specific formats, you follow those formats exactly. "
+            "You never include phrases like 'the answer is' or 'I believe' in your responses."
+        )
     def __call__(self, question: str) -> str:
+        """
+        Process a question and return an answer.
+        Args:
+            question: The question to answer
+        Returns:
+            The answer to the question
+        """
+        print(f"Agent received question: {question[:100]}...")
+        # Determine if we should use search for this question
+        should_search = (
+            "what is" in question.lower() or
+            "who is" in question.lower() or
+            "when" in question.lower() or
+            "where" in question.lower() or
+            "how many" in question.lower() or
+            "which" in question.lower()
+        )
+        search_results = None
+        if should_search and self.search_tool:
+            print(f"Searching for information about: {question}")
+            search_response = self.search_tool(question, max_results=3)
+            search_results = search_response.get("results", [])
+            print(f"Found {len(search_results)} search results")
+        # Create prompt and generate response
+        prompt = self.create_prompt(question, search_results)
+        system_prompt = self.create_system_prompt()
+        print("Generating response with LLM...")
+        response = self.model.generate(prompt, system_prompt)
+        # Clean up the response
+        answer = response.strip()
+        # Remove common prefixes that models tend to add
+        prefixes_to_remove = [
+            "Answer:", "The answer is:", "I believe", "I think",
+            "Based on", "According to", "The answer would be"
+        ]
+        for prefix in prefixes_to_remove:
+            if answer.startswith(prefix):
+                answer = answer[len(prefix):].strip()
+        # Remove quotes if they wrap the entire answer
+        if (answer.startswith('"') and answer.endswith('"')) or \
+           (answer.startswith("'") and answer.endswith("'")):
+            answer = answer[1:-1].strip()
+        print(f"Final answer: {answer[:100]}...")
+        return answer
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
+    Fetches all questions, runs the Agent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent with Gemini model and DuckDuckGo search
     try:
+        # Get API key from environment variable
+        api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            return "Error: GEMINI_API_KEY environment variable not found. Please set it in your Space settings.", None
+        model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=api_key)
+        agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    # When the app runs as a Hugging Face Space, this link points to your codebase.
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            print(f"Processing task {task_id}: {question_text[:50]}...")
             submitted_answer = agent(question_text)
+            # Important: Use "model_answer" as the key, not "submitted_answer"
+            answers_payload.append({"task_id": task_id, "model_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            print(f"Answer for task {task_id}: {submitted_answer[:50]}...")
         except Exception as e:
              print(f"Error running agent on task {task_id}: {e}")
              results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+    # 4. Submit answers directly as a list of dictionaries
     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
     try:
+        # Important: Submit the answers_payload directly as JSON
+        response = requests.post(submit_url, json=answers_payload, timeout=60)
         response.raise_for_status()
         result_data = response.json()
         final_status = (
             f"Submission Successful!\n"
+            f"Score: {result_data.get('score', 'N/A')}% "
             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
         )
         print("Submission successful.")
         results_df = pd.DataFrame(results_log)
 # --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Gemini Agent for GAIA Benchmark")
     gr.Markdown(
         """
         **Instructions:**
+        1. Make sure you have set the GEMINI_API_KEY environment variable in your Space settings.
+        2. Log in to your Hugging Face account using the button below.
+        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run the agent, and submit answers.
+        This agent uses:
+        - Gemini 2.0 Flash Lite model for reasoning
+        - DuckDuckGo search for retrieving information
         """
     )
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+    # Add a single question test feature
+    gr.Markdown("## Test Single Question")
+    with gr.Row():
+        question_in = gr.Textbox(label="Question", lines=3)
+        answer_out = gr.Textbox(label="Answer", lines=3, interactive=False)
+    test_btn = gr.Button("Test Question", variant="secondary")
+    # Add a function to test a single question
+    def test_single_question(question):
+        try:
+            api_key = os.getenv("GEMINI_API_KEY")
+            if not api_key:
+                return "Error: GEMINI_API_KEY environment variable not found"
+            model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=AIzaSyAhmwogxZFBtt7_OUsKQGNeOYF7ced39bM)
+            agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model)
+            answer = agent(question)
+            return answer
+        except Exception as e:
+            return f"Error: {str(e)}"
     run_button.click(
         fn=run_and_submit_all,
+        inputs=[gr.OAuthProfile()],
         outputs=[status_output, results_table]
     )
+    test_btn.click(
+        fn=test_single_question,
+        inputs=[question_in],
+        outputs=[answer_out]
+    )
 if __name__ == "__main__":
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     print("-"*(60 + len(" App Starting ")) + "\n")
+    print("Launching Gradio Interface for Gemini Agent Evaluation...")
+    demo.launch(debug=True, share=False)