Final_Assignment_Template

Sleeping

App Files Files Community

jonathan9879 committed on Jun 8, 2025

Commit

75a1136

verified ·

1 Parent(s): 39af7e5

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -66

app.py CHANGED Viewed

@@ -9,119 +9,136 @@ from google.generativeai.types import HarmCategory, HarmBlockThreshold
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- User's Corrected NativeGeminiAgent Class ---
-# This is the superior implementation provided by you.
-class NativeGeminiAgent:
-    def __init__(self, gemini_api_key: str, api_url: str):
-        print("Initializing NativeGeminiAgent with corrected configuration...")
         genai.configure(api_key=gemini_api_key)
         self.api_url = api_url
-        self.model_name = 'gemini-2.5-flash-preview-05-20' # Using the stable, powerful model
-        # Correct tool configuration using the recommended string-based method
         self.model = genai.GenerativeModel(
             model_name=self.model_name,
-            tools=['google_search_retrieval'],
-            system_instruction="""You are a world-class problem solver and researcher.
-            Analyze the question carefully, use available tools to gather information,
-            and provide accurate, concise answers. Focus on factual information and
-            avoid speculation.""",
             safety_settings={
                 HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
-                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
-                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
-                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
             }
         )
-        print(f"Agent initialized with {self.model_name} and Google Search grounding.")
     def _get_mime_type(self, url: str) -> str:
-        """Enhanced MIME type detection."""
         url_lower = url.lower()
         if url_lower.endswith(('.jpg', '.jpeg')): return "image/jpeg"
         elif url_lower.endswith('.png'): return "image/png"
-        elif url_lower.endswith('.gif'): return "image/gif"
         elif url_lower.endswith('.pdf'): return "application/pdf"
-        elif url_lower.endswith('.txt'): return "text/plain"
-        elif url_lower.endswith('.csv'): return "text/csv"
-        elif url_lower.endswith(('.mp4', '.avi', '.mov')): return "video/mp4"
-        elif url_lower.endswith('.json'): return "application/json"
         else: return "application/octet-stream"
     def _check_if_file_exists(self, url: str) -> bool:
-        """Enhanced file existence check."""
         try:
             response = requests.head(url, timeout=15, allow_redirects=True)
             return response.status_code == 200
-        except requests.exceptions.RequestException as e:
-            print(f"File check failed for {url}: {e}")
             return False
     def __call__(self, question: str, task_id: str) -> str:
         print(f"\n{'='*20}\nProcessing Task ID: {task_id}")
-        prompt_parts = [question]
-        # Enhanced URL detection
         urls_in_question = re.findall(r'https?://[^\s<>"{}|\\^`\[\]]+', question)
         for url in urls_in_question:
             try:
                 mime_type = self._get_mime_type(url)
                 prompt_parts.append(genai.Part.from_uri(uri=url, mime_type=mime_type))
-                print(f"Added URL: {url} (MIME: {mime_type})")
-            except Exception as e:
-                print(f"Failed to add URL {url}: {e}")
-        # Check for associated files
         file_url = f"{self.api_url}/files/{task_id}"
         if self._check_if_file_exists(file_url):
             try:
                 mime_type = self._get_mime_type(file_url)
                 prompt_parts.append(genai.Part.from_uri(uri=file_url, mime_type=mime_type))
-                print(f"Added file: {file_url} (MIME: {mime_type})")
-            except Exception as e:
-                print(f"Failed to add file {file_url}: {e}")
-        try:
-            # Use the specified generation config for more stable outputs
-            response = self.model.generate_content(
-                prompt_parts,
-                request_options={'timeout': 120},
-                generation_config=genai.types.GenerationConfig(
-                    temperature=0.1,
-                    top_p=0.8,
-                    max_output_tokens=2048
                 )
-            )
-            if response.text:
-                # Thoroughly clean the response text
-                final_answer = response.text.strip()
-                final_answer = re.sub(r'\[\d+\]', '', final_answer) # Remove citations
-                final_answer = re.sub(r'\s+', ' ', final_answer).strip() # Normalize whitespace
-                return final_answer
             else:
-                return "AGENT_ERROR: Empty response from model"
-        except Exception as e:
-            error_msg = f"AGENT_ERROR: {str(e)}"
-            print(error_msg)
-            return error_msg
 # --- Main run_and_submit_all function ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
-    if not profile: return "Please Login to Hugging Face with the button.", None
     username = f"{profile.username}"
     gemini_key = os.getenv("GEMINI_API_KEY")
-    if not gemini_key: return "CRITICAL ERROR: GEMINI_API_KEY not found in Space secrets.", None
     api_url = DEFAULT_API_URL
     try:
-        agent = NativeGeminiAgent(gemini_api_key=gemini_key, api_url=api_url)
         questions_data = requests.get(f"{api_url}/questions", timeout=15).json()
     except Exception as e: return f"Error during setup: {e}", None
@@ -134,11 +151,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
-             error_message = f"AGENT CRASH: {e}"
-             print(error_message)
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": error_message})
-        print(f"--- Waiting for 10 seconds before next question... ---")
         time.sleep(10)
     if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
@@ -160,8 +175,8 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
 # --- Gradio Interface ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Native Multi-Modal GAIA Agent (Corrected)")
-    gr.Markdown("This agent uses the improved architecture with proper tool configuration, MIME type detection, and error handling.")
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)

 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+MAX_ITERATIONS = 7
+# --- HYBRID: Re-introducing the WebSearchTool ---
+class WebSearchTool:
+    """A tool to search the web using the Perplexity API.
+
+    Failures are reported as strings starting with ``"Error:"`` instead of
+    being raised, so the caller can feed them back to the model as an
+    observation.
+    """
+    def __init__(self, api_key):
+        """Store the API key and the chat-completions endpoint.
+
+        Args:
+            api_key: Perplexity API key, sent as a Bearer token by ``execute``.
+        """
+        self.api_key = api_key
+        self.url = "https://api.perplexity.ai/chat/completions"
+        print("WebSearchTool initialized.")
+    def execute(self, query: str) -> str:
+        """Send ``query`` to the Perplexity endpoint and return the answer text.
+
+        Args:
+            query: The search question, passed as the user message.
+
+        Returns:
+            The ``content`` of the first choice in the JSON response, or an
+            ``"Error: ..."`` string when the request fails or the response
+            does not have the expected shape.
+        """
+        print(f"Executing WebSearchTool with query: {query}")
+        payload = {
+            "model": "llama-3-sonar-small-32k-online",
+            "messages": [
+                {"role": "system", "content": "You are a research assistant. Provide a precise and factual answer to the query."},
+                {"role": "user", "content": query},
+            ],
+        }
+        headers = {"accept": "application/json", "content-type": "application/json", "Authorization": f"Bearer {self.api_key}"}
+        try:
+            response = requests.post(self.url, json=payload, headers=headers, timeout=40)
+            response.raise_for_status()
+            return response.json()['choices'][0]['message']['content']
+        except requests.exceptions.RequestException as e:
+            return f"Error: Web search failed. {e}"
+        except (KeyError, IndexError, TypeError, ValueError) as e:
+            # A 200 reply with a non-JSON body or without choices[0].message.content
+            # must not escape as an exception: keep the "Error:" string contract.
+            return f"Error: Web search returned an unexpected response. {e!r}"
+# --- The New Hybrid Agent ---
+class HybridAgent:
+    """Gemini-backed agent that runs a ReAct loop with an external web search tool.
+
+    Calling the instance with a question and task id returns the answer text,
+    or a string starting with ``"AGENT_ERROR:"`` when generation fails or the
+    iteration budget is exhausted.
+    """
+    def __init__(self, gemini_api_key: str, pplx_api_key: str, api_url: str):
+        """Configure the Gemini client, the search tool and the model.
+
+        Args:
+            gemini_api_key: Key passed to ``genai.configure``.
+            pplx_api_key: Key handed to ``WebSearchTool``.
+            api_url: Base URL used to build ``{api_url}/files/{task_id}``.
+        """
+        print("Initializing HybridAgent...")
         genai.configure(api_key=gemini_api_key)
         self.api_url = api_url
+        self.web_search_tool = WebSearchTool(pplx_api_key)
+        # Using the stable, powerful model that we know works.
+        self.model_name = 'gemini-2.5-flash-preview-05-20'
+        # HYBRID: We do NOT enable the native search tool, as it's not supported by all models.
         self.model = genai.GenerativeModel(
             model_name=self.model_name,
+            system_instruction="""You are a powerful reasoning agent. You can understand files and URLs provided to you directly.
+For general web searches or to find new information, you MUST use the `WebSearch` tool.
+Follow the ReAct format: Thought, Action, Observation, Final Answer.""",
             safety_settings={
                 HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+                # Add other categories as needed
             }
         )
+        print(f"Agent initialized with {self.model_name} and an external WebSearchTool.")
     def _get_mime_type(self, url: str) -> str:
+        """Map a URL's file extension to a MIME type.
+
+        Recognizes .jpg/.jpeg, .png and .pdf (case-insensitive); anything
+        else yields "application/octet-stream".
+        """
         url_lower = url.lower()
         if url_lower.endswith(('.jpg', '.jpeg')): return "image/jpeg"
         elif url_lower.endswith('.png'): return "image/png"
         elif url_lower.endswith('.pdf'): return "application/pdf"
+        # Add other types as needed...
         else: return "application/octet-stream"
     def _check_if_file_exists(self, url: str) -> bool:
+        """Return True when a HEAD request to ``url`` answers with status 200.
+
+        Any ``requests`` exception is treated as "file not available".
+        """
         try:
             response = requests.head(url, timeout=15, allow_redirects=True)
             return response.status_code == 200
+        except requests.exceptions.RequestException:
             return False
     def __call__(self, question: str, task_id: str) -> str:
+        """Answer ``question`` by iterating model calls and web searches.
+
+        Args:
+            question: The task text; any http(s) URLs in it are attached as parts.
+            task_id: Used for logging and to probe ``{api_url}/files/{task_id}``.
+
+        Returns:
+            The text after "Final Answer:", the raw reply when it contains
+            neither a final answer nor a WebSearch action, or an
+            "AGENT_ERROR: ..." string on failure / after MAX_ITERATIONS rounds.
+        """
         print(f"\n{'='*20}\nProcessing Task ID: {task_id}")
+        # --- HYBRID: Multi-modal part preparation ---
+        prompt_parts = [
+            "You will solve the following question. You have been provided with the question and any relevant files or URLs.",
+            "Remember, for web searches, you must use the `WebSearch` tool in the ReAct format (Thought, Action, Observation).",
+            f"\n--- QUESTION ---\n{question}"
+        ]
+        # NOTE(review): confirm that `genai.Part.from_uri` exists in the installed
+        # google.generativeai SDK; if it does not, both attachment attempts below
+        # only log a failure and the model never receives the URL/file.
         urls_in_question = re.findall(r'https?://[^\s<>"{}|\\^`\[\]]+', question)
         for url in urls_in_question:
             try:
                 mime_type = self._get_mime_type(url)
                 prompt_parts.append(genai.Part.from_uri(uri=url, mime_type=mime_type))
+                print(f"Appended URL to prompt parts: {url}")
+            except Exception as e: print(f"Failed to add URL {url}: {e}")
         file_url = f"{self.api_url}/files/{task_id}"
         if self._check_if_file_exists(file_url):
             try:
                 mime_type = self._get_mime_type(file_url)
                 prompt_parts.append(genai.Part.from_uri(uri=file_url, mime_type=mime_type))
+                print(f"Appended file to prompt parts: {file_url}")
+            except Exception as e: print(f"Failed to add file {file_url}: {e}")
+        # --- HYBRID: ReAct Loop ---
+        for i in range(MAX_ITERATIONS):
+            print(f"\n--- Hybrid Iteration {i+1} ---")
+            try:
+                response = self.model.generate_content(
+                    prompt_parts,
+                    generation_config=genai.types.GenerationConfig(temperature=0.1)
                 )
+                response_text = response.text
+            except Exception as e: return f"AGENT_ERROR: {e}"
+            print(f"LLM Response:\n{response_text}")
+            final_answer_match = re.search(r"Final Answer:\s*(.*)", response_text, re.DOTALL)
+            if final_answer_match:
+                return final_answer_match.group(1).strip()
+            action_match = re.search(r"Action:\s*WebSearch\[(.*?)\]", response_text, re.DOTALL)
+            if action_match:
+                query = action_match.group(1).strip()
+                observation = self.web_search_tool.execute(query)
+                # Replay the model's reasoning. A reply can contain an Action without a
+                # "Thought:" marker; indexing the split unguarded raised IndexError then,
+                # so fall back to the whole reply in that case.
+                thought_pieces = response_text.split('Thought:')
+                reasoning = thought_pieces[1] if len(thought_pieces) > 1 else response_text
+                prompt_parts.append(f"\nThought: {reasoning}")
+                prompt_parts.append(f"Observation: {observation}")
             else:
+                # If the model gives a direct answer without the "Final Answer:" tag
+                return response_text.strip()
+        return "AGENT_ERROR: Max iterations reached."
 # --- Main run_and_submit_all function ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     space_id = os.getenv("SPACE_ID")
+    if not profile: return "Please Login to Hugging Face.", None
     username = f"{profile.username}"
+    # HYBRID: We need both keys again.
     gemini_key = os.getenv("GEMINI_API_KEY")
+    pplx_key = os.getenv("PPLX_API_KEY")
+    if not gemini_key or not pplx_key: return "CRITICAL ERROR: GEMINI_API_KEY or PPLX_API_KEY not found.", None
     api_url = DEFAULT_API_URL
     try:
+        agent = HybridAgent(gemini_api_key=gemini_key, pplx_api_key=pplx_key, api_url=api_url)
         questions_data = requests.get(f"{api_url}/questions", timeout=15).json()
     except Exception as e: return f"Error during setup: {e}", None
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT CRASH: {e}"})
+        print(f"--- Waiting for 10 seconds... ---")
         time.sleep(10)
     if not answers_payload: return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 # --- Gradio Interface ---
 with gr.Blocks() as demo:
+    gr.Markdown("# Hybrid GAIA Agent")
+    gr.Markdown("This agent uses Gemini 1.5 Pro's native multi-modality (files, URLs) combined with an external Perplexity web search tool.")
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)