OpenDeepResearch

Runtime error

App Files Community

Leonardo committed on Mar 25, 2025

Commit

8df2ba2

verified ·

1 Parent(s): cb66cbc

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -135

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import json
 import contextlib
-import mimetypes  # Added missing import
 import os
-import re  # Added missing import
-import shutil  # Added missing import
 from typing import Dict, List, Optional, Any
 from dotenv import load_dotenv
@@ -22,106 +22,18 @@ from scripts.text_web_browser import (
 )
 from scripts.visual_qa import visualizer
-# from scripts.flux_lora_tool import FluxLoRATool
 from smolagents import (
     CodeAgent,
     HfApiModel,
     LiteLLMModel,
-    OpenAIServerModel,  # Added missing model
-    TransformersModel,  # Added missing model
     Tool,
 )
 from smolagents.agent_types import AgentText, AgentImage, AgentAudio
 from smolagents.gradio_ui import pull_messages_from_step, handle_agent_output_types
-class GoogleSearchTool(Tool):
-    """Performs Google web searches using the Serper API."""
-    name = "web_search"
-    description = """Performs a google web search for your query then returns a string of the top search results."""
-    inputs = {
-        "query": {"type": "string", "description": "The search query to perform."},
-        "filter_year": {
-            "type": "integer",
-            "description": "Optionally restrict results to a certain year",
-            "nullable": True,
-        },
-    }
-    output_type = "string"
-    def __init__(self):
-        """Initialize the tool with API key from environment."""
-        super().__init__(self)
-        self.serpapi_key = os.getenv("SERPER_API_KEY")
-        self._validate_dependencies()
-    def _validate_dependencies(self):
-        """Ensure API key is available."""
-        if not self.serpapi_key:
-            raise ValueError(
-                "Missing SerpAPI key. Make sure you have 'SERPER_API_KEY' in your env variables."
-            )
-    def forward(self, query: str, filter_year: Optional[int] = None) -> str:
-        """Execute the search query and return formatted results."""
-        import requests
-        params = {
-            "engine": "google",
-            "q": query,
-            "api_key": self.serpapi_key,
-            "google_domain": "google.com",
-        }
-        headers = {"X-API-KEY": self.serpapi_key, "Content-Type": "application/json"}
-        if filter_year is not None:
-            params["tbs"] = (
-                f"cdr:1,cd_min:01/01/{filter_year},cd_max:12/31/{filter_year}"
-            )
-        response = requests.request(
-            "POST",
-            "https://google.serper.dev/search",
-            headers=headers,
-            data=json.dumps(params),
-        )
-        if response.status_code == 200:
-            results = response.json()
-        else:
-            raise ValueError(response.json())
-        if "organic" not in results.keys() or len(results["organic"]) == 0:
-            year_filter_message = (
-                f" with filter year={filter_year}" if filter_year is not None else ""
-            )
-            return f"No results found for '{query}'{year_filter_message}. Try with a more general query, or remove the year filter."
-        return self._format_search_results(results["organic"])
-    def _format_search_results(self, organic_results: List[Dict[str, Any]]) -> str:
-        """Format organic search results into a readable string."""
-        web_snippets = []
-        for idx, page in enumerate(organic_results):
-            date_published = (
-                f"\nDate published: {page['date']}" if "date" in page else ""
-            )
-            source = f"\nSource: {page['source']}" if "source" in page else ""
-            snippet = f"\n{page['snippet']}" if "snippet" in page else ""
-            formatted_result = f"{idx}. [{page['title']}]({page['link']}){date_published}{source}\n{snippet}"
-            formatted_result = formatted_result.replace(
-                "Your browser can't play this video.", ""
-            )
-            web_snippets.append(formatted_result)
-        return "## Search Results\n" + "\n\n".join(web_snippets)
 # Constants and configurations
 AUTHORIZED_IMPORTS = [
     "requests",
@@ -148,16 +60,6 @@ AUTHORIZED_IMPORTS = [
     "csv",
 ]
-# Configuration setup
-def setup_environment():
-    """Initialize environment variables and authentication."""
-    load_dotenv(override=True)
-    login(os.getenv("HF_TOKEN"))
-    print("TOKKKK", os.getenv("HF_TOKEN")[-10:])
-# Browser configuration
 user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
 BROWSER_CONFIG = {
     "viewport_size": 1024 * 5,
@@ -169,10 +71,32 @@ BROWSER_CONFIG = {
     "serpapi_key": os.getenv("SERPAPI_API_KEY"),
 }
-# Custom role conversions for model response handling
 custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}
 class ModelManager:
     """Manages model loading and initialization."""
@@ -211,7 +135,6 @@ class ModelManager:
             print(f"✗ Couldn't load model: {e}")
             raise
 class ToolRegistry:
     """Manages tool initialization and organization."""
@@ -255,8 +178,7 @@ class ToolRegistry:
             print(f"✗ Couldn't initialize image generation tool: {e}")
             raise
-# Agent creation in a factory function
 def create_agent():
     """Creates a fresh agent instance with properly configured tools."""
     # Initialize model
@@ -274,7 +196,7 @@ def create_agent():
     gemma_vision_tool = ToolRegistry.load_vision_tools()
     # Combine all tools into a single list (not a tuple)
-    all_tools = [visualizer] + web_tools + [gemma_vision_tool]
     # Validate tools before creating agent
     for tool in all_tools:
@@ -292,21 +214,20 @@ def create_agent():
         planning_interval=4,
     )
 def stream_to_gradio(
     agent,
     task: str,
     reset_agent_memory: bool = False,
     additional_args: Optional[dict] = None,
 ):
-    """Runs an agent with the given task and streams messages as gradio ChatMessages."""
     for step_log in agent.run(
         task, stream=True, reset=reset_agent_memory, additional_args=additional_args
     ):
         for message in pull_messages_from_step(step_log):
             yield message
-    # Process final answer
     final_answer = step_log  # Last log is the run's final_answer
     final_answer = handle_agent_output_types(final_answer)
@@ -318,19 +239,19 @@ def stream_to_gradio(
     elif isinstance(final_answer, AgentImage):
         yield gr.ChatMessage(
             role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "image/png"},
-        )
     elif isinstance(final_answer, AgentAudio):
         yield gr.ChatMessage(
             role="assistant",
-            content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
-        )
     else:
         yield gr.ChatMessage(
             role="assistant", content=f"**Final answer:** {str(final_answer)}"
         )
 class GradioUI:
     """A one-line interface to launch your agent in Gradio."""
@@ -344,6 +265,7 @@ class GradioUI:
     def interact_with_agent(self, prompt, messages, session_state):
         """Main interaction handler with the agent."""
         # Get or create session-specific agent
         if "agent" not in session_state:
             session_state["agent"] = create_agent()
@@ -363,8 +285,9 @@ class GradioUI:
                 session_state["agent"], task=prompt, reset_agent_memory=False
             ):
                 messages.append(msg)
-                yield messages
-            yield messages
         except Exception as e:
             print(f"Error in interaction: {str(e)}")
             raise
@@ -373,13 +296,6 @@ class GradioUI:
         self,
         file,
         file_uploads_log,
-        allowed_file_types=[
-            "application/pdf",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-            "text/plain",
-            "image/png",  # Add PNG MIME type
-            "video/mp4",  # Add MP4 MIME type"
-        ],
     ):
         """Handle file uploads with proper validation and security."""
         if file is None:
@@ -390,7 +306,7 @@ class GradioUI:
         except Exception as e:
             return gr.Textbox(f"Error: {e}", visible=True), file_uploads_log
-        if mime_type not in allowed_file_types:
             return gr.Textbox("File type disallowed", visible=True), file_uploads_log
         # Sanitize file name
@@ -410,6 +326,13 @@ class GradioUI:
         extension = type_to_ext.get(mime_type, "")
         sanitized_name = "".join(name_parts) + extension
         # Save the uploaded file to the specified folder
         file_path = os.path.join(self.file_upload_folder, sanitized_name)
         shutil.copy(file.name, file_path)
@@ -423,14 +346,14 @@ class GradioUI:
         message = text_input
         if len(file_uploads_log) > 0:
-            message += f"\nYou have been provided with these files, which might be helpful or not: {file_uploads_log}"
         return (
             message,
             gr.Textbox(
                 value="",
                 interactive=False,
-                placeholder="Please wait while Steps are getting populated",
             ),
             gr.Button(interactive=False),
         )
@@ -438,7 +361,7 @@ class GradioUI:
     def detect_device(self, request: gr.Request):
         """Detect whether the user is on mobile or desktop device."""
         if not request:
-            return "Unknown device"
         # Method 1: Check sec-ch-ua-mobile header
         is_mobile_header = request.headers.get("sec-ch-ua-mobile")
@@ -477,7 +400,7 @@ class GradioUI:
                 else:
                     return self._create_mobile_layout()
-        demo.launch(debug=True, **kwargs)
     def _create_desktop_layout(self):
         """Create the desktop layout with sidebar."""
@@ -512,7 +435,7 @@ class GradioUI:
                     gr.HTML(
                         """
                     <div style="display: flex; align-items: center; gap: 8px; font-family: system-ui, -apple-system, sans-serif;">
-                    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png"
                          style="width: 32px; height: 32px; object-fit: contain;" alt="logo">
                     <a target="_blank" href="https://github.com/huggingface/smolagents">
                         <b>huggingface/smolagents</b>
@@ -652,7 +575,7 @@ class GradioUI:
             [text_input, launch_research_btn],
         )
 def main():
     """Main entry point for the application."""
     # Initialize environment
@@ -662,8 +585,7 @@ def main():
     os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
     # Launch UI
-    GradioUI().launch()
 if __name__ == "__main__":
     main()

 import json
 import contextlib
+import mimetypes
 import os
+import re
+import shutil
 from typing import Dict, List, Optional, Any
 from dotenv import load_dotenv
 )
 from scripts.visual_qa import visualizer
 from smolagents import (
     CodeAgent,
     HfApiModel,
     LiteLLMModel,
+    OpenAIServerModel,
+    TransformersModel,
     Tool,
 )
 from smolagents.agent_types import AgentText, AgentImage, AgentAudio
 from smolagents.gradio_ui import pull_messages_from_step, handle_agent_output_types
+# ------------------------ Configuration and Setup ------------------------
 # Constants and configurations
 AUTHORIZED_IMPORTS = [
     "requests",
     "csv",
 ]
 user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
 BROWSER_CONFIG = {
     "viewport_size": 1024 * 5,
     "serpapi_key": os.getenv("SERPAPI_API_KEY"),
 }
 custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}
+# Multimedia file types supported:
+ALLOWED_FILE_TYPES = [
+    "application/pdf",
+    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    "text/plain",
+    "image/png",
+    "image/jpeg",  # Added JPEG support
+    "image/gif",   # Added GIF support
+    "video/mp4",
+    "audio/mpeg",  # Added MP3 support
+    "audio/wav",   # Added WAV support
+    "audio/ogg",   # Added OGG support
+]
+def setup_environment():
+    """Initialize environment variables and authentication."""
+    load_dotenv(override=True)
+    if os.getenv("HF_TOKEN"):  # Check if token is actually set
+        login(os.getenv("HF_TOKEN"))
+        print("HF_TOKEN (last 10 characters):", os.getenv("HF_TOKEN")[-10:])
+    else:
+        print("HF_TOKEN not found in environment variables.")
+# ------------------------ Model and Tool Management ------------------------
 class ModelManager:
     """Manages model loading and initialization."""
             print(f"✗ Couldn't load model: {e}")
             raise
 class ToolRegistry:
     """Manages tool initialization and organization."""
             print(f"✗ Couldn't initialize image generation tool: {e}")
             raise
+# ------------------------ Agent Creation and Execution ------------------------
 def create_agent():
     """Creates a fresh agent instance with properly configured tools."""
     # Initialize model
     gemma_vision_tool = ToolRegistry.load_vision_tools()
     # Combine all tools into a single list (not a tuple)
+    all_tools = [visualizer] + web_tools + [gemma_vision_tool] + [image_generator]
     # Validate tools before creating agent
     for tool in all_tools:
         planning_interval=4,
     )
 def stream_to_gradio(
     agent,
     task: str,
     reset_agent_memory: bool = False,
     additional_args: Optional[dict] = None,
 ):
+    """Runs an agent with the given task and streams messages as Gradio ChatMessages."""
     for step_log in agent.run(
         task, stream=True, reset=reset_agent_memory, additional_args=additional_args
     ):
         for message in pull_messages_from_step(step_log):
             yield message
+    # Process final answer : Use a more comprehensive media output
     final_answer = step_log  # Last log is the run's final_answer
     final_answer = handle_agent_output_types(final_answer)
     elif isinstance(final_answer, AgentImage):
         yield gr.ChatMessage(
             role="assistant",
+            content= { "image": final_answer.to_string(), "type": "file" },
+        )  # Send as Gradio-compatible file object:
     elif isinstance(final_answer, AgentAudio):
         yield gr.ChatMessage(
             role="assistant",
+            content={ "audio": final_answer.to_string(), "type": "file" },
+        ) # Send as Gradio-compatible file object
     else:
         yield gr.ChatMessage(
             role="assistant", content=f"**Final answer:** {str(final_answer)}"
         )
+# ------------------------ Gradio UI Components ------------------------
 class GradioUI:
     """A one-line interface to launch your agent in Gradio."""
     def interact_with_agent(self, prompt, messages, session_state):
         """Main interaction handler with the agent."""
         # Get or create session-specific agent
         if "agent" not in session_state:
             session_state["agent"] = create_agent()
                 session_state["agent"], task=prompt, reset_agent_memory=False
             ):
                 messages.append(msg)
+                yield messages  # Yield messages after each step
+            yield messages  # Yield messages one last time
         except Exception as e:
             print(f"Error in interaction: {str(e)}")
             raise
         self,
         file,
         file_uploads_log,
     ):
         """Handle file uploads with proper validation and security."""
         if file is None:
         except Exception as e:
             return gr.Textbox(f"Error: {e}", visible=True), file_uploads_log
+        if mime_type not in ALLOWED_FILE_TYPES:
             return gr.Textbox("File type disallowed", visible=True), file_uploads_log
         # Sanitize file name
         extension = type_to_ext.get(mime_type, "")
         sanitized_name = "".join(name_parts) + extension
+        # Limit File Size, and Throw Error
+        max_file_size_mb = 50 # Define the limit
+        file_size_mb = os.path.getsize(file.name) / (1024 * 1024)  # Size in MB
+        if file_size_mb > max_file_size_mb:
+            return gr.Textbox(f"File size exceeds {max_file_size_mb} MB limit.", visible=True), file_uploads_log
         # Save the uploaded file to the specified folder
         file_path = os.path.join(self.file_upload_folder, sanitized_name)
         shutil.copy(file.name, file_path)
         message = text_input
         if len(file_uploads_log) > 0:
+            message += f"\nYou have been provided with these files, which might be helpful or not: {file_uploads_log}"  # Added file list
         return (
             message,
             gr.Textbox(
                 value="",
                 interactive=False,
+                placeholder="Processing...", # Changed placeholder.
             ),
             gr.Button(interactive=False),
         )
     def detect_device(self, request: gr.Request):
         """Detect whether the user is on mobile or desktop device."""
         if not request:
+            return "Unknown device"  # Handle case where request is none.
         # Method 1: Check sec-ch-ua-mobile header
         is_mobile_header = request.headers.get("sec-ch-ua-mobile")
                 else:
                     return self._create_mobile_layout()
+        demo.queue(max_size=20).launch(debug=True, **kwargs)  # Add queue with reasonable size
     def _create_desktop_layout(self):
         """Create the desktop layout with sidebar."""
                     gr.HTML(
                         """
                     <div style="display: flex; align-items: center; gap: 8px; font-family: system-ui, -apple-system, sans-serif;">
+                    <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png"
                          style="width: 32px; height: 32px; object-fit: contain;" alt="logo">
                     <a target="_blank" href="https://github.com/huggingface/smolagents">
                         <b>huggingface/smolagents</b>
             [text_input, launch_research_btn],
         )
+# ------------------------ Execution ------------------------
 def main():
     """Main entry point for the application."""
     # Initialize environment
     os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
     # Launch UI
+    GradioUI(file_upload_folder="uploaded_files").launch()
 if __name__ == "__main__":
     main()