Leonardo committed "Update app.py"
app.py
CHANGED
@@ -1,8 +1,16 @@
+"""
+OpenDeepResearch Web Interface Application
+
+This module provides a Gradio-based web interface for interacting with AI agents
+using the smolagents framework. It integrates document processing tools,
+web searching, and image generation capabilities.
+"""
+
 import mimetypes
 import os
 import re
 import shutil
-
+import datetime

 from dotenv import load_dotenv
 from huggingface_hub import login
@@ -139,13 +147,13 @@ class ModelManager:
            if chosen_inference == "hf_api":
                return HfApiModel(model_id=model_id)

-
+            if chosen_inference == "hf_api_provider":
                return HfApiModel(provider="together")

-
+            if chosen_inference == "litellm":
                return LiteLLMModel(model_id=model_id)

-
+            if chosen_inference == "openai":
                if not key_manager:
                    raise ValueError("Key manager required for OpenAI model")
@@ -153,15 +161,14 @@ class ModelManager:
                    model_id=model_id, api_key=key_manager.get_key("openai_api_key")
                )

-
+            if chosen_inference == "transformers":
                return TransformersModel(
                    model_id="HuggingFaceTB/SmolLM2-1.7B-Instruct",
                    device_map="auto",
                    max_new_tokens=1000,
                )

-
-                raise ValueError(f"Invalid inference type: {chosen_inference}")
+            raise ValueError(f"Invalid inference type: {chosen_inference}")

        except Exception as e:
            print(f"✗ Couldn't load model: {e}")
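Note: the two hunks above turn previously unreachable fall-through returns into explicit guard clauses, so exactly one branch fires per `chosen_inference` value and anything unrecognized reaches the final `raise`. A minimal usage sketch; the `create_model`-style method name and signature are assumed from the identifiers visible in the diff, not confirmed by it:

    # Hypothetical call site; ModelManager's method name and signature are assumed.
    model = ModelManager.create_model(
        chosen_inference="litellm",  # or: "hf_api", "hf_api_provider", "openai", "transformers"
        model_id="gpt-4o-mini",
        key_manager=None,  # only the "openai" branch requires a key manager
    )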
@@ -205,7 +212,9 @@ class ToolRegistry:
            return Tool.from_space(
                space_id="xkerser/FLUX.1-dev",
                name="image_generator",
-                description=
+                description=(
+                    "Generates high-quality AgentImage using the FLUX.1-dev model based on text prompts."
+                ),
            )
        except Exception as e:
            print(f"✗ Couldn't initialize image generation tool: {e}")
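A Space-backed tool built with `Tool.from_space` is callable like any other smolagents tool. A hedged sketch of exercising the registry's loader; the return-`None`-on-failure behavior is inferred from the except branch printing and falling off the end of the function:

    # Sketch: the loader prints a warning and yields None if the Space is unreachable.
    image_generator = ToolRegistry.load_image_generation_tools()
    if image_generator is not None:
        image = image_generator("a watercolor sketch of a lighthouse at dusk")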
@@ -235,21 +244,38 @@ def create_agent():
        text_limit = 30000
        browser = SimpleTextBrowser(**BROWSER_CONFIG)

-        #
+        # Create tool instances with proper error handling
        web_tools = ToolRegistry.load_web_tools(model, browser, text_limit)
-        doc_tools = ToolRegistry.load_document_tools()  # New document tools
-        image_generator = ToolRegistry.load_image_generation_tools()

+        try:
+            doc_tools = ToolRegistry.load_document_tools()
+        except AssertionError as e:
+            print(f"Warning: Error loading document tools: {str(e)}")
+            print("Attempting to continue with available tools...")
+            doc_tools = []
+
+        try:
+            image_generator = ToolRegistry.load_image_generation_tools()
+        except Exception as e:
+            print(f"Warning: Image generation tools unavailable: {str(e)}")
+            image_generator = None
+
+        # Combine available tools (filter out None values)
+        all_tools = [
+            tool
+            for tool in (
+                [visualizer]
+                + web_tools
+                + doc_tools
+                + ([image_generator] if image_generator else [])
+            )
+            if tool is not None
+        ]
+
+        # Log available tools
+        print(f"Loaded {len(all_tools)} tools successfully")
        for tool in all_tools:
-            raise ValueError(
-                f"Invalid tool type: {type(tool)}. "
-                f"All tools must be instances of Tool class."
-            )
+            print(f"- {tool.name}: {tool.description[:50]}...")

        return CodeAgent(
            model=model,
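The comprehension added above lets any failed loader degrade to an empty contribution instead of aborting `create_agent`; the old code unconditionally raised inside the loop. A standalone sketch of the same None-filtering assembly, with plain strings standing in for `Tool` instances:

    visualizer, web_tools, doc_tools = "viz", ["search", None], []
    image_generator = None  # as in the except branch above
    all_tools = [
        tool
        for tool in [visualizer] + web_tools + doc_tools
        + ([image_generator] if image_generator else [])
        if tool is not None
    ]
    assert all_tools == ["viz", "search"]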
@@ -259,46 +285,55 @@ def create_agent():
            additional_authorized_imports=AUTHORIZED_IMPORTS,
            planning_interval=4,
        )
-    except
+    except Exception as e:
        print(f"Failed to create agent: {e}")
-        raise RuntimeError(f"Agent creation failed: {e}")
+        raise RuntimeError(f"Agent creation failed: {e}") from e


-def stream_to_gradio(
-    agent,
-    task: str,
-    reset_agent_memory: bool = False,
-    additional_args: Optional[dict] = None,
-):
+def stream_to_gradio(agent, task, reset_agent_memory=False, additional_args=None):
    """Runs an agent with the given task and streams messages as Gradio ChatMessages."""
-    yield
+    try:
+        for step_log in agent.run(
+            task, stream=True, reset=reset_agent_memory, additional_args=additional_args
+        ):
+            yield from pull_messages_from_step(step_log)
+
+        # Get the last step log from the agent's memory for final answer
+        last_step_log = agent.memory.steps[-1] if agent.memory.steps else None
+
+        if last_step_log:
+            # Process final answer with comprehensive media output
+            final_answer = handle_agent_output_types(last_step_log)
+
+            # Output handling based on type
+            if isinstance(final_answer, AgentText):
+                yield gr.ChatMessage(
+                    role="assistant",
+                    content=f"**Final answer:**\n{final_answer.to_string()}\n",
+                )
+            elif isinstance(final_answer, AgentImage):
+                yield gr.ChatMessage(
+                    role="assistant",
+                    content={"image": final_answer.to_string(), "type": "file"},
+                )
+            elif isinstance(final_answer, AgentAudio):
+                yield gr.ChatMessage(
+                    role="assistant",
+                    content={"audio": final_answer.to_string(), "type": "file"},
+                )
+            else:
+                yield gr.ChatMessage(
+                    role="assistant", content=f"**Final answer:** {str(final_answer)}"
+                )
+        else:
+            yield gr.ChatMessage(
+                role="assistant",
+                content="No final answer was generated. Please try again.",
+            )
+    except Exception as e:
        yield gr.ChatMessage(
            role="assistant",
-            content=
-        )  # Send as Gradio-compatible file object
-    else:
-        yield gr.ChatMessage(
-            role="assistant", content=f"**Final answer:** {str(final_answer)}"
+            content=f"**Error occurred during processing**: {str(e)}\n\nPlease try again with a different query or check your inputs.",
        )

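Since the rewritten `stream_to_gradio` is a generator of `gr.ChatMessage` objects, callers get incremental updates by re-yielding a growing list, which is what `interact_with_agent` does in the next hunk. A minimal consumption sketch, assuming the module's own `create_agent`:

    # Smoke-test sketch: drive the generator outside the Gradio event loop.
    agent = create_agent()
    messages = []
    for msg in stream_to_gradio(agent, task="Summarize the attached report"):
        messages.append(msg)  # each msg is a gr.ChatMessage
        print(msg.role, str(msg.content)[:80])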
@@ -317,100 +352,134 @@ class GradioUI:
    def interact_with_agent(self, prompt, messages, session_state):
        """Main interaction handler with the agent."""

-        # Get or create session-specific agent
+        # Get or create session-specific agent with cache persistence
        if "agent" not in session_state:
+            try:
+                session_state["agent"] = create_agent()
+                session_state["creation_time"] = datetime.datetime.now()
+                session_state["request_count"] = 0
+            except Exception as e:
+                messages.append(
+                    gr.ChatMessage(
+                        role="assistant",
+                        content=f"**Error initializing agent**: {str(e)}\n\nPlease refresh the page and try again.",
+                    )
+                )
+                yield messages
+                return
+
+        session_state["request_count"] += 1
+
+        # Add user message
+        messages.append(gr.ChatMessage(role="user", content=prompt))
+        yield messages

        try:
-            # Log the existence of agent memory
-            has_memory = hasattr(session_state["agent"], "memory")
-            print(f"Agent has memory: {has_memory}")
-            if has_memory:
-                print(f"Memory type: {type(session_state['agent'].memory)}")
+            # Check if agent should be reset (e.g., if too many requests)
+            reset_needed = session_state["request_count"] > 15

            for msg in stream_to_gradio(
-                session_state["agent"], task=prompt, reset_agent_memory=
+                session_state["agent"], task=prompt, reset_agent_memory=reset_needed
            ):
                messages.append(msg)
                yield messages
+
+            # If we reset the agent memory, update the request count
+            if reset_needed:
+                session_state["request_count"] = 1

        except Exception as e:
+            messages.append(
+                gr.ChatMessage(
+                    role="assistant",
+                    content=f"**Error processing your request**: {str(e)}\n\nPlease try again with a different query.",
+                )
+            )
+            yield messages

-    def upload_file(
-        self,
-        file,
-        file_uploads_log,
-    ):
-        """Handle file uploads with proper validation and security."""
+    def upload_file(self, file, file_uploads_log):
+        """Handle file uploads with validation, security, and clear feedback."""
        if file is None:
            return gr.Textbox("No file uploaded", visible=True), file_uploads_log

        try:
+            # Get file size and check limit before processing
+            file_size_mb = os.path.getsize(file.name) / (1024 * 1024)  # Size in MB
+            max_file_size_mb = 50  # Define the limit
+
+            if file_size_mb > max_file_size_mb:
+                return (
+                    gr.Textbox(
+                        f"❌ File size ({file_size_mb:.1f} MB) exceeds {max_file_size_mb} MB limit.",
+                        visible=True,
+                    ),
+                    file_uploads_log,
+                )
+
+            # Check MIME type
+            mime_type, _ = mimetypes.guess_type(file.name)
+            if mime_type not in ALLOWED_FILE_TYPES:
+                allowed_extensions = [
+                    t.rsplit("/", maxsplit=1)[-1] for t in ALLOWED_FILE_TYPES
+                ]
+                return (
+                    gr.Textbox(
+                        f"❌ File type '{mime_type or 'unknown'}' is not allowed. Supported types: {', '.join(allowed_extensions)}",
+                        visible=True,
+                    ),
+                    file_uploads_log,
+                )

-            if t not in type_to_ext:
-                type_to_ext[t] = ext
-
-            sanitized_name = "".join(name_parts) + extension
+            # Sanitize file name with better pattern
+            original_name = os.path.basename(file.name)
+            sanitized_name = re.sub(r"[^\w\-.]", "_", original_name)

-            return (
-                gr.Textbox(
-                    f"File size exceeds {max_file_size_mb} MB limit.", visible=True
-                ),
-                file_uploads_log,
-            )
-
-            # Save the uploaded file to the specified folder
+            # Save the uploaded file
            file_path = os.path.join(self.file_upload_folder, sanitized_name)
            shutil.copy(file.name, file_path)

            return gr.Textbox(
-                f"File uploaded: {file_path}", visible=True
-            ), file_uploads_log + [file_path]
+                f"✓ File uploaded successfully: {os.path.basename(file_path)} ({file_size_mb:.1f} MB)",
+                visible=True,
+            ), file_uploads_log + [file_path]

+        except Exception as e:
+            return (
+                gr.Textbox(f"❌ Upload error: {str(e)}", visible=True),
+                file_uploads_log,
+            )

    def log_user_message(self, text_input, file_uploads_log):
-        """Process user message and handle file references."""
+        """Process user message and handle file references with proper agent types."""
        message = text_input

        if len(file_uploads_log) > 0:
+            # Group files by type for better agent processing
+            file_info = {}
+            for file_path in file_uploads_log:
+                ext = os.path.splitext(file_path)[1].lower()
+                if ext in [".jpg", ".jpeg", ".png", ".gif", ".webp"]:
+                    category = "images"
+                elif ext in [".mp3", ".wav", ".ogg"]:
+                    category = "audio"
+                else:
+                    category = "documents"
+
+                if category not in file_info:
+                    file_info[category] = []
+                file_info[category].append(os.path.basename(file_path))
+
+            # Format file information for the agent
+            file_message = "\nYou have been provided with these files:\n"
+            for category, files in file_info.items():
+                file_message += f"- {category.capitalize()}: {', '.join(files)}\n"
+
+            message += file_message
+            message += "\nUse inspect_file_as_text for documents, visualizer for images, and the appropriate tools for audio files."

        return (
            message,
-            gr.Textbox(
-                value="",
-                interactive=False,
-                placeholder="Processing...",  # Changed placeholder.
-            ),
+            gr.Textbox(value="", interactive=False, placeholder="Processing..."),
            gr.Button(interactive=False),
        )
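The new sanitizer keeps only word characters, hyphens, and dots, so spaces and shell metacharacters in an uploaded name are flattened to underscores before the path join; this replaces the old extension-table approach. For example:

    import re

    name = "my report (v2).pdf"
    print(re.sub(r"[^\w\-.]", "_", name))  # -> my_report__v2_.pdf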
@@ -460,68 +529,111 @@ class GradioUI:
        )  # Add queue with reasonable size

    def _create_desktop_layout(self):
-        """Create the desktop layout with sidebar."""
+        """Create the desktop layout with sidebar and enhanced styling."""
        with gr.Blocks(fill_height=True) as sidebar_demo:
            with gr.Sidebar():
                gr.Markdown(
-                    """#
+                    """#
+                    ### Smolagents + Document Tools
+                    """
                )
                with gr.Group():
-                    gr.Markdown("**What
+                    gr.Markdown("**What can I help you with today?**", container=True)
                    text_input = gr.Textbox(
-                        lines=
+                        lines=4,
                        label="Your request",
                        container=False,
-                        placeholder="Enter your
+                        placeholder="Enter your question or task here...",
+                        show_label=False,
                    )
-                    launch_research_btn = gr.Button("Run", variant="primary")

-                upload_status = gr.Textbox(
-                    label="Upload Status", interactive=False, visible=False
-                )
-                file_uploads_log = gr.State([])
-                upload_file.change(
-                    self.upload_file,
-                    [upload_file, file_uploads_log],
-                    [upload_status, file_uploads_log],
-                )
+                    with gr.Row():
+                        clear_btn = gr.Button("Clear", variant="secondary")
+                        launch_research_btn = gr.Button("Run", variant="primary")

+                # File upload section with better labeling
+                if self.file_upload_folder is not None:
+                    with gr.Group():
+                        gr.Markdown("** Upload Documents**")
+                        upload_file = gr.File(
+                            label="Upload files for analysis",
+                            file_types=[
+                                "pdf",
+                                "docx",
+                                "txt",
+                                "md",
+                                "csv",
+                                "xlsx",
+                                "jpg",
+                                "png",
+                            ],
+                            file_count="multiple",
+                        )
+                        upload_status = gr.Textbox(
+                            label="Upload Status", interactive=False, visible=False
+                        )
+                        file_uploads_log = gr.State([])
+
+                        # Show uploaded files list
+                        uploaded_files_display = gr.Markdown("No files uploaded yet")
+
+                        upload_file.change(
+                            self.upload_file,
+                            [upload_file, file_uploads_log],
+                            [upload_status, file_uploads_log],
+                        ).then(
+                            lambda files: (
+                                "**Uploaded Files:**\n"
+                                + "\n".join([f"- {os.path.basename(f)}" for f in files])
+                                if files
+                                else "No files uploaded yet"
+                            ),
+                            [file_uploads_log],
+                            [uploaded_files_display],
+                        )
+
+                gr.HTML("<br><hr><h4><center>Powered by:</center></h4>")
                with gr.Row():
                    gr.HTML(
                        """
+                        <div style="display: flex; align-items: center; justify-content: center; gap: 8px; font-family: system-ui, -apple-system, sans-serif;">
+                            <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png"
+                                 style="width: 32px; height: 32px; object-fit: contain;" alt="logo">
+                            <a target="_blank" href="https://github.com/huggingface/smolagents">
+                                <b>huggingface/smolagents</b>
+                            </a>
+                        </div>
+                        """
                    )

-            #
-            session_state = gr.State({})
+            # Main chat area with improved styling
+            session_state = gr.State({})
            stored_messages = gr.State([])
            if "file_uploads_log" not in locals():
                file_uploads_log = gr.State([])

            chatbot = gr.Chatbot(
-                label="
+                label="OpenDeepResearch Assistant",
                type="messages",
                avatar_images=(
                    None,
                    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/mascot_smol.png",
                ),
-                resizeable=
+                resizeable=True,
+                show_copy_button=True,
                scale=1,
                elem_id="my-chatbot",
+                height=700,
            )

+            # Connect clear button
+            clear_btn.click(
+                lambda: ([], [], {"agent": session_state.get("agent")}),
+                None,
+                [chatbot, stored_messages, session_state],
+            )

+            # Connect event handlers
            self._connect_event_handlers(
                text_input,
                launch_research_btn,
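The `upload_file.change(...).then(...)` chain added in the last hunk runs the upload handler first and only re-renders the file list after the `gr.State` log has been updated, keeping the status text and the listing consistent. A stripped-down sketch of the same chaining pattern; the component names here are placeholders, not the app's:

    import gradio as gr

    with gr.Blocks() as demo:
        files_state = gr.State([])
        upload = gr.File(file_count="multiple")
        listing = gr.Markdown("No files uploaded yet")

        def record(new_files, files):
            # Append uploaded paths, mirroring upload_file's log.
            return files + [f.name for f in (new_files or [])]

        upload.change(record, [upload, files_state], [files_state]).then(
            lambda files: "\n".join(f"- {f}" for f in files) or "No files uploaded yet",
            [files_state],
            [listing],
        )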