OpenDeepResearch

Runtime error

App Files Files Community

Leonardo committed on Mar 28, 2025

Commit

7ab7a72

verified ·

1 Parent(s): fbec7e7

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -58

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ import os
 import re
 import shutil
 import datetime
 from dotenv import load_dotenv
 from huggingface_hub import login
@@ -38,9 +39,9 @@ from smolagents import (
     TransformersModel,
     GoogleSearchTool,
     Tool,
 )
-from smolagents.agent_types import AgentText, AgentImage, AgentAudio
-from smolagents.gradio_ui import pull_messages_from_step, handle_agent_output_types
 # ------------------------ Configuration and Setup ------------------------
 # Constants and configurations
@@ -247,6 +248,9 @@ def create_agent():
         # Create tool instances with proper error handling
         web_tools = ToolRegistry.load_web_tools(model, browser, text_limit)
         try:
             doc_tools = ToolRegistry.load_document_tools()
         except AssertionError as e:
@@ -265,6 +269,7 @@ def create_agent():
             tool
             for tool in (
                 [visualizer]
                 + web_tools
                 + doc_tools
                 + ([image_generator] if image_generator else [])
@@ -280,61 +285,44 @@ def create_agent():
         return CodeAgent(
             model=model,
             tools=all_tools,
-            max_steps=12,
             verbosity_level=2,
             additional_authorized_imports=AUTHORIZED_IMPORTS,
             planning_interval=4,
         )
     except Exception as e:
         print(f"Failed to create agent: {e}")
         raise RuntimeError(f"Agent creation failed: {e}") from e
-def stream_to_gradio(agent, task, reset_agent_memory=False, additional_args=None):
-    """Runs an agent with the given task and streams messages as Gradio ChatMessages."""
-    try:
-        for step_log in agent.run(
-            task, stream=True, reset=reset_agent_memory, additional_args=additional_args
-        ):
-            yield from pull_messages_from_step(step_log)
-        # Get the last step log from the agent's memory for final answer
-        last_step_log = agent.memory.steps[-1] if agent.memory.steps else None
-        if last_step_log:
-            # Process final answer with comprehensive media output
-            final_answer = handle_agent_output_types(last_step_log)
-            # Output handling based on type
-            if isinstance(final_answer, AgentText):
-                yield gr.ChatMessage(
-                    role="assistant",
-                    content=f"**Final answer:**\n{final_answer.to_string()}\n",
-                )
-            elif isinstance(final_answer, AgentImage):
-                yield gr.ChatMessage(
-                    role="assistant",
-                    content={"image": final_answer.to_string(), "type": "file"},
-                )
-            elif isinstance(final_answer, AgentAudio):
-                yield gr.ChatMessage(
-                    role="assistant",
-                    content={"audio": final_answer.to_string(), "type": "file"},
-                )
-            else:
-                yield gr.ChatMessage(
-                    role="assistant", content=f"**Final answer:** {str(final_answer)}"
-                )
-        else:
-            yield gr.ChatMessage(
-                role="assistant",
-                content="No final answer was generated. Please try again.",
-            )
-    except Exception as e:
-        yield gr.ChatMessage(
-            role="assistant",
-            content=f"**Error occurred during processing**: {str(e)}\n\nPlease try again with a different query or check your inputs.",
-        )
 # ------------------------ Gradio UI Components ------------------------
@@ -376,7 +364,19 @@ class GradioUI:
         try:
             # Check if agent should be reset (e.g., if too many requests)
-            reset_needed = session_state["request_count"] > 15
             for msg in stream_to_gradio(
                 session_state["agent"], task=prompt, reset_agent_memory=reset_needed
@@ -418,6 +418,11 @@ class GradioUI:
             # Check MIME type
             mime_type, _ = mimetypes.guess_type(file.name)
             if mime_type not in ALLOWED_FILE_TYPES:
                 allowed_extensions = [
                     t.rsplit("/", maxsplit=1)[-1] for t in ALLOWED_FILE_TYPES
@@ -462,20 +467,34 @@ class GradioUI:
                     category = "images"
                 elif ext in [".mp3", ".wav", ".ogg"]:
                     category = "audio"
                 else:
                     category = "documents"
                 if category not in file_info:
                     file_info[category] = []
-                file_info[category].append(os.path.basename(file_path))
             # Format file information for the agent
             file_message = "\nYou have been provided with these files:\n"
             for category, files in file_info.items():
-                file_message += f"- {category.capitalize()}: {', '.join(files)}\n"
             message += file_message
-            message += "\nUse inspect_file_as_text for documents, visualizer for images, and the appropriate tools for audio files."
         return (
             message,
@@ -533,7 +552,7 @@ class GradioUI:
         with gr.Blocks(fill_height=True) as sidebar_demo:
             with gr.Sidebar():
                 gr.Markdown(
-                    """#
                     ### Smolagents + Document Tools
                     """
                 )
@@ -554,7 +573,7 @@ class GradioUI:
                 # File upload section with better labeling
                 if self.file_upload_folder is not None:
                     with gr.Group():
-                        gr.Markdown("** Upload Documents**")
                         upload_file = gr.File(
                             label="Upload files for analysis",
                             file_types=[
@@ -592,6 +611,23 @@ class GradioUI:
                             [uploaded_files_display],
                         )
                 gr.HTML("<br><hr><h4><center>Powered by:</center></h4>")
                 with gr.Row():
                     gr.HTML(
@@ -648,14 +684,14 @@ class GradioUI:
     def _create_mobile_layout(self):
         """Create the mobile layout (simpler without sidebar)."""
         with gr.Blocks(fill_height=True) as simple_demo:
-            gr.Markdown("""#OpenDeepResearch - free the AI agents!""")
             # Add session state to store session-specific data
             session_state = gr.State({})
             stored_messages = gr.State([])
             file_uploads_log = gr.State([])
             chatbot = gr.Chatbot(
-                label="open-Deep-Research",
                 type="messages",
                 avatar_images=(
                     None,
@@ -667,7 +703,10 @@ class GradioUI:
             # If an upload folder is provided, enable the upload feature
             if self.file_upload_folder is not None:
-                upload_file = gr.File(label="Upload a file")
                 upload_status = gr.Textbox(
                     label="Upload Status", interactive=False, visible=False
                 )
@@ -678,11 +717,21 @@ class GradioUI:
                 )
             text_input = gr.Textbox(
-                lines=1,
-                label="What's on your mind mate?",
-                placeholder="Chuck in a question and we'll take care of the rest",
             )
-            launch_research_btn = gr.Button("Run", variant="primary")
             self._connect_event_handlers(
                 text_input,
@@ -757,6 +806,9 @@ def main():
     # Ensure downloads folder exists
     os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
     # Launch UI
     GradioUI(file_upload_folder="uploaded_files").launch()

 import re
 import shutil
 import datetime
+from typing import Optional, List, Dict, Any
 from dotenv import load_dotenv
 from huggingface_hub import login
     TransformersModel,
     GoogleSearchTool,
     Tool,
+    FinalAnswerTool,
 )
+from smolagents.gradio_ui import pull_messages_from_step, stream_to_gradio
 # ------------------------ Configuration and Setup ------------------------
 # Constants and configurations
         # Create tool instances with proper error handling
         web_tools = ToolRegistry.load_web_tools(model, browser, text_limit)
+        # Add FinalAnswerTool explicitly to ensure it's available
+        final_answer_tool = FinalAnswerTool()
         try:
             doc_tools = ToolRegistry.load_document_tools()
         except AssertionError as e:
             tool
             for tool in (
                 [visualizer]
+                + [final_answer_tool]  # Added explicitly
                 + web_tools
                 + doc_tools
                 + ([image_generator] if image_generator else [])
         return CodeAgent(
             model=model,
             tools=all_tools,
+            max_steps=15,  # Increased from 12 to give more room for complex tasks
             verbosity_level=2,
             additional_authorized_imports=AUTHORIZED_IMPORTS,
             planning_interval=4,
+            prompt_templates={
+                "system_prompt": """You are a helpful AI assistant with access to various tools.
+                Always think step by step, carefully planning your approach to the task.
+                When using Python code:
+                - Keep your code simple and readable
+                - Use the final_answer tool to provide your final response
+                Example of how to provide a final answer:
+                ```python
+                final_answer("This is my final answer based on my analysis.")
+                ```
+                """
+            },
         )
     except Exception as e:
         print(f"Failed to create agent: {e}")
         raise RuntimeError(f"Agent creation failed: {e}") from e
+def detect_agent_loop(agent):  # returns True/False; reads agent.memory.steps only, never mutates it
+    """Check if agent is stuck in a loop of similar errors"""
+    if not hasattr(agent, "memory") or not hasattr(agent.memory, "steps"):  # no step history available -> report "no loop"
+        return False
+    if len(agent.memory.steps) < 4:  # fewer than four recorded steps: too little history to judge
+        return False
+    recent_steps = agent.memory.steps[-4:]  # only the four most recent steps are inspected
+    error_count = sum(  # number of recent steps whose `error` attribute exists and is not None
+        1 for step in recent_steps if hasattr(step, "error") and step.error is not None
+    )  # NOTE(review): errors are only counted, not compared, so "similar" in the docstring is not enforced
+    if error_count >= 3:  # at least three of the last four steps errored -> treat as a loop
+        return True
+    return False
 # ------------------------ Gradio UI Components ------------------------
         try:
             # Check if agent should be reset (e.g., if too many requests)
+            reset_needed = session_state["request_count"] > 15 or detect_agent_loop(
+                session_state["agent"]
+            )
+            # If agent is in a loop, provide a hint
+            if detect_agent_loop(session_state["agent"]):
+                messages.append(
+                    gr.ChatMessage(
+                        role="assistant",
+                        content="I notice I'm having trouble executing some commands. Let me try a different approach...",
+                    )
+                )
+                yield messages
             for msg in stream_to_gradio(
                 session_state["agent"], task=prompt, reset_agent_memory=reset_needed
             # Check MIME type
             mime_type, _ = mimetypes.guess_type(file.name)
+            # Ensure Markdown files are recognized properly
+            if file.name.lower().endswith(".md"):
+                mime_type = "text/markdown"
             if mime_type not in ALLOWED_FILE_TYPES:
                 allowed_extensions = [
                     t.rsplit("/", maxsplit=1)[-1] for t in ALLOWED_FILE_TYPES
                     category = "images"
                 elif ext in [".mp3", ".wav", ".ogg"]:
                     category = "audio"
+                elif ext in [".md"]:
+                    category = "markdown"
+                elif ext in [".pdf"]:
+                    category = "pdf"
                 else:
                     category = "documents"
                 if category not in file_info:
                     file_info[category] = []
+                file_info[category].append(
+                    file_path
+                )  # Store full path for easier access
             # Format file information for the agent
             file_message = "\nYou have been provided with these files:\n"
             for category, files in file_info.items():
+                # Convert to filename-only for display
+                file_names = [os.path.basename(f) for f in files]
+                file_message += f"- {category.capitalize()}: {', '.join(file_names)}\n"
+                # Add full paths after names
+                file_message += f"  Paths: {', '.join(files)}\n"
             message += file_message
+            message += (
+                "\nUse inspect_file_as_text for documents/markdown/pdf, "
+                "visualizer for images, and the appropriate tools for audio files. "
+                "Remember to use the full file path when accessing the files."
+            )
         return (
             message,
         with gr.Blocks(fill_height=True) as sidebar_demo:
             with gr.Sidebar():
                 gr.Markdown(
+                    """# 🔍 OpenDeepResearch
                     ### Smolagents + Document Tools
                     """
                 )
                 # File upload section with better labeling
                 if self.file_upload_folder is not None:
                     with gr.Group():
+                        gr.Markdown("**📎 Upload Documents**")
                         upload_file = gr.File(
                             label="Upload files for analysis",
                             file_types=[
                             [uploaded_files_display],
                         )
+                    # Add helpful tool usage examples
+                    with gr.Accordion("Tool Usage Examples", open=False):
+                        gr.Markdown(
+                            """
+                            ### Document Tools
+                            - "Extract metadata from this document" - Uses frontmatter generator
+                            - "Clean and format this text" - Uses text cleaner
+                            ### File Analysis
+                            - "Analyze this PDF and summarize the key points" - Uses inspect_file_as_text
+                            - "What's in this image?" - Uses visualizer
+                            ### Web Search
+                            - "Find information about XYZ" - Uses search tools
+                            """
+                        )
                 gr.HTML("<br><hr><h4><center>Powered by:</center></h4>")
                 with gr.Row():
                     gr.HTML(
     def _create_mobile_layout(self):
         """Create the mobile layout (simpler without sidebar)."""
         with gr.Blocks(fill_height=True) as simple_demo:
+            gr.Markdown("""# 🔍 OpenDeepResearch""")
             # Add session state to store session-specific data
             session_state = gr.State({})
             stored_messages = gr.State([])
             file_uploads_log = gr.State([])
             chatbot = gr.Chatbot(
+                label="OpenDeepResearch Assistant",
                 type="messages",
                 avatar_images=(
                     None,
             # If an upload folder is provided, enable the upload feature
             if self.file_upload_folder is not None:
+                upload_file = gr.File(
+                    label="Upload a file",
+                    file_types=["pdf", "docx", "txt", "md", "jpg", "png"],
+                )
                 upload_status = gr.Textbox(
                     label="Upload Status", interactive=False, visible=False
                 )
                 )
             text_input = gr.Textbox(
+                lines=2,
+                label="Your question",
+                placeholder="Enter your question here",
+            )
+            with gr.Row():
+                clear_btn = gr.Button("Clear", variant="secondary")
+                launch_research_btn = gr.Button("Run", variant="primary")
+            # Connect clear button
+            clear_btn.click(
+                lambda: ([], [], {"agent": session_state.get("agent")}),
+                None,
+                [chatbot, stored_messages, session_state],
             )
             self._connect_event_handlers(
                 text_input,
     # Ensure downloads folder exists
     os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)
+    # Ensure uploads folder exists
+    os.makedirs("uploaded_files", exist_ok=True)
     # Launch UI
     GradioUI(file_upload_folder="uploaded_files").launch()