likable

Sleeping

jens.luecke committed on Jun 3, 2025

Commit

f224834

1 Parent(s): dabb6ef

Refactor application architecture to use a manager agent

- Replaced individual coding and planning agents with a new `GradioManagerAgent` that orchestrates the entire development workflow.
- Updated `app.py` to utilize the manager agent for generating AI responses, streamlining the process from planning to implementation.
- Introduced a new `manager_agent.py` file to handle the coordination of planning, coding, and testing agents.
- Enhanced `settings.py` to include configurations for the manager agent.
- Updated tests to cover the new manager agent functionality and ensure reliability.
- Improved project structure and dependencies in `pyproject.toml` to reflect changes in the agent architecture.

Files changed (10) hide show

app.py +13 -143
coding_agent.py +161 -258
manager_agent.py +172 -0
planning_agent.py +42 -166
pyproject.toml +2 -0
settings.py +40 -0
test_manager_agent.py +219 -0
test_testing_agent.py +0 -16
testing_agent.py +202 -307
uv.lock +3 -3

app.py CHANGED Viewed

@@ -4,165 +4,35 @@ import sys
 import gradio as gr
-from coding_agent import GradioCodingAgent
-from planning_agent import GradioPlanningAgent
 from settings import settings
 from utils import load_file
 gr.NO_RELOAD = False
-# Initialize the agents globally
-planning_agent = None
-coding_agent = None
-def get_planning_agent():
-    """Get or initialize the planning agent (lazy loading)."""
-    global planning_agent
-    if planning_agent is None:
-        try:
-            planning_agent = GradioPlanningAgent()
-        except Exception as e:
-            print(f"Error initializing planning agent: {e}")
-            return None
-    return planning_agent
-def get_coding_agent():
-    """Get or initialize the coding agent (lazy loading)."""
-    global coding_agent
-    if coding_agent is None:
-        try:
-            coding_agent = GradioCodingAgent()
-        except Exception as e:
-            print(f"Error initializing coding agent: {e}")
-            return None
-    return coding_agent
-# Enhanced AI response using both planning and coding agents
-def ai_response_with_planning_and_coding(message, history):
-    """Generate AI response using the planning agent for planning and \
 coding agent for implementation."""
-    planning_agent_instance = get_planning_agent()
-    coding_agent_instance = get_coding_agent()
-    if planning_agent_instance is None:
         # Fallback to mock response if planning agent fails to initialize
         response = (
-            "Sorry, the planning agent is not available. "
             "Please check your API_KEY environment variable."
         )
-        history.append({"role": "user", "content": message})
-        history.append({"role": "assistant", "content": response})
-        return history, ""
-    if coding_agent_instance is None:
-        # Fallback if coding agent fails to initialize
-        response = (
-            "Sorry, the coding agent is not available. "
-            "Planning is available but implementation will be limited."
-        )
-        history.append({"role": "user", "content": message})
         history.append({"role": "assistant", "content": response})
         return history, ""
     try:
-        # Step 1: Use the planning agent for planning
-        history.append({"role": "user", "content": message})
-        history.append(
-            {"role": "assistant", "content": "🎯 Starting to plan your application..."}
-        )
-        planning_result = planning_agent_instance.plan_application(message)
-        # Format the planning response
-        action_summary = (
-            planning_result.action_plan[:300] + "..."
-            if len(planning_result.action_plan) > 300
-            else planning_result.action_plan
-        )
-        components_list = chr(10).join(
-            [f"• {comp}" for comp in planning_result.gradio_components[:5]]
-        )
-        dependencies_list = chr(10).join(
-            [f"• {dep}" for dep in planning_result.dependencies[:5]]
-        )
-        planning_response = f"""✅ **Planning Complete!**
-**Complexity**: {planning_result.estimated_complexity}
-**Key Gradio Components Needed**:
-{components_list}
-**Dependencies Required**:
-{dependencies_list}
-**High-Level Action Plan**:
-{action_summary}
-🚀 **Now starting implementation...**"""
-        history.append({"role": "assistant", "content": planning_response})
-        # Step 2: Use the coding agent for implementation
-        history.append(
-            {
-                "role": "assistant",
-                "content": "⚡ Implementing your application with proper \
-project structure...",
-            }
-        )
-        coding_result = coding_agent_instance.iterative_implementation(planning_result)
-        # Format the implementation response
-        if coding_result.success:
-            implementation_response = f"""✅ **Implementation Complete!**
-**Project Created**: `{coding_result.project_path}`
-**Features Implemented**: {len(coding_result.implemented_features)} components
-**Status**: Ready to run!
-Your Gradio application has been created with:
-- Proper `uv` project structure
-- All required dependencies installed
-- Complete README.md with usage instructions
-- Functional app.py with all requested features
-You can view and test your app in the **Preview** tab, or check the code in \
-the **Code** tab.
-To run locally: `cd {coding_result.project_path} && uv run python app.py`"""
-            if coding_result.remaining_tasks:
-                implementation_response += f"\n\n**Remaining Tasks**: \
-{chr(10).join([f'• {task}' for task in coding_result.remaining_tasks])}"
-        else:
-            implementation_response = f"""⚠️ **Implementation Partially Complete**
-**Project Path**: `{coding_result.project_path}`
-**Issues Encountered**: {len(coding_result.error_messages)} errors
-**Error Messages**:
-{chr(10).join([f'• {error}' for error in coding_result.error_messages])}
-**Remaining Tasks**:
-{chr(10).join([f'• {task}' for task in coding_result.remaining_tasks])}
-The project structure has been set up, but some features may need manual completion."""
-        history.append({"role": "assistant", "content": implementation_response})
     except Exception as e:
-        error_response = (
-            f"I encountered an error during planning and implementation: {str(e)}. "
-            "Let me try a simpler approach..."
-        )
         history.append({"role": "assistant", "content": error_response})
     return history, ""
@@ -328,13 +198,13 @@ complete applications*"
         # Event handlers for chat - updated to use the combined planning and
         # coding function
         msg_input.submit(
-            ai_response_with_planning_and_coding,
             inputs=[msg_input, chatbot],
             outputs=[chatbot, msg_input],
         )
         send_btn.click(
-            ai_response_with_planning_and_coding,
             inputs=[msg_input, chatbot],
             outputs=[chatbot, msg_input],
         )

 import gradio as gr
+from manager_agent import GradioManagerAgent
 from settings import settings
 from utils import load_file
 gr.NO_RELOAD = False
def generate_ai_response(message, history):
    """Answer a chat message by running the manager agent workflow.

    Args:
        message: The user's request text taken from the chat input.
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts. It is extended in place with the user message and the
            assistant reply.

    Returns:
        A ``(history, "")`` tuple: the updated history plus an empty string
        for the second output of the event handler.
    """
    history.append({"role": "user", "content": message})

    # Building the manager can raise (e.g. bad or missing credentials), so
    # it is guarded here. A constructor call never evaluates to None, which
    # is why this is an exception handler rather than an `is None` check.
    try:
        manager_agent_instance = GradioManagerAgent()
    except Exception as e:
        print(f"Error initializing manager agent: {e}")
        response = (
            "Sorry, the manager agent is not available. "
            "Please check your API_KEY environment variable."
        )
        history.append({"role": "assistant", "content": response})
        return history, ""

    try:
        reply = manager_agent_instance(message)
    except Exception as e:
        reply = f"I encountered an error: {str(e)}"
    history.append({"role": "assistant", "content": reply})
    return history, ""
         # Event handlers for chat - updated to use the combined planning and
         # coding function
         msg_input.submit(
+            generate_ai_response,
             inputs=[msg_input, chatbot],
             outputs=[chatbot, msg_input],
         )
         send_btn.click(
+            generate_ai_response,
             inputs=[msg_input, chatbot],
             outputs=[chatbot, msg_input],
         )

coding_agent.py CHANGED Viewed

@@ -16,11 +16,9 @@ from dataclasses import dataclass
 from pathlib import Path
 from mcp import StdioServerParameters
-from smolagents import LiteLLMModel, MCPClient, ToolCallingAgent
-from planning_agent import PlanningResult
 from settings import settings
-from utils import load_file
 @dataclass
@@ -35,6 +33,72 @@ class CodingResult:
     final_app_code: str
 class GradioCodingAgent:
     """
     A specialized CodeAgent for implementing Gradio applications.
@@ -61,6 +125,22 @@ class GradioCodingAgent:
             verbosity_level: Level of verbosity for agent output (uses settings if None)
             max_steps: Maximum number of coding steps (uses settings if None)
         """
         # Use settings as defaults, but allow override
         self.model_id = model_id or settings.code_model_id
         self.api_base_url = api_base_url or settings.api_base_url
@@ -86,14 +166,19 @@ class GradioCodingAgent:
         self.mcp_client = MCPClient(server_parameters)
-        tool_collection = self.mcp_client.get_tools()
         # Initialize the CodeAgent with tools for file operations and project setup
         self.agent = ToolCallingAgent(
             model=self.model,
-            tools=tool_collection,
             verbosity_level=verbosity_level,
             max_steps=max_steps,
         )
         self.sandbox_path = Path("sandbox")
@@ -121,255 +206,79 @@ class GradioCodingAgent:
         except Exception:
             pass
-    def setup_project_structure(self, project_name: str = "gradio_app") -> bool:
-        """
-        Set up the initial project structure using uv.
-        Args:
-            project_name: Name of the project
-        Returns:
-            bool: True if setup was successful
-        """
-        try:
-            # Ensure sandbox directory exists and is clean
-            if self.sandbox_path.exists():
-                shutil.rmtree(self.sandbox_path)
-            self.sandbox_path.mkdir(exist_ok=True)
-            # Change to sandbox directory
-            os.chdir(self.sandbox_path)
-            # Initialize with uv
-            subprocess.run(
-                ["uv", "init", project_name],
-                capture_output=True,
-                text=True,
-                check=True,
-            )
-            # Change to project directory
-            os.chdir(project_name)
-            # Add gradio as a dependency
-            subprocess.run(
-                ["uv", "add", "gradio"],
-                capture_output=True,
-                text=True,
-                check=True,
-            )
-            # Change back to workspace root
-            os.chdir("../..")
-            return True
-        except subprocess.CalledProcessError as e:
-            print(f"Error setting up project structure: {e}")
-            print(f"stdout: {e.stdout}")
-            print(f"stderr: {e.stderr}")
-            return False
-        except Exception as e:
-            print(f"Unexpected error setting up project: {e}")
-            return False
-    def implement_application(self, planning_result: PlanningResult) -> CodingResult:
         """
-        Implement the full Gradio application based on the planning result.
         Args:
-            planning_result: The planning result from the planning agent
         Returns:
-            CodingResult containing implementation details
         """
-        # Set up project structure
-        project_name = "gradio_app"
-        if not self.setup_project_structure(project_name):
-            return CodingResult(
-                success=False,
-                project_path="",
-                implemented_features=[],
-                remaining_tasks=["Failed to set up project structure"],
-                error_messages=["Could not initialize uv project"],
-                final_app_code="",
-            )
-        project_path = str(self.sandbox_path / project_name)
-        # Create comprehensive prompt for implementation
-        gradio_components = chr(10).join(
-            [f"- {comp}" for comp in planning_result.gradio_components]
-        )
-        dependencies = chr(10).join(
-            [f"- {dep}" for dep in planning_result.dependencies if dep != "gradio"]
-        )
-        # Create the user prompt for the specific implementation
-        user_prompt = f"""You are an expert Python developer and Gradio \
-application architect.
-Your task is to implement a complete, working Gradio application based on \
-the provided plan.
-PROJECT SETUP:
-- You are working in the directory: {project_path}
-- The project has been initialized with `uv` and `gradio` is already installed
-- Use proper Python project structure with a main app.py file
-- Add any additional dependencies needed using `uv add package_name`
-IMPLEMENTATION REQUIREMENTS:
-1. Create a complete, functional Gradio application in app.py
-2. Follow the provided action plan and implementation plan exactly
-3. Implement ALL gradio components mentioned in the plan
-4. Add proper error handling and user feedback
-5. Create a comprehensive README.md with usage instructions
-6. Add all required dependencies to the project using `uv add`
-7. Make sure the app can be run with `uv run python app.py`
-8. Test the implementation and fix any issues
-QUALITY STANDARDS:
-- Write clean, well-documented code
-- Use proper type hints where appropriate
-- Follow Python best practices
-- Add docstrings to functions and classes
-- Handle edge cases and errors gracefully
-- Make the UI intuitive and user-friendly
-- When using multiline strings within multiline strings, properly escape them \
-using triple quotes
-  Example: Instead of using f\"\"\"...\"\"\", use f'''...''' or escape inner quotes \
-like f\"\"\"...\\\"\\\"\\\"...\\\"\\\"\\\"...\"\"\"
-GRADIO COMPONENTS TO IMPLEMENT:
-{gradio_components}
-DEPENDENCIES TO ADD:
-{dependencies}
-ACTION PLAN TO FOLLOW:
-{planning_result.action_plan}
-IMPLEMENTATION PLAN TO FOLLOW:
-{planning_result.implementation_plan}
-TESTING PLAN TO CONSIDER:
-{planning_result.testing_plan}
-You must implement the complete application and ensure it works properly.
-Use subprocess to run `uv add` commands to install any needed packages.
-Create all necessary files and make sure the application runs without errors.
-Please implement the complete Gradio application based on the planning result.
-The application should be fully functional and implement all the features
-described in the plans.
-Working directory: {project_path}
-Please:
-1. Start by creating/updating the README.md file with project description
-   and usage instructions
-2. Add any additional dependencies needed using `uv add package_name`
-3. Create the complete app.py file with all the Gradio components and
-   functionality
-4. Test the implementation to ensure it works
-5. Fix any issues that arise during testing
-Make sure the final application is complete and functional.
-/no_think
-"""
         try:
-            # Run the coding agent to implement the application
-            self.agent.run(
-                user_prompt,
-                additional_args={
-                    "current_app_py": load_file(str(Path(project_path) / "app.py")),
-                },
-            )
-            # Check if the implementation was successful
-            app_file = Path(project_path) / "app.py"
-            if app_file.exists():
-                with open(app_file, encoding="utf-8") as f:
-                    final_app_code = f.read()
-                return CodingResult(
-                    success=True,
-                    project_path=project_path,
-                    implemented_features=planning_result.gradio_components,
-                    remaining_tasks=[],
-                    error_messages=[],
-                    final_app_code=final_app_code,
-                )
-            else:
-                return CodingResult(
-                    success=False,
-                    project_path=project_path,
-                    implemented_features=[],
-                    remaining_tasks=["Main app.py file was not created"],
-                    error_messages=["Implementation failed to create app.py"],
-                    final_app_code="",
-                )
         except Exception as e:
-            return CodingResult(
-                success=False,
-                project_path=project_path,
-                implemented_features=[],
-                remaining_tasks=["Complete implementation"],
-                error_messages=[f"Coding agent error: {str(e)}"],
-                final_app_code="",
-            )
-    def iterative_implementation(
-        self, planning_result: PlanningResult, max_iterations: int = 3
-    ) -> CodingResult:
-        """
-        Implement the application with iterative refinement.
-        Args:
-            planning_result: The planning result from the planning agent
-            max_iterations: Maximum number of implementation iterations
-        Returns:
-            CodingResult containing final implementation details
-        """
-        last_result = None
-        for iteration in range(max_iterations):
-            print(f"🔄 Implementation iteration {iteration + 1}/{max_iterations}")
-            # Implement or refine the application
-            result = self.implement_application(planning_result)
-            if result.success and not result.remaining_tasks:
-                print(f"✅ Implementation successful in {iteration + 1} iteration(s)")
-                return result
-            last_result = result
-            if iteration < max_iterations - 1:
-                print(f"⚠️ Iteration {iteration + 1} incomplete. Refining...")
-                # For subsequent iterations, we could modify the prompt to focus
-                # on remaining tasks. This is a simplified version - in practice,
-                # you'd want more sophisticated iteration logic
-        print(f"⚠️ Implementation completed with {max_iterations} iterations")
-        return last_result or CodingResult(
-            success=False,
-            project_path="",
-            implemented_features=[],
-            remaining_tasks=["Complete implementation failed"],
-            error_messages=["Maximum iterations reached without completion"],
-            final_app_code="",
-        )
-# Convenience function for the main app
-def create_gradio_coding_agent() -> GradioCodingAgent:
-    """Create a GradioCodingAgent with default settings."""
-    return GradioCodingAgent()
 if __name__ == "__main__":
@@ -378,17 +287,11 @@ if __name__ == "__main__":
     # Test with a simple planning result
     planning_agent = GradioPlanningAgent()
-    planning_result = planning_agent.plan_application(
-        "Create a simple text-to-text translator app"
-    )
-    # Create coding agent and implement
-    coding_agent = create_gradio_coding_agent()
-    coding_result = coding_agent.iterative_implementation(planning_result)
-    print("Coding Result:")
-    print(f"Success: {coding_result.success}")
-    print(f"Project Path: {coding_result.project_path}")
-    print(f"Implemented Features: {coding_result.implemented_features}")
-    print(f"Remaining Tasks: {coding_result.remaining_tasks}")
-    print(f"Error Messages: {coding_result.error_messages}")

 from pathlib import Path
 from mcp import StdioServerParameters
+from smolagents import LiteLLMModel, MCPClient, ToolCallingAgent, tool
 from settings import settings
 @dataclass
     final_app_code: str
@tool
def setup_project_structure(project_name: str = "gradio_app") -> str:
    """
    Set up the initial project structure using uv.

    The ``sandbox`` directory is deleted and recreated, a uv project is
    initialised inside it and ``gradio`` is added as a dependency.

    Args:
        project_name: Name of the project

    Returns:
        Status message indicating success or failure
    """
    sandbox_path = Path("sandbox")
    try:
        # Ensure sandbox directory exists and is clean
        if sandbox_path.exists():
            shutil.rmtree(sandbox_path)
        sandbox_path.mkdir(exist_ok=True)

        # Each command gets an explicit cwd instead of os.chdir(): the
        # process-wide working directory is never modified, so nothing has
        # to be restored on the error paths.
        subprocess.run(
            ["uv", "init", project_name],
            cwd=sandbox_path,
            capture_output=True,
            text=True,
            check=True,
        )
        # Add gradio as a dependency from inside the new project directory
        subprocess.run(
            ["uv", "add", "gradio"],
            cwd=sandbox_path / project_name,
            capture_output=True,
            text=True,
            check=True,
        )
        return f"Successfully set up project structure for {project_name} \
in sandbox/{project_name}"
    except subprocess.CalledProcessError as e:
        # uv exited non-zero; its stderr is the most useful diagnostic
        return f"Error setting up project structure: {e.stderr}"
    except Exception as e:
        return f"Unexpected error setting up project: {str(e)}"
 class GradioCodingAgent:
     """
     A specialized CodeAgent for implementing Gradio applications.
             verbosity_level: Level of verbosity for agent output (uses settings if None)
             max_steps: Maximum number of coding steps (uses settings if None)
         """
+        self.name = "coding_agent"
+        self.description = """Expert Python developer specializing in Gradio \
+application implementation.
+This agent takes planning results and creates complete, working Gradio \
+applications with:
+    - Proper project structure using uv for package management
+    - Complete implementation of all planned features
+    - Working app.py file with functional Gradio interface
+    - Proper dependency management and documentation
+    - Error handling and iterative development approach
+The agent only exits when the full plan is implemented successfully.
+Handles complex applications and follows best practices for Python/Gradio \
+development."""
         # Use settings as defaults, but allow override
         self.model_id = model_id or settings.code_model_id
         self.api_base_url = api_base_url or settings.api_base_url
         self.mcp_client = MCPClient(server_parameters)
+        # Get MCP tools and add our custom tools
+        mcp_tools = self.mcp_client.get_tools()
+        custom_tools = [setup_project_structure]
+        all_tools = list(mcp_tools) + custom_tools
         # Initialize the CodeAgent with tools for file operations and project setup
         self.agent = ToolCallingAgent(
             model=self.model,
+            tools=all_tools,
             verbosity_level=verbosity_level,
             max_steps=max_steps,
+            name=self.name,
+            description=self.description,
         )
         self.sandbox_path = Path("sandbox")
         except Exception:
             pass
+    def __call__(self, task: str, **kwargs) -> str:
         """
+        Handle coding tasks as a managed agent.
         Args:
+            task: The planning result or task description
+            **kwargs: Additional keyword arguments (ignored)
         Returns:
+            String response containing the formatted coding result
         """
+        full_prompt = f"""You are an expert Python developer specializing in \
+Gradio application implementation.
+Your mission is to implement a complete, working Gradio application based \
+on the following architectural plan:
+```
+{task}
+```
+## Implementation Guidelines:
+### 1. Project Setup
+- ALWAYS start by calling setup_project_structure() to create the \
+proper project structure
+- Use uv for package management (already configured)
+- The project will be created in ./sandbox/ directory
+### 2. Implementation Requirements
+- Create a complete, functional Gradio application
+- Implement ALL features described in the plan
+- Write clean, well-documented Python code
+- Follow best practices for Gradio development
+- Ensure proper error handling and user feedback
+### 3. File Structure
+- Create app.py as the main application file
+- Add any necessary helper modules or utilities
+- Include proper imports and dependencies
+- Document code with comments and docstrings
+### 4. Gradio Interface Guidelines
+- Create an intuitive and user-friendly interface
+- Use appropriate Gradio components for each feature
+- Implement proper input validation and error handling
+- Ensure responsive design and good UX practices
+- Add helpful descriptions and examples where needed
+### 5. Quality Standards
+- Test your implementation thoroughly
+- Handle edge cases and error scenarios
+- Provide clear feedback to users
+- Ensure the app runs without errors
+- Follow Python coding standards (PEP 8)
+### 6. Completion Criteria
+- All planned features are fully implemented
+- The application runs successfully with `python app.py`
+- Users can interact with all described functionality
+- Code is clean, documented, and maintainable
+Remember: You can ONLY access files in the ./sandbox directory.
+Do not attempt to access files outside this sandbox environment.
+Start by setting up the project structure, then implement each feature \
+systematically until the complete application is ready."""
         try:
+            return self.agent.run(full_prompt)
         except Exception as e:
+            return f"❌ Implementation failed: {str(e)}"
 if __name__ == "__main__":
     # Test with a simple planning result
     planning_agent = GradioPlanningAgent()
+    planning_result = planning_agent("Create a simple calculator app")
+    # Create coding agent and implement using managed agent approach
+    coding_agent = GradioCodingAgent()
+    coding_result_str = coding_agent(planning_result)
+    print("=== CODING RESULT ===")
+    print(coding_result_str)

manager_agent.py ADDED Viewed

	@@ -0,0 +1,172 @@

+"""
+Smolagents CodeAgent for managing a multi-agent development workflow.
+This module provides a manager agent that orchestrates:
+- Planning Agent: Creates comprehensive plans for Gradio applications
+- Coding Agent: Implements the planned applications with proper project structure
+- Testing Agent: Tests and validates the implemented applications
+The manager follows this workflow:
+1. Receives user prompt
+2. Hands prompt to Planning Agent → gets the planning result
+3. Hands planning result to Coding Agent → gets the coding result
+4. Hands coding result to Testing Agent → gets the testing result
+5. If testing fails, hands errors back to Coding Agent for fixes
+6. Continues until testing passes or max iterations reached
+"""
+from smolagents import CodeAgent, LiteLLMModel
+from coding_agent import GradioCodingAgent
+from planning_agent import GradioPlanningAgent
+from settings import settings
+from testing_agent import GradioTestingAgent
+class GradioManagerAgent:
+    """
+    A manager agent that orchestrates the planning, coding, and testing workflow.
+    This agent coordinates the entire development process from initial planning
+    through implementation to final testing and validation.
+    """
+    def __init__(
+        self,
+        model_id: str | None = None,
+        api_base_url: str | None = None,
+        api_key: str | None = None,
+        verbosity_level: int | None = None,
+        max_steps: int | None = None,
+        max_iterations: int = 3,
+    ):
+        """
+        Initialize the Gradio Manager Agent.
+        Args:
+            model_id: Model ID to use for management (uses settings if None)
+            api_base_url: API base URL (uses settings if None)
+            api_key: API key (uses settings if None)
+            verbosity_level: Level of verbosity for agent output (uses settings if None)
+            max_steps: Maximum number of management steps (uses settings if None)
+            max_iterations: Maximum number of coding/testing iterations
+        """
+        self.name = "manager_agent"
+        self.description = """Expert development manager coordinating multi-agent \
+Gradio application development.
+This agent orchestrates a complete development workflow by managing:
+    - Planning Agent: Creates comprehensive application plans
+    - Coding Agent: Implements planned applications with proper structure
+    - Testing Agent: Validates and tests implemented applications
+Coordinates iterative development cycles until applications are fully working \
+and tested.
+Provides comprehensive workflow management and detailed progress reporting."""
+        # Use settings as defaults, but allow override
+        self.model_id = model_id or settings.manager_model_id
+        self.api_base_url = api_base_url or settings.api_base_url
+        self.api_key = api_key or settings.api_key
+        verbosity_level = verbosity_level or settings.manager_verbosity
+        max_steps = max_steps or settings.max_manager_steps
+        self.max_iterations = max_iterations
+        # Initialize the language model
+        self.model = LiteLLMModel(
+            model_id=self.model_id,
+            api_base=self.api_base_url,
+            api_key=self.api_key,
+        )
+        # Create managed agent instances
+        self.planning_agent = GradioPlanningAgent()
+        self.coding_agent = GradioCodingAgent()
+        self.testing_agent = GradioTestingAgent()
+        # Initialize the main ToolCallingAgent with the managed agents
+        self.agent = CodeAgent(
+            model=self.model,
+            tools=[],  # No tools needed, only managed agents
+            managed_agents=[
+                self.planning_agent,
+                self.coding_agent,
+                self.testing_agent,
+            ],
+            verbosity_level=verbosity_level,
+            max_steps=max_steps,
+            name=self.name,
+            description=self.description,
+        )
+    def __call__(self, task: str, **kwargs) -> str:
+        """
+        Handle development management tasks as a managed agent.
+        Args:
+            task: The user's description of the application to build
+            **kwargs: Additional keyword arguments (ignored)
+        Returns:
+            String response containing the formatted workflow result
+        """
+        try:
+            # Run the development workflow
+            result = self.develop_application(task)
+            # Format the result for managed agent workflow
+            return self.format_result_as_markdown(result)
+        except Exception as e:
+            return f"❌ Development workflow failed: {str(e)}"
+    def develop_application(self, prompt: str) -> str:
+        """
+        Manage the full development workflow from planning to testing.
+        Args:
+            prompt: User's description of the application to build
+        Returns:
+            String containing the complete workflow results
+        """
+        try:
+            # Create comprehensive task for the manager workflow
+            manager_task = f"""You are a development manager coordinating a \
+team of specialists to build a Gradio application.
+The user wants: {prompt}
+Please coordinate the following workflow:
+1. **PLANNING PHASE**: Call the planning_agent to create a comprehensive \
+plan for this application
+2. **IMPLEMENTATION PHASE**: Call the coding_agent with the planning results \
+to implement the application
+3. **TESTING PHASE**: Call the testing_agent with the implementation results \
+to test the application
+4. **ITERATION**: If testing fails, call the coding_agent again with the \
+error details to fix issues
+5. **COMPLETION**: Continue until testing passes or maximum iterations reached
+Start by calling the planning_agent with the user's request."""
+            # Run the coordinated workflow
+            result = self.agent.run(manager_task)
+            # Return successful result with agent's response
+            return str(result)
+        except Exception as e:
+            return f"Manager workflow failed: {str(e)}"
if __name__ == "__main__":
    # Manual smoke run: build a manager with default settings, hand it one
    # sample request through the managed-agent call interface and print
    # whatever report comes back.
    sample_request = "Create a simple calculator with basic arithmetic operations"
    workflow_report = GradioManagerAgent()(sample_request)
    print("=== MANAGER RESULT ===")
    print(workflow_report)

planning_agent.py CHANGED Viewed

@@ -7,25 +7,11 @@ This module provides a specialized planning agent that can:
 - Return an action, implementation and testing plan
 """
-from dataclasses import dataclass
-from smolagents import LiteLLMModel
 from settings import settings
-@dataclass
-class PlanningResult:
-    """Result of the planning agent containing structured plans."""
-    action_plan: str
-    implementation_plan: str
-    testing_plan: str
-    gradio_components: list[str]
-    estimated_complexity: str
-    dependencies: list[str]
 class GradioPlanningAgent:
     """
     A specialized CodeAgent for planning Gradio applications.
@@ -50,6 +36,24 @@ class GradioPlanningAgent:
             api_key: API key (uses settings if None)
             verbosity_level: Level of verbosity for agent output (uses settings if None)
         """
         # Use settings as defaults, but allow override
         self.model_id = model_id or settings.model_id
         self.api_base_url = api_base_url or settings.api_base_url
@@ -63,7 +67,15 @@ class GradioPlanningAgent:
             api_key=self.api_key,
         )
-        self.planning_prompt = """You are an expert software architect and Gradio \
 application developer. Your role is to create comprehensive, detailed plans \
 for building Gradio applications based on user requirements.
@@ -110,168 +122,32 @@ Be thorough, practical, and consider real-world constraints. Focus on creating \
 maintainable, user-friendly Gradio applications. Remember: NO CODE IMPLEMENTATION \
 at this stage - only architectural planning and structural design."""
-    def plan_application(self, prompt: str) -> PlanningResult:
         """
-        Create a comprehensive plan for a Gradio application based on the prompt.
         Args:
-            prompt: Natural language description of the program to build
         Returns:
-            PlanningResult containing structured plans
         """
-        # Enhanced prompt for the agent
-        user_prompt = f"""
 Create a comprehensive plan for building the following Gradio application:
-{prompt}
 Please provide detailed ACTION, IMPLEMENTATION, and TESTING plans following the \
 specified format. Consider all aspects of the application including UI/UX, \
-functionality, error handling, and deployment.
-"""
-        messages = [
-            {"role": "system", "content": self.planning_prompt},
-            {"role": "user", "content": user_prompt},
-        ]
-        response = self.model.generate(messages)
-        # Parse the response into structured result
-        return self._parse_planning_response(response.content)
-    def _parse_planning_response(self, response: str) -> PlanningResult:
-        """
-        Parse the agent's response into a structured PlanningResult.
-        Args:
-            response: Raw response from the planning agent
-        Returns:
-            Structured PlanningResult
-        """
-        # Initialize default values
-        action_plan = ""
-        implementation_plan = ""
-        testing_plan = ""
-        gradio_components = []
-        estimated_complexity = "Medium"
-        dependencies = ["gradio"]
-        # Parse sections from the response
-        sections = self._extract_sections(response)
-        action_plan = sections.get("ACTION PLAN", "")
-        implementation_plan = sections.get("IMPLEMENTATION PLAN", "")
-        testing_plan = sections.get("TESTING PLAN", "")
-        # Parse gradio components list
-        components_text = sections.get("GRADIO COMPONENTS", "")
-        if components_text:
-            gradio_components = self._extract_list_items(components_text)
-        # Parse complexity
-        complexity_text = sections.get("ESTIMATED COMPLEXITY", "")
-        if complexity_text:
-            estimated_complexity = complexity_text.strip()
-        # Parse dependencies
-        deps_text = sections.get("DEPENDENCIES", "")
-        if deps_text:
-            dependencies = ["gradio"] + self._extract_list_items(deps_text)
-            # Remove duplicates while preserving order
-            dependencies = list(dict.fromkeys(dependencies))
-        return PlanningResult(
-            action_plan=action_plan,
-            implementation_plan=implementation_plan,
-            testing_plan=testing_plan,
-            gradio_components=gradio_components,
-            estimated_complexity=estimated_complexity,
-            dependencies=dependencies,
-        )
-    def _extract_sections(self, text: str) -> dict[str, str]:
-        """Extract sections from markdown-formatted text."""
-        sections = {}
-        current_section = None
-        current_content = []
-        for line in text.split("\n"):
-            line = line.strip()
-            # Check if line is a section header
-            if line.startswith("## "):
-                # Save previous section if exists
-                if current_section and current_content:
-                    sections[current_section] = "\n".join(current_content).strip()
-                # Start new section
-                current_section = line[3:].strip()
-                current_content = []
-            elif current_section:
-                current_content.append(line)
-        # Save last section
-        if current_section and current_content:
-            sections[current_section] = "\n".join(current_content).strip()
-        return sections
-    def _extract_list_items(self, text: str) -> list[str]:
-        """Extract list items from text (handles bullet points, numbered lists, etc.)"""
-        items = []
-        for line in text.split("\n"):
-            line = line.strip()
-            if line:
-                # Remove common list prefixes
-                if line.startswith("- "):
-                    line = line[2:].strip()
-                elif line.startswith("* "):
-                    line = line[2:].strip()
-                elif ". " in line and line.split(".")[0].isdigit():
-                    line = line.split(".", 1)[1].strip()
-                if line:
-                    items.append(line)
-        return items
-    def format_plan_as_markdown(self, result: PlanningResult) -> str:
-        """
-        Format the planning result as a well-structured markdown document.
-        Args:
-            result: PlanningResult to format
-        Returns:
-            Markdown-formatted string
-        """
-        markdown = f"""# Gradio Application Plan
-## 📋 Action Plan
-{result.action_plan}
-## 🔧 Implementation Plan
-{result.implementation_plan}
-## 🧪 Testing Plan
-{result.testing_plan}
-## 🎨 Gradio Components
-{chr(10).join([f"- {component}" for component in result.gradio_components])}
-## ⚡ Estimated Complexity
-{result.estimated_complexity}
-## 📦 Dependencies
-{chr(10).join([f"- {dep}" for dep in result.dependencies])}
-"""
-        return markdown
 # Example usage and testing
@@ -280,9 +156,9 @@ if __name__ == "__main__":
     agent = GradioPlanningAgent()
     # Test with a simple calculator example
-    result = agent.plan_application(
         "Write a simple calculator app that can perform basic arithmetic operations"
     )
     print("=== PLANNING RESULT ===")
-    print(agent.format_plan_as_markdown(result))

 - Return an action, implementation and testing plan
 """
+from smolagents import LiteLLMModel, ToolCallingAgent
 from settings import settings
 class GradioPlanningAgent:
     """
     A specialized CodeAgent for planning Gradio applications.
             api_key: API key (uses settings if None)
             verbosity_level: Level of verbosity for agent output (uses settings if None)
         """
+        self.name = "planning_agent"
+        self.description = """Expert software architect specializing in Gradio \
+application planning.
+This agent creates comprehensive, detailed plans for building Gradio applications \
+based on user requirements.
+It provides:
+    - High-level action plans breaking down the implementation steps
+    - Detailed technical implementation plans using Python and Gradio
+    - Comprehensive testing strategies
+    - Analysis of required Gradio components and dependencies
+    - Complexity estimation for the project
+The agent focuses purely on planning and architecture - no actual code \
+implementation.
+Perfect for getting structured, well-thought-out plans before development \
+begins."""
         # Use settings as defaults, but allow override
         self.model_id = model_id or settings.model_id
         self.api_base_url = api_base_url or settings.api_base_url
             api_key=self.api_key,
         )
+        self.agent = ToolCallingAgent(
+            model=self.model,
+            tools=[],
+            verbosity_level=verbosity_level,
+            name=self.name,
+            description=self.description,
+        )
+        self.system_prompt = """You are an expert software architect and Gradio \
 application developer. Your role is to create comprehensive, detailed plans \
 for building Gradio applications based on user requirements.
 maintainable, user-friendly Gradio applications. Remember: NO CODE IMPLEMENTATION \
 at this stage - only architectural planning and structural design."""
+    def __call__(self, task: str, **kwargs) -> str:  # kwargs are accepted but never read in this body
         """
+        Handle planning tasks as a managed agent.
         Args:
+            task: The user's description of the application to build
+            **kwargs: Additional keyword arguments (ignored)
         Returns:
+            String response containing the formatted planning result
         """
+        full_prompt = f"""{self.system_prompt}
 Create a comprehensive plan for building the following Gradio application:
+{task}
 Please provide detailed ACTION, IMPLEMENTATION, and TESTING plans following the \
 specified format. Consider all aspects of the application including UI/UX, \
+functionality, error handling, and deployment. /no_think"""
+        try:  # the system prompt is inlined into the task text above, so the agent receives one combined string
+            return self.agent.run(full_prompt)
+        except Exception as e:  # failures are returned as a message string rather than raised
+            return f"❌ Planning failed: {str(e)}"
 # Example usage and testing
     agent = GradioPlanningAgent()
     # Test with a simple calculator example
+    result = agent(
         "Write a simple calculator app that can perform basic arithmetic operations"
     )
     print("=== PLANNING RESULT ===")
+    print(result)

pyproject.toml CHANGED Viewed

@@ -46,4 +46,6 @@ members = [
     "sandbox/gradio_app",
     "sandbox/sandbox/gradio_app",
     "test_sandbox/test_project",
 ]

     "sandbox/gradio_app",
     "sandbox/sandbox/gradio_app",
     "test_sandbox/test_project",
+    "sandbox/calculator_app",
+    "sandbox/gradio_calculator",
 ]

settings.py CHANGED Viewed

@@ -23,6 +23,11 @@ class Settings:
         self.api_base_url: str | None = os.getenv("API_BASE_URL")
         self.api_key: str | None = os.getenv("API_KEY")
         # Coding Agent Settings
         self.code_model_id: str = os.getenv("CODE_MODEL_ID", self.model_id)
         self.coding_verbosity: int = int(os.getenv("CODING_VERBOSITY", "2"))
@@ -57,6 +62,14 @@ without a valid API key."
             print("   Set it in your .env file or as an environment variable.")
             print()
         if self.planning_verbosity not in [0, 1, 2]:
             print(
                 f"⚠️  Warning: PLANNING_VERBOSITY={self.planning_verbosity} is not \
@@ -92,6 +105,17 @@ in valid range [0, 1, 2]"
         return config
     def get_code_model_config(self) -> dict:
         """Get model configuration for the coding agent."""
         config = {"model_id": self.code_model_id, "api_key": self.api_key}
@@ -111,6 +135,13 @@ in valid range [0, 1, 2]"
             "debug": self.gradio_debug,
         }
     def get_planning_config(self) -> dict:
         """Get planning agent configuration."""
         return {
@@ -147,6 +178,7 @@ in valid range [0, 1, 2]"
         """String representation of settings (excluding sensitive data)."""
         return f"""Settings(
     model_id='{self.model_id}',
     code_model_id='{self.code_model_id}',
     test_model_id='{self.test_model_id}',
     api_key={'***' if self.api_key else 'None'},
@@ -154,6 +186,8 @@ in valid range [0, 1, 2]"
     gradio_host='{self.gradio_host}',
     gradio_port={self.gradio_port},
     gradio_debug={self.gradio_debug},
     planning_verbosity={self.planning_verbosity},
     max_planning_steps={self.max_planning_steps},
     coding_verbosity={self.coding_verbosity},
@@ -186,12 +220,18 @@ if __name__ == "__main__":
     print("Model Config:")
     print(settings.get_model_config())
     print()
     print("Code Model Config:")
     print(settings.get_code_model_config())
     print()
     print("Gradio Config:")
     print(settings.get_gradio_config())
     print()
     print("Planning Config:")
     print(settings.get_planning_config())
     print()

         self.api_base_url: str | None = os.getenv("API_BASE_URL")
         self.api_key: str | None = os.getenv("API_KEY")
+        # Manager Agent Settings
+        self.manager_model_id: str = os.getenv("MANAGER_MODEL_ID", self.model_id)
+        self.manager_verbosity: int = int(os.getenv("MANAGER_VERBOSITY", "1"))
+        self.max_manager_steps: int = int(os.getenv("MAX_MANAGER_STEPS", "15"))
         # Coding Agent Settings
         self.code_model_id: str = os.getenv("CODE_MODEL_ID", self.model_id)
         self.coding_verbosity: int = int(os.getenv("CODING_VERBOSITY", "2"))
             print("   Set it in your .env file or as an environment variable.")
             print()
+        if self.manager_verbosity not in [0, 1, 2]:
+            print(
+                f"⚠️  Warning: MANAGER_VERBOSITY={self.manager_verbosity} is not \
+in valid range [0, 1, 2]"
+            )
+            print("   Using default value of 1")
+            self.manager_verbosity = 1
         if self.planning_verbosity not in [0, 1, 2]:
             print(
                 f"⚠️  Warning: PLANNING_VERBOSITY={self.planning_verbosity} is not \
         return config
+    def get_manager_model_config(self) -> dict:
+        """Get model configuration for the manager agent."""
+        config = {"model_id": self.manager_model_id, "api_key": self.api_key}  # "api_key" is always present, possibly None
+        if self.api_base_url:  # "api_base_url" is only added when a base URL is configured
+            config["api_base_url"] = self.api_base_url
+        if self.api_key:  # NOTE(review): redundant - "api_key" was already set when config was built above
+            config["api_key"] = self.api_key
+        return config
     def get_code_model_config(self) -> dict:
         """Get model configuration for the coding agent."""
         config = {"model_id": self.code_model_id, "api_key": self.api_key}
             "debug": self.gradio_debug,
         }
+    def get_manager_config(self) -> dict:
+        """Get manager agent configuration."""
+        return {
+            "verbosity_level": self.manager_verbosity,  # read from MANAGER_VERBOSITY (default "1"); reset to 1 if outside [0, 1, 2]
+            "max_steps": self.max_manager_steps,  # read from MAX_MANAGER_STEPS (default "15")
+        }
     def get_planning_config(self) -> dict:
         """Get planning agent configuration."""
         return {
         """String representation of settings (excluding sensitive data)."""
         return f"""Settings(
     model_id='{self.model_id}',
+    manager_model_id='{self.manager_model_id}',
     code_model_id='{self.code_model_id}',
     test_model_id='{self.test_model_id}',
     api_key={'***' if self.api_key else 'None'},
     gradio_host='{self.gradio_host}',
     gradio_port={self.gradio_port},
     gradio_debug={self.gradio_debug},
+    manager_verbosity={self.manager_verbosity},
+    max_manager_steps={self.max_manager_steps},
     planning_verbosity={self.planning_verbosity},
     max_planning_steps={self.max_planning_steps},
     coding_verbosity={self.coding_verbosity},
     print("Model Config:")
     print(settings.get_model_config())
     print()
+    print("Manager Model Config:")
+    print(settings.get_manager_model_config())
+    print()
     print("Code Model Config:")
     print(settings.get_code_model_config())
     print()
     print("Gradio Config:")
     print(settings.get_gradio_config())
     print()
+    print("Manager Config:")
+    print(settings.get_manager_config())
+    print()
     print("Planning Config:")
     print(settings.get_planning_config())
     print()

test_manager_agent.py ADDED Viewed

	@@ -0,0 +1,219 @@

+"""
+Test cases for the Gradio Manager Agent.
+This module contains unit tests and integration tests for the manager agent
+functionality, including managed agent coordination and workflow testing.
+"""
+import unittest
+from unittest.mock import Mock, patch
+from manager_agent import (
+    GradioManagerAgent,
+    ManagerResult,
+)
+class TestGradioManagerAgent(unittest.TestCase):
+    """Test the main GradioManagerAgent class."""
+    def setUp(self):
+        """Set up test fixtures."""
+        # Replace manager_agent.settings with a mock for the duration of each test (stopped in tearDown)
+        self.mock_settings_patcher = patch("manager_agent.settings")
+        self.mock_settings = self.mock_settings_patcher.start()
+        # Give the mock concrete model ids, API details, verbosity levels and step limits
+        self.mock_settings.manager_model_id = "test-manager-model"
+        self.mock_settings.model_id = "test-model"
+        self.mock_settings.code_model_id = "test-code-model"
+        self.mock_settings.test_model_id = "test-test-model"
+        self.mock_settings.api_base_url = "http://test.api"
+        self.mock_settings.api_key = "test-key"
+        self.mock_settings.manager_verbosity = 1
+        self.mock_settings.planning_verbosity = 1
+        self.mock_settings.coding_verbosity = 1
+        self.mock_settings.testing_verbosity = 1
+        self.mock_settings.max_manager_steps = 10
+        self.mock_settings.max_coding_steps = 15
+        self.mock_settings.max_testing_steps = 10
+    def tearDown(self):
+        """Clean up test fixtures."""
+        self.mock_settings_patcher.stop()
+    @patch("manager_agent.LiteLLMModel")
+    @patch("manager_agent.ToolCallingAgent")
+    @patch("manager_agent.GradioPlanningAgent")
+    @patch("manager_agent.GradioCodingAgent")
+    @patch("manager_agent.GradioTestingAgent")
+    def test_manager_agent_initialization(  # mock arguments arrive bottom-up relative to the @patch stack
+        self,
+        mock_testing_agent,
+        mock_coding_agent,
+        mock_planning_agent,
+        mock_tool_calling_agent,
+        mock_litellm_model,
+    ):
+        """Test manager agent initialization."""
+        # Each patched agent class returns a Mock that carries a name and a description
+        mock_planning_instance = Mock()
+        mock_planning_instance.name = "planning_agent"
+        mock_planning_instance.description = "Planning agent"
+        mock_planning_agent.return_value = mock_planning_instance
+        mock_coding_instance = Mock()
+        mock_coding_instance.name = "coding_agent"
+        mock_coding_instance.description = "Coding agent"
+        mock_coding_agent.return_value = mock_coding_instance
+        mock_testing_instance = Mock()
+        mock_testing_instance.name = "testing_agent"
+        mock_testing_instance.description = "Testing agent"
+        mock_testing_agent.return_value = mock_testing_instance
+        # Construct the manager with the model class, ToolCallingAgent and all three sub-agent classes patched
+        manager = GradioManagerAgent()
+        # Expect max_iterations == 3 and exactly one construction each of the model and the ToolCallingAgent
+        self.assertIsInstance(manager, GradioManagerAgent)
+        self.assertEqual(manager.max_iterations, 3)
+        mock_litellm_model.assert_called_once()
+        mock_tool_calling_agent.assert_called_once()
+    @patch("manager_agent.LiteLLMModel")
+    @patch("manager_agent.ToolCallingAgent")
+    @patch("manager_agent.GradioPlanningAgent")
+    @patch("manager_agent.GradioCodingAgent")
+    @patch("manager_agent.GradioTestingAgent")
+    def test_develop_application_success(
+        self,
+        mock_testing_agent,
+        mock_coding_agent,
+        mock_planning_agent,
+        mock_tool_calling_agent,
+        mock_litellm_model,
+    ):
+        """Test successful application development workflow."""
+        # Each patched agent class returns a Mock that carries a name and a description
+        mock_planning_instance = Mock()
+        mock_planning_instance.name = "planning_agent"
+        mock_planning_instance.description = "Planning agent"
+        mock_planning_agent.return_value = mock_planning_instance
+        mock_coding_instance = Mock()
+        mock_coding_instance.name = "coding_agent"
+        mock_coding_instance.description = "Coding agent"
+        mock_coding_agent.return_value = mock_coding_instance
+        mock_testing_instance = Mock()
+        mock_testing_instance.name = "testing_agent"
+        mock_testing_instance.description = "Testing agent"
+        mock_testing_agent.return_value = mock_testing_instance
+        # The patched ToolCallingAgent instance returns a canned string from run()
+        mock_agent_instance = Mock()
+        mock_agent_instance.run.return_value = "Workflow completed successfully"
+        mock_tool_calling_agent.return_value = mock_agent_instance
+        # Build the manager against the mocks and drive develop_application once
+        manager = GradioManagerAgent()
+        result = manager.develop_application("Create a simple calculator")
+        # NOTE(review): develop_application is annotated "-> str" in manager_agent.py in this same commit, while these assertions expect a ManagerResult - confirm the intended return type
+        self.assertIsInstance(result, ManagerResult)
+        self.assertTrue(result.success)
+        self.assertEqual(result.iterations, 1)
+        self.assertIn("Workflow completed successfully", result.final_message)
+    @patch("manager_agent.LiteLLMModel")
+    @patch("manager_agent.ToolCallingAgent")
+    @patch("manager_agent.GradioPlanningAgent")
+    @patch("manager_agent.GradioCodingAgent")
+    @patch("manager_agent.GradioTestingAgent")
+    def test_develop_application_failure(
+        self,
+        mock_testing_agent,
+        mock_coding_agent,
+        mock_planning_agent,
+        mock_tool_calling_agent,
+        mock_litellm_model,
+    ):
+        """Test application development workflow failure handling."""
+        # Each patched agent class returns a Mock that carries a name and a description
+        mock_planning_instance = Mock()
+        mock_planning_instance.name = "planning_agent"
+        mock_planning_instance.description = "Planning agent"
+        mock_planning_agent.return_value = mock_planning_instance
+        mock_coding_instance = Mock()
+        mock_coding_instance.name = "coding_agent"
+        mock_coding_instance.description = "Coding agent"
+        mock_coding_agent.return_value = mock_coding_instance
+        mock_testing_instance = Mock()
+        mock_testing_instance.name = "testing_agent"
+        mock_testing_instance.description = "Testing agent"
+        mock_testing_agent.return_value = mock_testing_instance
+        # Make the patched ToolCallingAgent instance raise from run()
+        mock_agent_instance = Mock()
+        mock_agent_instance.run.side_effect = Exception("Workflow failed")
+        mock_tool_calling_agent.return_value = mock_agent_instance
+        # Build the manager against the mocks and drive develop_application once
+        manager = GradioManagerAgent()
+        result = manager.develop_application("Create a simple calculator")
+        # NOTE(review): manager_agent.py in this commit returns the plain string "Manager workflow failed: ..." on error, not a ManagerResult - confirm the intended contract
+        self.assertIsInstance(result, ManagerResult)
+        self.assertFalse(result.success)
+        self.assertEqual(result.iterations, 0)
+        self.assertIn("Manager workflow failed", result.final_message)
+        self.assertIn("Workflow failed", result.error_messages)
+    def test_format_result_as_markdown_success(self):
+        """Test formatting a successful result as markdown."""
+        result = ManagerResult(
+            success=True,
+            planning_result=None,
+            coding_result=None,
+            testing_result=None,
+            iterations=2,
+            final_message="All steps completed successfully",
+            error_messages=[],
+        )
+        manager = GradioManagerAgent()  # NOTE(review): no @patch decorators on this test, so only settings (via setUp) is mocked when the manager is built - confirm that is intended
+        markdown = manager.format_result_as_markdown(result)
+        self.assertIn("Development Workflow ✅", markdown)
+        self.assertIn("Status**: Success", markdown)
+        self.assertIn("Iterations**: 2", markdown)
+        self.assertIn("All steps completed successfully", markdown)
+    def test_format_result_as_markdown_failure(self):
+        """Test formatting a failed result as markdown."""
+        result = ManagerResult(
+            success=False,
+            planning_result=None,
+            coding_result=None,
+            testing_result=None,
+            iterations=1,
+            final_message="Workflow failed at planning stage",
+            error_messages=["Planning agent error", "Configuration issue"],
+        )
+        manager = GradioManagerAgent()  # NOTE(review): no @patch decorators on this test, so only settings (via setUp) is mocked when the manager is built - confirm that is intended
+        markdown = manager.format_result_as_markdown(result)
+        self.assertIn("Development Workflow ❌", markdown)
+        self.assertIn("Status**: Failed", markdown)
+        self.assertIn("Iterations**: 1", markdown)
+        self.assertIn("Workflow failed at planning stage", markdown)
+        self.assertIn("Planning agent error", markdown)
+        self.assertIn("Configuration issue", markdown)
+if __name__ == "__main__":
+    unittest.main()

test_testing_agent.py CHANGED Viewed

@@ -16,7 +16,6 @@ from testing_agent import (
     GradioTestingAgent,
     TestingResult,
     check_app_health,
-    create_gradio_testing_agent,
     run_gradio_app,
     setup_venv_with_uv,
     stop_gradio_processes,
@@ -265,20 +264,5 @@ class TestGradioTestingAgent(unittest.TestCase):
         self.assertIn("/tmp/test.png", report)
-class TestTestingAgentFactory(unittest.TestCase):
-    """Test the factory function for creating testing agents."""
-    @patch("testing_agent.GradioTestingAgent")
-    def test_create_gradio_testing_agent(self, mock_agent_class):
-        """Test creating a testing agent with factory function."""
-        mock_agent = Mock()
-        mock_agent_class.return_value = mock_agent
-        agent = create_gradio_testing_agent()
-        self.assertEqual(agent, mock_agent)
-        mock_agent_class.assert_called_once_with()
 if __name__ == "__main__":
     unittest.main()

     GradioTestingAgent,
     TestingResult,
     check_app_health,
     run_gradio_app,
     setup_venv_with_uv,
     stop_gradio_processes,
         self.assertIn("/tmp/test.png", report)
 if __name__ == "__main__":
     unittest.main()

testing_agent.py CHANGED Viewed

@@ -12,79 +12,13 @@ This module provides a specialized testing agent that can:
 import os
 import subprocess
 import time
-from dataclasses import dataclass
 from pathlib import Path
 from smolagents import LiteLLMModel, ToolCallingAgent, tool
-from coding_agent import CodingResult
 from settings import settings
-@dataclass
-class TestingResult:
-    """Result of the testing agent containing validation details."""
-    success: bool
-    project_path: str
-    setup_successful: bool
-    server_launched: bool
-    ui_accessible: bool
-    test_cases_passed: list[str]
-    test_cases_failed: list[str]
-    error_messages: list[str]
-    screenshots: list[str]
-    performance_metrics: dict[str, float]
-    logs: str
-@tool
-def setup_venv_with_uv(project_path: str) -> str:
-    """
-    Set up a virtual environment using uv for the Gradio project.
-    Args:
-        project_path: Path to the Gradio project directory
-    Returns:
-        Status message indicating success or failure
-    """
-    try:
-        # Change to project directory
-        original_cwd = os.getcwd()
-        project_dir = Path(project_path)
-        if not project_dir.exists():
-            return f"Error: Project directory {project_path} does not exist"
-        os.chdir(project_dir)
-        # Install dependencies using uv
-        result = subprocess.run(
-            ["uv", "sync"],
-            capture_output=True,
-            text=True,
-            timeout=300,  # 5 minutes timeout
-        )
-        os.chdir(original_cwd)
-        if result.returncode == 0:
-            return f"Successfully set up virtual environment for {project_path}"
-        else:
-            return f"Error setting up venv: {result.stderr}"
-    except subprocess.TimeoutExpired:
-        os.chdir(original_cwd)
-        return "Error: uv sync timed out after 5 minutes"
-    except FileNotFoundError:
-        os.chdir(original_cwd)
-        return "Error: uv command not found. Please install uv first."
-    except Exception as e:
-        os.chdir(original_cwd)
-        return f"Unexpected error: {str(e)}"
 @tool
 def run_gradio_app(project_path: str, timeout: int = 30) -> str:
     """
@@ -106,7 +40,7 @@ def run_gradio_app(project_path: str, timeout: int = 30) -> str:
         # Start the Gradio app in background
         process = subprocess.Popen(
-            ["uv", "run", "python", "app.py"],
             cwd=project_dir,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
@@ -291,6 +225,83 @@ def stop_gradio_processes() -> str:
         return f"Error stopping processes: {str(e)}"
 class GradioTestingAgent:
     """
     A specialized ToolCallingAgent for testing Gradio applications.
@@ -317,6 +328,21 @@ class GradioTestingAgent:
             verbosity_level: Level of verbosity for agent output (uses settings if None)
             max_steps: Maximum number of testing steps (uses settings if None)
         """
         # Use settings as defaults, but allow override
         self.model_id = model_id or settings.test_model_id
         self.api_base_url = api_base_url or settings.api_base_url
@@ -333,11 +359,11 @@ class GradioTestingAgent:
         # Define the tools for testing
         testing_tools = [
-            setup_venv_with_uv,
             run_gradio_app,
             check_app_health,
             test_gradio_ui_basic,
             stop_gradio_processes,
         ]
         # Initialize the ToolCallingAgent
@@ -346,270 +372,139 @@ class GradioTestingAgent:
             tools=testing_tools,
             verbosity_level=verbosity_level,
             max_steps=max_steps,
         )
         self.sandbox_path = Path("sandbox")
-    def test_application(self, coding_result: CodingResult) -> TestingResult:
         """
-        Test the Gradio application created by the coding agent.
         Args:
-            coding_result: The result from the coding agent
         Returns:
-            TestingResult containing comprehensive test information
         """
-        if not coding_result.success:
-            return TestingResult(
-                success=False,
-                project_path=coding_result.project_path,
-                setup_successful=False,
-                server_launched=False,
-                ui_accessible=False,
-                test_cases_passed=[],
-                test_cases_failed=["Coding agent failed to create application"],
-                error_messages=coding_result.error_messages,
-                screenshots=[],
-                performance_metrics={},
-                logs="Testing skipped due to coding failure",
-            )
-        project_path = coding_result.project_path
-        # Create comprehensive test prompt
-        test_prompt = f"""
-You are a specialized testing agent for Gradio applications. Your task is to \
-thoroughly test the Gradio application located at: {project_path}
-Please perform the following testing steps in order:
-1. **Environment Setup**: Use setup_venv_with_uv to ensure the virtual environment \
-is properly configured
-2. **Application Launch**: Use run_gradio_app to start the Gradio application
-3. **Health Check**: Use check_app_health to verify the application is responding
-4. **UI Testing**: Use test_gradio_ui_basic to test the user interface components
-5. **Cleanup**: Use stop_gradio_processes to clean up after testing
-For each step, report:
-- Whether the step succeeded or failed
-- Any error messages encountered
-- Performance observations (loading times, responsiveness)
-- Screenshots taken (if any)
-If any critical step fails, still attempt the remaining steps where possible to \
-gather maximum diagnostic information.
-The application should be a functional Gradio app with interactive components. Test for:
-- Proper page loading
-- Presence of Gradio components
-- Interactive elements (buttons, inputs, etc.)
-- Basic functionality
-Provide a comprehensive summary of all test results at the end.
-        """
-        try:
-            # Run the testing workflow
-            result = self.agent.run(test_prompt)
-            # Parse the agent's response to create structured result
-            return self._parse_testing_response(result, project_path)
-        except Exception as e:
-            return TestingResult(
-                success=False,
-                project_path=project_path,
-                setup_successful=False,
-                server_launched=False,
-                ui_accessible=False,
-                test_cases_passed=[],
-                test_cases_failed=["Testing agent execution failed"],
-                error_messages=[str(e)],
-                screenshots=[],
-                performance_metrics={},
-                logs=f"Testing agent error: {str(e)}",
-            )
-    def _parse_testing_response(
-        self, response: str, project_path: str
-    ) -> TestingResult:
-        """
-        Parse the agent's testing response into a structured TestingResult.
-        Args:
-            response: Raw response from the testing agent
-            project_path: Path to the tested project
-        Returns:
-            Structured TestingResult
-        """
-        # Initialize default values
-        setup_successful = False
-        server_launched = False
-        ui_accessible = False
-        test_cases_passed = []
-        test_cases_failed = []
-        error_messages = []
-        screenshots = []
-        performance_metrics = {}
-        # Simple parsing logic based on common success/failure indicators
-        response_lower = response.lower()
-        # Check for setup success
-        if "successfully set up virtual environment" in response_lower:
-            setup_successful = True
-            test_cases_passed.append("Virtual environment setup")
-        elif "error setting up venv" in response_lower:
-            test_cases_failed.append("Virtual environment setup")
-        # Check for server launch
-        if "successfully started gradio app" in response_lower:
-            server_launched = True
-            test_cases_passed.append("Gradio application launch")
-        elif "error running gradio app" in response_lower:
-            test_cases_failed.append("Gradio application launch")
-        # Check for health status
-        if "application is healthy" in response_lower:
-            ui_accessible = True
-            test_cases_passed.append("Application health check")
-        elif "cannot connect to" in response_lower:
-            test_cases_failed.append("Application health check")
-        # Check for UI testing
-        if (
-            "page loaded successfully" in response_lower
-            and "gradio container found" in response_lower
-        ):
-            test_cases_passed.append("UI component testing")
-        elif "error during ui testing" in response_lower:
-            test_cases_failed.append("UI component testing")
-        # Look for screenshots
-        if "screenshot saved" in response_lower:
-            screenshots.append("/tmp/gradio_test_screenshot.png")
-        # Extract performance metrics if mentioned
-        if "response time:" in response_lower:
-            # Simple regex to extract response time
-            import re
-            time_match = re.search(r"response time: ([\d.]+)s", response_lower)
-            if time_match:
-                performance_metrics["response_time_seconds"] = float(
-                    time_match.group(1)
-                )
-        # Determine overall success
-        success = (
-            setup_successful
-            and server_launched
-            and ui_accessible
-            and len(test_cases_failed) == 0
-        )
-        return TestingResult(
-            success=success,
-            project_path=project_path,
-            setup_successful=setup_successful,
-            server_launched=server_launched,
-            ui_accessible=ui_accessible,
-            test_cases_passed=test_cases_passed,
-            test_cases_failed=test_cases_failed,
-            error_messages=error_messages,
-            screenshots=screenshots,
-            performance_metrics=performance_metrics,
-            logs=response,
-        )
-    def generate_test_report(self, testing_result: TestingResult) -> str:
-        """
-        Generate a comprehensive test report in markdown format.
-        Args:
-            testing_result: The result from testing the application
-        Returns:
-            Markdown-formatted test report
-        """
-        status_emoji = "✅" if testing_result.success else "❌"
-        report = f"""
-# Gradio Application Test Report {status_emoji}
-## Summary
-- **Project Path**: `{testing_result.project_path}`
-- **Overall Success**: {testing_result.success}
-- **Environment Setup**: {"✅" if testing_result.setup_successful else "❌"}
-- **Server Launch**: {"✅" if testing_result.server_launched else "❌"}
-- **UI Accessibility**: {"✅" if testing_result.ui_accessible else "❌"}
-## Test Cases
-### Passed ({len(testing_result.test_cases_passed)})
-{chr(10).join(f"- ✅ {case}" for case in testing_result.test_cases_passed)}
-### Failed ({len(testing_result.test_cases_failed)})
-{chr(10).join(f"- ❌ {case}" for case in testing_result.test_cases_failed)}
-## Performance Metrics
-{chr(10).join(f"- **{key}**: {value}" for key, value in \
-testing_result.performance_metrics.items()) if testing_result.performance_metrics else \
-"No performance metrics collected"}
-## Screenshots
-{chr(10).join(f"- {screenshot}" for screenshot in testing_result.screenshots) \
-if testing_result.screenshots else "No screenshots captured"}
-## Error Messages
-{chr(10).join(f"- {error}" for error in testing_result.error_messages) \
-if testing_result.error_messages else "No errors reported"}
-## Detailed Logs
 ```
-{testing_result.logs}
 ```
----
-*Report generated by GradioTestingAgent*
-        """
-        return report.strip()
-def create_gradio_testing_agent() -> GradioTestingAgent:
-    """
-    Create a Gradio testing agent with default settings.
-    Returns:
-        Configured GradioTestingAgent instance
-    """
-    return GradioTestingAgent()
 if __name__ == "__main__":
     # Example usage
-    from coding_agent import create_gradio_coding_agent
     from planning_agent import GradioPlanningAgent
     # Create agents
     planning_agent = GradioPlanningAgent()
-    coding_agent = create_gradio_coding_agent()
-    testing_agent = create_gradio_testing_agent()
-    # Example workflow
-    print("Planning a simple calculator app...")
-    plan = planning_agent.plan_application(
-        "Create a simple calculator with basic arithmetic operations"
     )
-    print("Implementing the application...")
-    implementation = coding_agent.implement_application(plan)
-    print("Testing the application...")
-    test_results = testing_agent.test_application(implementation)
-    print("Test Report:")
-    print(testing_agent.generate_test_report(test_results))

 import os
 import subprocess
 import time
 from pathlib import Path
 from smolagents import LiteLLMModel, ToolCallingAgent, tool
 from settings import settings
 @tool
 def run_gradio_app(project_path: str, timeout: int = 30) -> str:
     """
         # Start the Gradio app in background
         process = subprocess.Popen(
+            ["uv", "run", "gradio", "app.py"],
             cwd=project_dir,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
         return f"Error stopping processes: {str(e)}"
+@tool
+def uv_add_packages(project_path: str, packages: str) -> str:
+    """
+    Add missing packages to the project using uv add.
+    Args:
+        project_path: Path to the project directory containing pyproject.toml
+        packages: Space-separated list of package names to add \
+        (e.g., "requests pandas numpy")
+    Returns:
+        Status message indicating success or failure of adding packages
+    """
+    try:
+        # Change to project directory
+        original_cwd = os.getcwd()
+        project_dir = Path(project_path)
+        if not project_dir.exists():
+            return f"Error: Project directory {project_path} does not exist"
+        # Check if pyproject.toml exists
+        pyproject_file = project_dir / "pyproject.toml"
+        if not pyproject_file.exists():
+            return f"Error: pyproject.toml not found in {project_path}"
+        os.chdir(project_dir)
+        # Split packages and add them one by one for better error handling
+        package_list = packages.strip().split()
+        if not package_list:
+            return "Error: No packages specified to add"
+        added_packages = []
+        failed_packages = []
+        for package in package_list:
+            if not package.strip():
+                continue
+            result = subprocess.run(
+                ["uv", "add", package.strip()],
+                capture_output=True,
+                text=True,
+                timeout=120,  # 2 minutes timeout per package
+            )
+            if result.returncode == 0:
+                added_packages.append(package.strip())
+            else:
+                failed_packages.append(f"{package.strip()} ({result.stderr.strip()})")
+        os.chdir(original_cwd)
+        # Prepare status message
+        status_parts = []
+        if added_packages:
+            status_parts.append(f"Successfully added: {', '.join(added_packages)}")
+        if failed_packages:
+            status_parts.append(f"Failed to add: {'; '.join(failed_packages)}")
+        if not status_parts:
+            return "No packages were processed"
+        return "; ".join(status_parts)
+    except subprocess.TimeoutExpired:
+        os.chdir(original_cwd)
+        return f"Error: uv add timed out while adding packages: {packages}"
+    except FileNotFoundError:
+        os.chdir(original_cwd)
+        return "Error: uv command not found. Please install uv first."
+    except Exception as e:
+        os.chdir(original_cwd)
+        return f"Unexpected error adding packages: {str(e)}"
 class GradioTestingAgent:
     """
     A specialized ToolCallingAgent for testing Gradio applications.
             verbosity_level: Level of verbosity for agent output (uses settings if None)
             max_steps: Maximum number of testing steps (uses settings if None)
         """
+        self.name = "testing_agent"
+        self.description = """Expert QA engineer specializing in Gradio application \
+testing and validation.
+This agent thoroughly tests Gradio applications by:
+- Setting up virtual environments using uv
+- Launching and health-checking Gradio applications
+- Performing basic UI testing with browser automation
+- Validating functionality and responsiveness
+- Generating comprehensive test reports with screenshots
+- Providing detailed error analysis and debugging information
+Returns structured test results indicating success/failure with specific details \
+about what works and what needs fixing."""
         # Use settings as defaults, but allow override
         self.model_id = model_id or settings.test_model_id
         self.api_base_url = api_base_url or settings.api_base_url
         # Define the tools for testing
         testing_tools = [
             run_gradio_app,
             check_app_health,
             test_gradio_ui_basic,
             stop_gradio_processes,
+            uv_add_packages,
         ]
         # Initialize the ToolCallingAgent
             tools=testing_tools,
             verbosity_level=verbosity_level,
             max_steps=max_steps,
+            name=self.name,
+            description=self.description,
         )
         self.sandbox_path = Path("sandbox")
+    def __call__(self, task: str, **kwargs) -> str:
         """
+        Handle testing tasks as a managed agent.
         Args:
+            task: The coding result or task description
+            **kwargs: Additional keyword arguments (ignored)
         Returns:
+            String response containing the formatted testing result
         """
+        full_prompt = f"""You are an expert QA engineer specializing in \
+Gradio application testing and validation.
+**CONTEXT:**
+You received this message from an expert Python developer:
 ```
+{task}
 ```
+**YOUR MISSION:**
+Perform comprehensive testing of the Gradio application and provide a detailed \
+quality assurance report.
+**TESTING PROTOCOL:**
+1. **Application Launch**: Use `run_gradio_app` to start the application
+2. **Dependency Management**: If missing packages are detected, use `uv_add_packages` \
+to add them
+3. **Health Check**: Use `check_app_health` to verify HTTP response
+4. **UI Testing**: Use `test_gradio_ui_basic` for basic interface validation
+5. **Cleanup**: Use `stop_gradio_processes` to clean up after testing
+**IMPORTANT CONSTRAINTS:**
+- You can ONLY access files in the `./sandbox/` directory
+- All projects to test will be located in subdirectories of `./sandbox/`
+- Use relative paths starting with `./sandbox/[project_name]`
+**REPORT FORMAT:**
+Structure your final report as follows:
+## 🧪 GRADIO APPLICATION TEST REPORT
+### 📋 Test Summary
+- **Application**: [App name/purpose]
+- **Test Status**: ✅ PASSED / ❌ FAILED / ⚠️ PARTIAL
+- **Test Duration**: [Time taken]
+- **Key Findings**: [Brief summary]
+### 🔧 Environment Setup
+- **Virtual Environment**: [Status and details]
+- **Dependencies**: [Installation results]
+- **Setup Issues**: [Any problems encountered]
+### 🚀 Application Launch
+- **Startup Status**: [Success/failure]
+- **Server URL**: [Access URL if successful]
+- **Launch Time**: [Time to start]
+- **Startup Logs**: [Relevant output]
+### 🏥 Health Check
+- **HTTP Response**: [Status code and response time]
+- **Accessibility**: [Can the app be reached]
+- **Performance**: [Response times, any issues]
+### 🖥️ User Interface Testing
+- **Page Load**: [Success/failure]
+- **Gradio Container**: [Found/not found]
+- **Interactive Elements**: [Count and types]
+- **UI Responsiveness**: [Any issues]
+- **Screenshots**: [Paths to saved images]
+### ⚠️ Issues Found
+- [List any problems, bugs, or concerns]
+- [Include severity levels: CRITICAL, HIGH, MEDIUM, LOW]
+- [Provide specific error messages and context]
+### ✅ Recommendations
+- [Suggestions for improvements]
+- [Required fixes for critical issues]
+- [Performance optimization suggestions]
+### 📊 Test Metrics
+- **Total Tests**: [Number]
+- **Passed**: [Number]
+- **Failed**: [Number]
+- **Success Rate**: [Percentage]
+**TESTING GUIDELINES:**
+- Always clean up processes after testing
+- Capture screenshots when possible for documentation
+- Report specific error messages, not just generic failures
+- Distinguish between setup issues vs. application issues
+- Test both functionality and user experience
+- Provide actionable feedback for developers
+**ERROR HANDLING:**
+- If environment setup fails, provide specific uv/dependency guidance
+- If missing packages are detected, use `uv_add_packages` to add them automatically
+- If app won't start, analyze logs for root cause and check for import errors
+- If UI testing fails, check if it's a browser/selenium issue vs. app issue
+- Always attempt cleanup even if earlier steps fail
+Begin testing now and provide your comprehensive report."""
+        try:
+            return self.agent.run(full_prompt)
+        except Exception as e:
+            return f"❌ Testing failed: {str(e)}"
 if __name__ == "__main__":
     # Example usage
+    from coding_agent import GradioCodingAgent
     from planning_agent import GradioPlanningAgent
     # Create agents
     planning_agent = GradioPlanningAgent()
+    coding_agent = GradioCodingAgent()
+    testing_agent = GradioTestingAgent()
+    plan_result = planning_agent(
+        "Create a simple calculator with basic arithmetic operations /no_think"
     )
+    implementation_result = coding_agent(plan_result)
+    test_result = testing_agent(implementation_result)
+    print("=== TEST REPORT ===")
+    print(test_result)

uv.lock CHANGED Viewed

@@ -8,7 +8,7 @@ resolution-markers = [
 [manifest]
 members = [
-    "gradio-app",
     "likable",
 ]
@@ -428,9 +428,9 @@ wheels = [
 ]
 [[package]]
-name = "gradio-app"
 version = "0.1.0"
-source = { virtual = "sandbox/gradio_app" }
 dependencies = [
     { name = "gradio" },
 ]

 [manifest]
 members = [
+    "gradio-calculator",
     "likable",
 ]
 ]
 [[package]]
+name = "gradio-calculator"
 version = "0.1.0"
+source = { virtual = "sandbox/gradio_calculator" }
 dependencies = [
     { name = "gradio" },
 ]