Spaces:
Sleeping
Sleeping
jens.luecke
committed on
Commit
·
f224834
1
Parent(s):
dabb6ef
Refactor application architecture to use a manager agent
Browse files
- Replaced individual coding and planning agents with a new `GradioManagerAgent` that orchestrates the entire development workflow.
- Updated `app.py` to utilize the manager agent for generating AI responses, streamlining the process from planning to implementation.
- Introduced a new `manager_agent.py` file to handle the coordination of planning, coding, and testing agents.
- Enhanced `settings.py` to include configurations for the manager agent.
- Updated tests to cover the new manager agent functionality and ensure reliability.
- Improved project structure and dependencies in `pyproject.toml` to reflect changes in the agent architecture.
- app.py +13 -143
- coding_agent.py +161 -258
- manager_agent.py +172 -0
- planning_agent.py +42 -166
- pyproject.toml +2 -0
- settings.py +40 -0
- test_manager_agent.py +219 -0
- test_testing_agent.py +0 -16
- testing_agent.py +202 -307
- uv.lock +3 -3
app.py
CHANGED
|
@@ -4,165 +4,35 @@ import sys
|
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
|
| 7 |
-
from
|
| 8 |
-
from planning_agent import GradioPlanningAgent
|
| 9 |
from settings import settings
|
| 10 |
from utils import load_file
|
| 11 |
|
| 12 |
gr.NO_RELOAD = False
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def get_planning_agent():
|
| 20 |
-
"""Get or initialize the planning agent (lazy loading)."""
|
| 21 |
-
global planning_agent
|
| 22 |
-
if planning_agent is None:
|
| 23 |
-
try:
|
| 24 |
-
planning_agent = GradioPlanningAgent()
|
| 25 |
-
except Exception as e:
|
| 26 |
-
print(f"Error initializing planning agent: {e}")
|
| 27 |
-
return None
|
| 28 |
-
return planning_agent
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def get_coding_agent():
|
| 32 |
-
"""Get or initialize the coding agent (lazy loading)."""
|
| 33 |
-
global coding_agent
|
| 34 |
-
if coding_agent is None:
|
| 35 |
-
try:
|
| 36 |
-
coding_agent = GradioCodingAgent()
|
| 37 |
-
except Exception as e:
|
| 38 |
-
print(f"Error initializing coding agent: {e}")
|
| 39 |
-
return None
|
| 40 |
-
return coding_agent
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
# Enhanced AI response using both planning and coding agents
|
| 44 |
-
def ai_response_with_planning_and_coding(message, history):
|
| 45 |
-
"""Generate AI response using the planning agent for planning and \
|
| 46 |
coding agent for implementation."""
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
|
| 51 |
-
if
|
| 52 |
# Fallback to mock response if planning agent fails to initialize
|
| 53 |
response = (
|
| 54 |
-
"Sorry, the
|
| 55 |
"Please check your API_KEY environment variable."
|
| 56 |
)
|
| 57 |
-
history.append({"role": "user", "content": message})
|
| 58 |
-
history.append({"role": "assistant", "content": response})
|
| 59 |
-
return history, ""
|
| 60 |
-
|
| 61 |
-
if coding_agent_instance is None:
|
| 62 |
-
# Fallback if coding agent fails to initialize
|
| 63 |
-
response = (
|
| 64 |
-
"Sorry, the coding agent is not available. "
|
| 65 |
-
"Planning is available but implementation will be limited."
|
| 66 |
-
)
|
| 67 |
-
history.append({"role": "user", "content": message})
|
| 68 |
history.append({"role": "assistant", "content": response})
|
| 69 |
return history, ""
|
| 70 |
|
| 71 |
try:
|
| 72 |
-
|
| 73 |
-
history.append({"role": "
|
| 74 |
-
history.append(
|
| 75 |
-
{"role": "assistant", "content": "๐ฏ Starting to plan your application..."}
|
| 76 |
-
)
|
| 77 |
-
|
| 78 |
-
planning_result = planning_agent_instance.plan_application(message)
|
| 79 |
-
|
| 80 |
-
# Format the planning response
|
| 81 |
-
action_summary = (
|
| 82 |
-
planning_result.action_plan[:300] + "..."
|
| 83 |
-
if len(planning_result.action_plan) > 300
|
| 84 |
-
else planning_result.action_plan
|
| 85 |
-
)
|
| 86 |
-
|
| 87 |
-
components_list = chr(10).join(
|
| 88 |
-
[f"โข {comp}" for comp in planning_result.gradio_components[:5]]
|
| 89 |
-
)
|
| 90 |
-
dependencies_list = chr(10).join(
|
| 91 |
-
[f"โข {dep}" for dep in planning_result.dependencies[:5]]
|
| 92 |
-
)
|
| 93 |
-
|
| 94 |
-
planning_response = f"""โ
**Planning Complete!**
|
| 95 |
-
|
| 96 |
-
**Complexity**: {planning_result.estimated_complexity}
|
| 97 |
-
|
| 98 |
-
**Key Gradio Components Needed**:
|
| 99 |
-
{components_list}
|
| 100 |
-
|
| 101 |
-
**Dependencies Required**:
|
| 102 |
-
{dependencies_list}
|
| 103 |
-
|
| 104 |
-
**High-Level Action Plan**:
|
| 105 |
-
{action_summary}
|
| 106 |
-
|
| 107 |
-
๐ **Now starting implementation...**"""
|
| 108 |
-
|
| 109 |
-
history.append({"role": "assistant", "content": planning_response})
|
| 110 |
-
|
| 111 |
-
# Step 2: Use the coding agent for implementation
|
| 112 |
-
history.append(
|
| 113 |
-
{
|
| 114 |
-
"role": "assistant",
|
| 115 |
-
"content": "โก Implementing your application with proper \
|
| 116 |
-
project structure...",
|
| 117 |
-
}
|
| 118 |
-
)
|
| 119 |
-
|
| 120 |
-
coding_result = coding_agent_instance.iterative_implementation(planning_result)
|
| 121 |
-
|
| 122 |
-
# Format the implementation response
|
| 123 |
-
if coding_result.success:
|
| 124 |
-
implementation_response = f"""โ
**Implementation Complete!**
|
| 125 |
-
|
| 126 |
-
**Project Created**: `{coding_result.project_path}`
|
| 127 |
-
**Features Implemented**: {len(coding_result.implemented_features)} components
|
| 128 |
-
**Status**: Ready to run!
|
| 129 |
-
|
| 130 |
-
Your Gradio application has been created with:
|
| 131 |
-
- Proper `uv` project structure
|
| 132 |
-
- All required dependencies installed
|
| 133 |
-
- Complete README.md with usage instructions
|
| 134 |
-
- Functional app.py with all requested features
|
| 135 |
-
|
| 136 |
-
You can view and test your app in the **Preview** tab, or check the code in \
|
| 137 |
-
the **Code** tab.
|
| 138 |
-
|
| 139 |
-
To run locally: `cd {coding_result.project_path} && uv run python app.py`"""
|
| 140 |
-
|
| 141 |
-
if coding_result.remaining_tasks:
|
| 142 |
-
implementation_response += f"\n\n**Remaining Tasks**: \
|
| 143 |
-
{chr(10).join([f'โข {task}' for task in coding_result.remaining_tasks])}"
|
| 144 |
-
|
| 145 |
-
else:
|
| 146 |
-
implementation_response = f"""โ ๏ธ **Implementation Partially Complete**
|
| 147 |
-
|
| 148 |
-
**Project Path**: `{coding_result.project_path}`
|
| 149 |
-
**Issues Encountered**: {len(coding_result.error_messages)} errors
|
| 150 |
-
|
| 151 |
-
**Error Messages**:
|
| 152 |
-
{chr(10).join([f'โข {error}' for error in coding_result.error_messages])}
|
| 153 |
-
|
| 154 |
-
**Remaining Tasks**:
|
| 155 |
-
{chr(10).join([f'โข {task}' for task in coding_result.remaining_tasks])}
|
| 156 |
-
|
| 157 |
-
The project structure has been set up, but some features may need manual completion."""
|
| 158 |
-
|
| 159 |
-
history.append({"role": "assistant", "content": implementation_response})
|
| 160 |
|
| 161 |
except Exception as e:
|
| 162 |
-
error_response = (
|
| 163 |
-
f"I encountered an error during planning and implementation: {str(e)}. "
|
| 164 |
-
"Let me try a simpler approach..."
|
| 165 |
-
)
|
| 166 |
history.append({"role": "assistant", "content": error_response})
|
| 167 |
|
| 168 |
return history, ""
|
|
@@ -328,13 +198,13 @@ complete applications*"
|
|
| 328 |
# Event handlers for chat - updated to use the combined planning and
|
| 329 |
# coding function
|
| 330 |
msg_input.submit(
|
| 331 |
-
|
| 332 |
inputs=[msg_input, chatbot],
|
| 333 |
outputs=[chatbot, msg_input],
|
| 334 |
)
|
| 335 |
|
| 336 |
send_btn.click(
|
| 337 |
-
|
| 338 |
inputs=[msg_input, chatbot],
|
| 339 |
outputs=[chatbot, msg_input],
|
| 340 |
)
|
|
|
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
|
| 7 |
+
from manager_agent import GradioManagerAgent
|
|
|
|
| 8 |
from settings import settings
|
| 9 |
from utils import load_file
|
| 10 |
|
| 11 |
gr.NO_RELOAD = False
|
| 12 |
|
| 13 |
+
|
| 14 |
+
def generate_ai_response(message, history):
|
| 15 |
+
"""Generate AI response using the manager agent for planning and \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
coding agent for implementation."""
|
| 17 |
|
| 18 |
+
history.append({"role": "user", "content": message})
|
| 19 |
+
manager_agent_instance = GradioManagerAgent()
|
| 20 |
|
| 21 |
+
if manager_agent_instance is None:
|
| 22 |
# Fallback to mock response if planning agent fails to initialize
|
| 23 |
response = (
|
| 24 |
+
"Sorry, the manager agent is not available. "
|
| 25 |
"Please check your API_KEY environment variable."
|
| 26 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
history.append({"role": "assistant", "content": response})
|
| 28 |
return history, ""
|
| 29 |
|
| 30 |
try:
|
| 31 |
+
manager_result = manager_agent_instance(message)
|
| 32 |
+
history.append({"role": "assistant", "content": manager_result})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
except Exception as e:
|
| 35 |
+
error_response = f"I encountered an error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
| 36 |
history.append({"role": "assistant", "content": error_response})
|
| 37 |
|
| 38 |
return history, ""
|
|
|
|
| 198 |
# Event handlers for chat - updated to use the combined planning and
|
| 199 |
# coding function
|
| 200 |
msg_input.submit(
|
| 201 |
+
generate_ai_response,
|
| 202 |
inputs=[msg_input, chatbot],
|
| 203 |
outputs=[chatbot, msg_input],
|
| 204 |
)
|
| 205 |
|
| 206 |
send_btn.click(
|
| 207 |
+
generate_ai_response,
|
| 208 |
inputs=[msg_input, chatbot],
|
| 209 |
outputs=[chatbot, msg_input],
|
| 210 |
)
|
coding_agent.py
CHANGED
|
@@ -16,11 +16,9 @@ from dataclasses import dataclass
|
|
| 16 |
from pathlib import Path
|
| 17 |
|
| 18 |
from mcp import StdioServerParameters
|
| 19 |
-
from smolagents import LiteLLMModel, MCPClient, ToolCallingAgent
|
| 20 |
|
| 21 |
-
from planning_agent import PlanningResult
|
| 22 |
from settings import settings
|
| 23 |
-
from utils import load_file
|
| 24 |
|
| 25 |
|
| 26 |
@dataclass
|
|
@@ -35,6 +33,72 @@ class CodingResult:
|
|
| 35 |
final_app_code: str
|
| 36 |
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
class GradioCodingAgent:
|
| 39 |
"""
|
| 40 |
A specialized CodeAgent for implementing Gradio applications.
|
|
@@ -61,6 +125,22 @@ class GradioCodingAgent:
|
|
| 61 |
verbosity_level: Level of verbosity for agent output (uses settings if None)
|
| 62 |
max_steps: Maximum number of coding steps (uses settings if None)
|
| 63 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
# Use settings as defaults, but allow override
|
| 65 |
self.model_id = model_id or settings.code_model_id
|
| 66 |
self.api_base_url = api_base_url or settings.api_base_url
|
|
@@ -86,14 +166,19 @@ class GradioCodingAgent:
|
|
| 86 |
|
| 87 |
self.mcp_client = MCPClient(server_parameters)
|
| 88 |
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# Initialize the CodeAgent with tools for file operations and project setup
|
| 92 |
self.agent = ToolCallingAgent(
|
| 93 |
model=self.model,
|
| 94 |
-
tools=
|
| 95 |
verbosity_level=verbosity_level,
|
| 96 |
max_steps=max_steps,
|
|
|
|
|
|
|
| 97 |
)
|
| 98 |
|
| 99 |
self.sandbox_path = Path("sandbox")
|
|
@@ -121,255 +206,79 @@ class GradioCodingAgent:
|
|
| 121 |
except Exception:
|
| 122 |
pass
|
| 123 |
|
| 124 |
-
def
|
| 125 |
-
"""
|
| 126 |
-
Set up the initial project structure using uv.
|
| 127 |
-
|
| 128 |
-
Args:
|
| 129 |
-
project_name: Name of the project
|
| 130 |
-
|
| 131 |
-
Returns:
|
| 132 |
-
bool: True if setup was successful
|
| 133 |
-
"""
|
| 134 |
-
try:
|
| 135 |
-
# Ensure sandbox directory exists and is clean
|
| 136 |
-
if self.sandbox_path.exists():
|
| 137 |
-
shutil.rmtree(self.sandbox_path)
|
| 138 |
-
self.sandbox_path.mkdir(exist_ok=True)
|
| 139 |
-
|
| 140 |
-
# Change to sandbox directory
|
| 141 |
-
os.chdir(self.sandbox_path)
|
| 142 |
-
|
| 143 |
-
# Initialize with uv
|
| 144 |
-
subprocess.run(
|
| 145 |
-
["uv", "init", project_name],
|
| 146 |
-
capture_output=True,
|
| 147 |
-
text=True,
|
| 148 |
-
check=True,
|
| 149 |
-
)
|
| 150 |
-
|
| 151 |
-
# Change to project directory
|
| 152 |
-
os.chdir(project_name)
|
| 153 |
-
|
| 154 |
-
# Add gradio as a dependency
|
| 155 |
-
subprocess.run(
|
| 156 |
-
["uv", "add", "gradio"],
|
| 157 |
-
capture_output=True,
|
| 158 |
-
text=True,
|
| 159 |
-
check=True,
|
| 160 |
-
)
|
| 161 |
-
|
| 162 |
-
# Change back to workspace root
|
| 163 |
-
os.chdir("../..")
|
| 164 |
-
|
| 165 |
-
return True
|
| 166 |
-
|
| 167 |
-
except subprocess.CalledProcessError as e:
|
| 168 |
-
print(f"Error setting up project structure: {e}")
|
| 169 |
-
print(f"stdout: {e.stdout}")
|
| 170 |
-
print(f"stderr: {e.stderr}")
|
| 171 |
-
return False
|
| 172 |
-
except Exception as e:
|
| 173 |
-
print(f"Unexpected error setting up project: {e}")
|
| 174 |
-
return False
|
| 175 |
-
|
| 176 |
-
def implement_application(self, planning_result: PlanningResult) -> CodingResult:
|
| 177 |
"""
|
| 178 |
-
|
| 179 |
|
| 180 |
Args:
|
| 181 |
-
|
|
|
|
| 182 |
|
| 183 |
Returns:
|
| 184 |
-
|
| 185 |
"""
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
-
|
| 219 |
-
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
-
|
| 233 |
-
-
|
| 234 |
-
-
|
| 235 |
-
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
GRADIO COMPONENTS TO IMPLEMENT:
|
| 244 |
-
{gradio_components}
|
| 245 |
-
|
| 246 |
-
DEPENDENCIES TO ADD:
|
| 247 |
-
{dependencies}
|
| 248 |
-
|
| 249 |
-
ACTION PLAN TO FOLLOW:
|
| 250 |
-
{planning_result.action_plan}
|
| 251 |
-
|
| 252 |
-
IMPLEMENTATION PLAN TO FOLLOW:
|
| 253 |
-
{planning_result.implementation_plan}
|
| 254 |
-
|
| 255 |
-
TESTING PLAN TO CONSIDER:
|
| 256 |
-
{planning_result.testing_plan}
|
| 257 |
-
|
| 258 |
-
You must implement the complete application and ensure it works properly.
|
| 259 |
-
Use subprocess to run `uv add` commands to install any needed packages.
|
| 260 |
-
Create all necessary files and make sure the application runs without errors.
|
| 261 |
-
|
| 262 |
-
Please implement the complete Gradio application based on the planning result.
|
| 263 |
-
|
| 264 |
-
The application should be fully functional and implement all the features
|
| 265 |
-
described in the plans.
|
| 266 |
-
|
| 267 |
-
Working directory: {project_path}
|
| 268 |
-
|
| 269 |
-
Please:
|
| 270 |
-
1. Start by creating/updating the README.md file with project description
|
| 271 |
-
and usage instructions
|
| 272 |
-
2. Add any additional dependencies needed using `uv add package_name`
|
| 273 |
-
3. Create the complete app.py file with all the Gradio components and
|
| 274 |
-
functionality
|
| 275 |
-
4. Test the implementation to ensure it works
|
| 276 |
-
5. Fix any issues that arise during testing
|
| 277 |
-
|
| 278 |
-
Make sure the final application is complete and functional.
|
| 279 |
-
/no_think
|
| 280 |
-
"""
|
| 281 |
|
| 282 |
try:
|
| 283 |
-
|
| 284 |
-
self.agent.run(
|
| 285 |
-
user_prompt,
|
| 286 |
-
additional_args={
|
| 287 |
-
"current_app_py": load_file(str(Path(project_path) / "app.py")),
|
| 288 |
-
},
|
| 289 |
-
)
|
| 290 |
-
|
| 291 |
-
# Check if the implementation was successful
|
| 292 |
-
app_file = Path(project_path) / "app.py"
|
| 293 |
-
if app_file.exists():
|
| 294 |
-
with open(app_file, encoding="utf-8") as f:
|
| 295 |
-
final_app_code = f.read()
|
| 296 |
-
|
| 297 |
-
return CodingResult(
|
| 298 |
-
success=True,
|
| 299 |
-
project_path=project_path,
|
| 300 |
-
implemented_features=planning_result.gradio_components,
|
| 301 |
-
remaining_tasks=[],
|
| 302 |
-
error_messages=[],
|
| 303 |
-
final_app_code=final_app_code,
|
| 304 |
-
)
|
| 305 |
-
else:
|
| 306 |
-
return CodingResult(
|
| 307 |
-
success=False,
|
| 308 |
-
project_path=project_path,
|
| 309 |
-
implemented_features=[],
|
| 310 |
-
remaining_tasks=["Main app.py file was not created"],
|
| 311 |
-
error_messages=["Implementation failed to create app.py"],
|
| 312 |
-
final_app_code="",
|
| 313 |
-
)
|
| 314 |
|
| 315 |
except Exception as e:
|
| 316 |
-
return
|
| 317 |
-
success=False,
|
| 318 |
-
project_path=project_path,
|
| 319 |
-
implemented_features=[],
|
| 320 |
-
remaining_tasks=["Complete implementation"],
|
| 321 |
-
error_messages=[f"Coding agent error: {str(e)}"],
|
| 322 |
-
final_app_code="",
|
| 323 |
-
)
|
| 324 |
-
|
| 325 |
-
def iterative_implementation(
|
| 326 |
-
self, planning_result: PlanningResult, max_iterations: int = 3
|
| 327 |
-
) -> CodingResult:
|
| 328 |
-
"""
|
| 329 |
-
Implement the application with iterative refinement.
|
| 330 |
-
|
| 331 |
-
Args:
|
| 332 |
-
planning_result: The planning result from the planning agent
|
| 333 |
-
max_iterations: Maximum number of implementation iterations
|
| 334 |
-
|
| 335 |
-
Returns:
|
| 336 |
-
CodingResult containing final implementation details
|
| 337 |
-
"""
|
| 338 |
-
last_result = None
|
| 339 |
-
|
| 340 |
-
for iteration in range(max_iterations):
|
| 341 |
-
print(f"๐ Implementation iteration {iteration + 1}/{max_iterations}")
|
| 342 |
-
|
| 343 |
-
# Implement or refine the application
|
| 344 |
-
result = self.implement_application(planning_result)
|
| 345 |
-
|
| 346 |
-
if result.success and not result.remaining_tasks:
|
| 347 |
-
print(f"โ
Implementation successful in {iteration + 1} iteration(s)")
|
| 348 |
-
return result
|
| 349 |
-
|
| 350 |
-
last_result = result
|
| 351 |
-
|
| 352 |
-
if iteration < max_iterations - 1:
|
| 353 |
-
print(f"โ ๏ธ Iteration {iteration + 1} incomplete. Refining...")
|
| 354 |
-
# For subsequent iterations, we could modify the prompt to focus
|
| 355 |
-
# on remaining tasks. This is a simplified version - in practice,
|
| 356 |
-
# you'd want more sophisticated iteration logic
|
| 357 |
-
|
| 358 |
-
print(f"โ ๏ธ Implementation completed with {max_iterations} iterations")
|
| 359 |
-
return last_result or CodingResult(
|
| 360 |
-
success=False,
|
| 361 |
-
project_path="",
|
| 362 |
-
implemented_features=[],
|
| 363 |
-
remaining_tasks=["Complete implementation failed"],
|
| 364 |
-
error_messages=["Maximum iterations reached without completion"],
|
| 365 |
-
final_app_code="",
|
| 366 |
-
)
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
# Convenience function for the main app
|
| 370 |
-
def create_gradio_coding_agent() -> GradioCodingAgent:
|
| 371 |
-
"""Create a GradioCodingAgent with default settings."""
|
| 372 |
-
return GradioCodingAgent()
|
| 373 |
|
| 374 |
|
| 375 |
if __name__ == "__main__":
|
|
@@ -378,17 +287,11 @@ if __name__ == "__main__":
|
|
| 378 |
|
| 379 |
# Test with a simple planning result
|
| 380 |
planning_agent = GradioPlanningAgent()
|
| 381 |
-
planning_result = planning_agent
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
print("Coding Result:")
|
| 390 |
-
print(f"Success: {coding_result.success}")
|
| 391 |
-
print(f"Project Path: {coding_result.project_path}")
|
| 392 |
-
print(f"Implemented Features: {coding_result.implemented_features}")
|
| 393 |
-
print(f"Remaining Tasks: {coding_result.remaining_tasks}")
|
| 394 |
-
print(f"Error Messages: {coding_result.error_messages}")
|
|
|
|
| 16 |
from pathlib import Path
|
| 17 |
|
| 18 |
from mcp import StdioServerParameters
|
| 19 |
+
from smolagents import LiteLLMModel, MCPClient, ToolCallingAgent, tool
|
| 20 |
|
|
|
|
| 21 |
from settings import settings
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
@dataclass
|
|
|
|
| 33 |
final_app_code: str
|
| 34 |
|
| 35 |
|
| 36 |
+
@tool
|
| 37 |
+
def setup_project_structure(project_name: str = "gradio_app") -> str:
|
| 38 |
+
"""
|
| 39 |
+
Set up the initial project structure using uv.
|
| 40 |
+
|
| 41 |
+
Args:
|
| 42 |
+
project_name: Name of the project
|
| 43 |
+
|
| 44 |
+
Returns:
|
| 45 |
+
Status message indicating success or failure
|
| 46 |
+
"""
|
| 47 |
+
try:
|
| 48 |
+
sandbox_path = Path("sandbox")
|
| 49 |
+
|
| 50 |
+
# Ensure sandbox directory exists and is clean
|
| 51 |
+
if sandbox_path.exists():
|
| 52 |
+
shutil.rmtree(sandbox_path)
|
| 53 |
+
sandbox_path.mkdir(exist_ok=True)
|
| 54 |
+
|
| 55 |
+
# Store original working directory
|
| 56 |
+
original_cwd = os.getcwd()
|
| 57 |
+
|
| 58 |
+
# Change to sandbox directory
|
| 59 |
+
os.chdir(sandbox_path)
|
| 60 |
+
|
| 61 |
+
# Initialize with uv
|
| 62 |
+
subprocess.run(
|
| 63 |
+
["uv", "init", project_name],
|
| 64 |
+
capture_output=True,
|
| 65 |
+
text=True,
|
| 66 |
+
check=True,
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
# Change to project directory
|
| 70 |
+
os.chdir(project_name)
|
| 71 |
+
|
| 72 |
+
# Add gradio as a dependency
|
| 73 |
+
subprocess.run(
|
| 74 |
+
["uv", "add", "gradio"],
|
| 75 |
+
capture_output=True,
|
| 76 |
+
text=True,
|
| 77 |
+
check=True,
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
# Change back to workspace root
|
| 81 |
+
os.chdir(original_cwd)
|
| 82 |
+
|
| 83 |
+
return f"Successfully set up project structure for {project_name} \
|
| 84 |
+
in sandbox/{project_name}"
|
| 85 |
+
|
| 86 |
+
except subprocess.CalledProcessError as e:
|
| 87 |
+
# Restore working directory on error
|
| 88 |
+
try:
|
| 89 |
+
os.chdir(original_cwd)
|
| 90 |
+
except NameError:
|
| 91 |
+
pass
|
| 92 |
+
return f"Error setting up project structure: {e.stderr}"
|
| 93 |
+
except Exception as e:
|
| 94 |
+
# Restore working directory on error
|
| 95 |
+
try:
|
| 96 |
+
os.chdir(original_cwd)
|
| 97 |
+
except NameError:
|
| 98 |
+
pass
|
| 99 |
+
return f"Unexpected error setting up project: {str(e)}"
|
| 100 |
+
|
| 101 |
+
|
| 102 |
class GradioCodingAgent:
|
| 103 |
"""
|
| 104 |
A specialized CodeAgent for implementing Gradio applications.
|
|
|
|
| 125 |
verbosity_level: Level of verbosity for agent output (uses settings if None)
|
| 126 |
max_steps: Maximum number of coding steps (uses settings if None)
|
| 127 |
"""
|
| 128 |
+
self.name = "coding_agent"
|
| 129 |
+
self.description = """Expert Python developer specializing in Gradio \
|
| 130 |
+
application implementation.
|
| 131 |
+
|
| 132 |
+
This agent takes planning results and creates complete, working Gradio \
|
| 133 |
+
applications with:
|
| 134 |
+
- Proper project structure using uv for package management
|
| 135 |
+
- Complete implementation of all planned features
|
| 136 |
+
- Working app.py file with functional Gradio interface
|
| 137 |
+
- Proper dependency management and documentation
|
| 138 |
+
- Error handling and iterative development approach
|
| 139 |
+
|
| 140 |
+
The agent only exits when the full plan is implemented successfully.
|
| 141 |
+
Handles complex applications and follows best practices for Python/Gradio \
|
| 142 |
+
development."""
|
| 143 |
+
|
| 144 |
# Use settings as defaults, but allow override
|
| 145 |
self.model_id = model_id or settings.code_model_id
|
| 146 |
self.api_base_url = api_base_url or settings.api_base_url
|
|
|
|
| 166 |
|
| 167 |
self.mcp_client = MCPClient(server_parameters)
|
| 168 |
|
| 169 |
+
# Get MCP tools and add our custom tools
|
| 170 |
+
mcp_tools = self.mcp_client.get_tools()
|
| 171 |
+
custom_tools = [setup_project_structure]
|
| 172 |
+
all_tools = list(mcp_tools) + custom_tools
|
| 173 |
|
| 174 |
# Initialize the CodeAgent with tools for file operations and project setup
|
| 175 |
self.agent = ToolCallingAgent(
|
| 176 |
model=self.model,
|
| 177 |
+
tools=all_tools,
|
| 178 |
verbosity_level=verbosity_level,
|
| 179 |
max_steps=max_steps,
|
| 180 |
+
name=self.name,
|
| 181 |
+
description=self.description,
|
| 182 |
)
|
| 183 |
|
| 184 |
self.sandbox_path = Path("sandbox")
|
|
|
|
| 206 |
except Exception:
|
| 207 |
pass
|
| 208 |
|
| 209 |
+
def __call__(self, task: str, **kwargs) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
"""
|
| 211 |
+
Handle coding tasks as a managed agent.
|
| 212 |
|
| 213 |
Args:
|
| 214 |
+
task: The planning result or task description
|
| 215 |
+
**kwargs: Additional keyword arguments (ignored)
|
| 216 |
|
| 217 |
Returns:
|
| 218 |
+
String response containing the formatted coding result
|
| 219 |
"""
|
| 220 |
+
full_prompt = f"""You are an expert Python developer specializing in \
|
| 221 |
+
Gradio application implementation.
|
| 222 |
+
|
| 223 |
+
Your mission is to implement a complete, working Gradio application based \
|
| 224 |
+
on the following architectural plan:
|
| 225 |
+
|
| 226 |
+
```
|
| 227 |
+
{task}
|
| 228 |
+
```
|
| 229 |
+
|
| 230 |
+
## Implementation Guidelines:
|
| 231 |
+
|
| 232 |
+
### 1. Project Setup
|
| 233 |
+
- ALWAYS start by calling setup_project_structure() to create the \
|
| 234 |
+
proper project structure
|
| 235 |
+
- Use uv for package management (already configured)
|
| 236 |
+
- The project will be created in ./sandbox/ directory
|
| 237 |
+
|
| 238 |
+
### 2. Implementation Requirements
|
| 239 |
+
- Create a complete, functional Gradio application
|
| 240 |
+
- Implement ALL features described in the plan
|
| 241 |
+
- Write clean, well-documented Python code
|
| 242 |
+
- Follow best practices for Gradio development
|
| 243 |
+
- Ensure proper error handling and user feedback
|
| 244 |
+
|
| 245 |
+
### 3. File Structure
|
| 246 |
+
- Create app.py as the main application file
|
| 247 |
+
- Add any necessary helper modules or utilities
|
| 248 |
+
- Include proper imports and dependencies
|
| 249 |
+
- Document code with comments and docstrings
|
| 250 |
+
|
| 251 |
+
### 4. Gradio Interface Guidelines
|
| 252 |
+
- Create an intuitive and user-friendly interface
|
| 253 |
+
- Use appropriate Gradio components for each feature
|
| 254 |
+
- Implement proper input validation and error handling
|
| 255 |
+
- Ensure responsive design and good UX practices
|
| 256 |
+
- Add helpful descriptions and examples where needed
|
| 257 |
+
|
| 258 |
+
### 5. Quality Standards
|
| 259 |
+
- Test your implementation thoroughly
|
| 260 |
+
- Handle edge cases and error scenarios
|
| 261 |
+
- Provide clear feedback to users
|
| 262 |
+
- Ensure the app runs without errors
|
| 263 |
+
- Follow Python coding standards (PEP 8)
|
| 264 |
+
|
| 265 |
+
### 6. Completion Criteria
|
| 266 |
+
- All planned features are fully implemented
|
| 267 |
+
- The application runs successfully with `python app.py`
|
| 268 |
+
- Users can interact with all described functionality
|
| 269 |
+
- Code is clean, documented, and maintainable
|
| 270 |
+
|
| 271 |
+
Remember: You can ONLY access files in the ./sandbox directory.
|
| 272 |
+
Do not attempt to access files outside this sandbox environment.
|
| 273 |
+
|
| 274 |
+
Start by setting up the project structure, then implement each feature \
|
| 275 |
+
systematically until the complete application is ready."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
try:
|
| 278 |
+
return self.agent.run(full_prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
except Exception as e:
|
| 281 |
+
return f"โ Implementation failed: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
|
| 283 |
|
| 284 |
if __name__ == "__main__":
|
|
|
|
| 287 |
|
| 288 |
# Test with a simple planning result
|
| 289 |
planning_agent = GradioPlanningAgent()
|
| 290 |
+
planning_result = planning_agent("Create a simple calculator app")
|
| 291 |
+
|
| 292 |
+
# Create coding agent and implement using managed agent approach
|
| 293 |
+
coding_agent = GradioCodingAgent()
|
| 294 |
+
coding_result_str = coding_agent(planning_result)
|
| 295 |
+
|
| 296 |
+
print("=== CODING RESULT ===")
|
| 297 |
+
print(coding_result_str)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
manager_agent.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Smolagents ToolCallingAgent for managing a multi-agent development workflow.
|
| 3 |
+
|
| 4 |
+
This module provides a manager agent that orchestrates:
|
| 5 |
+
- Planning Agent: Creates comprehensive plans for Gradio applications
|
| 6 |
+
- Coding Agent: Implements the planned applications with proper project structure
|
| 7 |
+
- Testing Agent: Tests and validates the implemented applications
|
| 8 |
+
|
| 9 |
+
The manager follows this workflow:
|
| 10 |
+
1. Receives user prompt
|
| 11 |
+
2. Hands prompt to Planning Agent → gets PlanningResult
|
| 12 |
+
3. Hands planning result to Coding Agent → gets CodingResult
|
| 13 |
+
4. Hands coding result to Testing Agent → gets TestingResult
|
| 14 |
+
5. If testing fails, hands errors back to Coding Agent for fixes
|
| 15 |
+
6. Continues until testing passes or max iterations reached
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from smolagents import CodeAgent, LiteLLMModel
|
| 19 |
+
|
| 20 |
+
from coding_agent import GradioCodingAgent
|
| 21 |
+
from planning_agent import GradioPlanningAgent
|
| 22 |
+
from settings import settings
|
| 23 |
+
from testing_agent import GradioTestingAgent
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class GradioManagerAgent:
|
| 27 |
+
"""
|
| 28 |
+
A manager agent that orchestrates the planning, coding, and testing workflow.
|
| 29 |
+
|
| 30 |
+
This agent coordinates the entire development process from initial planning
|
| 31 |
+
through implementation to final testing and validation.
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
def __init__(
|
| 35 |
+
self,
|
| 36 |
+
model_id: str | None = None,
|
| 37 |
+
api_base_url: str | None = None,
|
| 38 |
+
api_key: str | None = None,
|
| 39 |
+
verbosity_level: int | None = None,
|
| 40 |
+
max_steps: int | None = None,
|
| 41 |
+
max_iterations: int = 3,
|
| 42 |
+
):
|
| 43 |
+
"""
|
| 44 |
+
Initialize the Gradio Manager Agent.
|
| 45 |
+
|
| 46 |
+
Args:
|
| 47 |
+
model_id: Model ID to use for management (uses settings if None)
|
| 48 |
+
api_base_url: API base URL (uses settings if None)
|
| 49 |
+
api_key: API key (uses settings if None)
|
| 50 |
+
verbosity_level: Level of verbosity for agent output (uses settings if None)
|
| 51 |
+
max_steps: Maximum number of management steps (uses settings if None)
|
| 52 |
+
max_iterations: Maximum number of coding/testing iterations
|
| 53 |
+
"""
|
| 54 |
+
self.name = "manager_agent"
|
| 55 |
+
self.description = """Expert development manager coordinating multi-agent \
|
| 56 |
+
Gradio application development.
|
| 57 |
+
|
| 58 |
+
This agent orchestrates a complete development workflow by managing:
|
| 59 |
+
- Planning Agent: Creates comprehensive application plans
|
| 60 |
+
- Coding Agent: Implements planned applications with proper structure
|
| 61 |
+
- Testing Agent: Validates and tests implemented applications
|
| 62 |
+
|
| 63 |
+
Coordinates iterative development cycles until applications are fully working \
|
| 64 |
+
and tested.
|
| 65 |
+
Provides comprehensive workflow management and detailed progress reporting."""
|
| 66 |
+
|
| 67 |
+
# Use settings as defaults, but allow override
|
| 68 |
+
self.model_id = model_id or settings.manager_model_id
|
| 69 |
+
self.api_base_url = api_base_url or settings.api_base_url
|
| 70 |
+
self.api_key = api_key or settings.api_key
|
| 71 |
+
verbosity_level = verbosity_level or settings.manager_verbosity
|
| 72 |
+
max_steps = max_steps or settings.max_manager_steps
|
| 73 |
+
self.max_iterations = max_iterations
|
| 74 |
+
|
| 75 |
+
# Initialize the language model
|
| 76 |
+
self.model = LiteLLMModel(
|
| 77 |
+
model_id=self.model_id,
|
| 78 |
+
api_base=self.api_base_url,
|
| 79 |
+
api_key=self.api_key,
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
# Create managed agent instances
|
| 83 |
+
self.planning_agent = GradioPlanningAgent()
|
| 84 |
+
self.coding_agent = GradioCodingAgent()
|
| 85 |
+
self.testing_agent = GradioTestingAgent()
|
| 86 |
+
|
| 87 |
+
# Initialize the main ToolCallingAgent with the managed agents
|
| 88 |
+
self.agent = CodeAgent(
|
| 89 |
+
model=self.model,
|
| 90 |
+
tools=[], # No tools needed, only managed agents
|
| 91 |
+
managed_agents=[
|
| 92 |
+
self.planning_agent,
|
| 93 |
+
self.coding_agent,
|
| 94 |
+
self.testing_agent,
|
| 95 |
+
],
|
| 96 |
+
verbosity_level=verbosity_level,
|
| 97 |
+
max_steps=max_steps,
|
| 98 |
+
name=self.name,
|
| 99 |
+
description=self.description,
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
def __call__(self, task: str, **kwargs) -> str:
|
| 103 |
+
"""
|
| 104 |
+
Handle development management tasks as a managed agent.
|
| 105 |
+
|
| 106 |
+
Args:
|
| 107 |
+
task: The user's description of the application to build
|
| 108 |
+
**kwargs: Additional keyword arguments (ignored)
|
| 109 |
+
|
| 110 |
+
Returns:
|
| 111 |
+
String response containing the formatted workflow result
|
| 112 |
+
"""
|
| 113 |
+
try:
|
| 114 |
+
# Run the development workflow
|
| 115 |
+
result = self.develop_application(task)
|
| 116 |
+
|
| 117 |
+
# Format the result for managed agent workflow
|
| 118 |
+
return self.format_result_as_markdown(result)
|
| 119 |
+
|
| 120 |
+
except Exception as e:
|
| 121 |
+
return f"โ Development workflow failed: {str(e)}"
|
| 122 |
+
|
| 123 |
+
def develop_application(self, prompt: str) -> str:
|
| 124 |
+
"""
|
| 125 |
+
Manage the full development workflow from planning to testing.
|
| 126 |
+
|
| 127 |
+
Args:
|
| 128 |
+
prompt: User's description of the application to build
|
| 129 |
+
|
| 130 |
+
Returns:
|
| 131 |
+
String containing the complete workflow results
|
| 132 |
+
"""
|
| 133 |
+
try:
|
| 134 |
+
# Create comprehensive task for the manager workflow
|
| 135 |
+
manager_task = f"""You are a development manager coordinating a \
|
| 136 |
+
team of specialists to build a Gradio application.
|
| 137 |
+
|
| 138 |
+
The user wants: {prompt}
|
| 139 |
+
|
| 140 |
+
Please coordinate the following workflow:
|
| 141 |
+
|
| 142 |
+
1. **PLANNING PHASE**: Call the planning_agent to create a comprehensive \
|
| 143 |
+
plan for this application
|
| 144 |
+
2. **IMPLEMENTATION PHASE**: Call the coding_agent with the planning results \
|
| 145 |
+
to implement the application
|
| 146 |
+
3. **TESTING PHASE**: Call the testing_agent with the implementation results \
|
| 147 |
+
to test the application
|
| 148 |
+
4. **ITERATION**: If testing fails, call the coding_agent again with the \
|
| 149 |
+
error details to fix issues
|
| 150 |
+
5. **COMPLETION**: Continue until testing passes or maximum iterations reached
|
| 151 |
+
|
| 152 |
+
Start by calling the planning_agent with the user's request."""
|
| 153 |
+
|
| 154 |
+
# Run the coordinated workflow
|
| 155 |
+
result = self.agent.run(manager_task)
|
| 156 |
+
|
| 157 |
+
# Return successful result with agent's response
|
| 158 |
+
return str(result)
|
| 159 |
+
|
| 160 |
+
except Exception as e:
|
| 161 |
+
return f"Manager workflow failed: {str(e)}"
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
if __name__ == "__main__":
|
| 165 |
+
# Example usage
|
| 166 |
+
manager = GradioManagerAgent()
|
| 167 |
+
|
| 168 |
+
# Test the manager workflow using managed agent approach
|
| 169 |
+
result = manager("Create a simple calculator with basic arithmetic operations")
|
| 170 |
+
|
| 171 |
+
print("=== MANAGER RESULT ===")
|
| 172 |
+
print(result)
|
planning_agent.py
CHANGED
|
@@ -7,25 +7,11 @@ This module provides a specialized planning agent that can:
|
|
| 7 |
- Return an action, implementation and testing plan
|
| 8 |
"""
|
| 9 |
|
| 10 |
-
from
|
| 11 |
-
|
| 12 |
-
from smolagents import LiteLLMModel
|
| 13 |
|
| 14 |
from settings import settings
|
| 15 |
|
| 16 |
|
| 17 |
-
@dataclass
|
| 18 |
-
class PlanningResult:
|
| 19 |
-
"""Result of the planning agent containing structured plans."""
|
| 20 |
-
|
| 21 |
-
action_plan: str
|
| 22 |
-
implementation_plan: str
|
| 23 |
-
testing_plan: str
|
| 24 |
-
gradio_components: list[str]
|
| 25 |
-
estimated_complexity: str
|
| 26 |
-
dependencies: list[str]
|
| 27 |
-
|
| 28 |
-
|
| 29 |
class GradioPlanningAgent:
|
| 30 |
"""
|
| 31 |
A specialized CodeAgent for planning Gradio applications.
|
|
@@ -50,6 +36,24 @@ class GradioPlanningAgent:
|
|
| 50 |
api_key: API key (uses settings if None)
|
| 51 |
verbosity_level: Level of verbosity for agent output (uses settings if None)
|
| 52 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
# Use settings as defaults, but allow override
|
| 54 |
self.model_id = model_id or settings.model_id
|
| 55 |
self.api_base_url = api_base_url or settings.api_base_url
|
|
@@ -63,7 +67,15 @@ class GradioPlanningAgent:
|
|
| 63 |
api_key=self.api_key,
|
| 64 |
)
|
| 65 |
|
| 66 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
application developer. Your role is to create comprehensive, detailed plans \
|
| 68 |
for building Gradio applications based on user requirements.
|
| 69 |
|
|
@@ -110,168 +122,32 @@ Be thorough, practical, and consider real-world constraints. Focus on creating \
|
|
| 110 |
maintainable, user-friendly Gradio applications. Remember: NO CODE IMPLEMENTATION \
|
| 111 |
at this stage - only architectural planning and structural design."""
|
| 112 |
|
| 113 |
-
def
|
| 114 |
"""
|
| 115 |
-
|
| 116 |
|
| 117 |
Args:
|
| 118 |
-
|
|
|
|
| 119 |
|
| 120 |
Returns:
|
| 121 |
-
|
| 122 |
"""
|
|
|
|
| 123 |
|
| 124 |
-
# Enhanced prompt for the agent
|
| 125 |
-
user_prompt = f"""
|
| 126 |
Create a comprehensive plan for building the following Gradio application:
|
| 127 |
|
| 128 |
-
{
|
| 129 |
|
| 130 |
Please provide detailed ACTION, IMPLEMENTATION, and TESTING plans following the \
|
| 131 |
specified format. Consider all aspects of the application including UI/UX, \
|
| 132 |
-
functionality, error handling, and deployment.
|
| 133 |
-
"""
|
| 134 |
-
|
| 135 |
-
messages = [
|
| 136 |
-
{"role": "system", "content": self.planning_prompt},
|
| 137 |
-
{"role": "user", "content": user_prompt},
|
| 138 |
-
]
|
| 139 |
-
response = self.model.generate(messages)
|
| 140 |
-
|
| 141 |
-
# Parse the response into structured result
|
| 142 |
-
return self._parse_planning_response(response.content)
|
| 143 |
-
|
| 144 |
-
def _parse_planning_response(self, response: str) -> PlanningResult:
|
| 145 |
-
"""
|
| 146 |
-
Parse the agent's response into a structured PlanningResult.
|
| 147 |
-
|
| 148 |
-
Args:
|
| 149 |
-
response: Raw response from the planning agent
|
| 150 |
-
|
| 151 |
-
Returns:
|
| 152 |
-
Structured PlanningResult
|
| 153 |
-
"""
|
| 154 |
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
implementation_plan = ""
|
| 158 |
-
testing_plan = ""
|
| 159 |
-
gradio_components = []
|
| 160 |
-
estimated_complexity = "Medium"
|
| 161 |
-
dependencies = ["gradio"]
|
| 162 |
-
|
| 163 |
-
# Parse sections from the response
|
| 164 |
-
sections = self._extract_sections(response)
|
| 165 |
-
|
| 166 |
-
action_plan = sections.get("ACTION PLAN", "")
|
| 167 |
-
implementation_plan = sections.get("IMPLEMENTATION PLAN", "")
|
| 168 |
-
testing_plan = sections.get("TESTING PLAN", "")
|
| 169 |
-
|
| 170 |
-
# Parse gradio components list
|
| 171 |
-
components_text = sections.get("GRADIO COMPONENTS", "")
|
| 172 |
-
if components_text:
|
| 173 |
-
gradio_components = self._extract_list_items(components_text)
|
| 174 |
-
|
| 175 |
-
# Parse complexity
|
| 176 |
-
complexity_text = sections.get("ESTIMATED COMPLEXITY", "")
|
| 177 |
-
if complexity_text:
|
| 178 |
-
estimated_complexity = complexity_text.strip()
|
| 179 |
-
|
| 180 |
-
# Parse dependencies
|
| 181 |
-
deps_text = sections.get("DEPENDENCIES", "")
|
| 182 |
-
if deps_text:
|
| 183 |
-
dependencies = ["gradio"] + self._extract_list_items(deps_text)
|
| 184 |
-
# Remove duplicates while preserving order
|
| 185 |
-
dependencies = list(dict.fromkeys(dependencies))
|
| 186 |
-
|
| 187 |
-
return PlanningResult(
|
| 188 |
-
action_plan=action_plan,
|
| 189 |
-
implementation_plan=implementation_plan,
|
| 190 |
-
testing_plan=testing_plan,
|
| 191 |
-
gradio_components=gradio_components,
|
| 192 |
-
estimated_complexity=estimated_complexity,
|
| 193 |
-
dependencies=dependencies,
|
| 194 |
-
)
|
| 195 |
-
|
| 196 |
-
def _extract_sections(self, text: str) -> dict[str, str]:
|
| 197 |
-
"""Extract sections from markdown-formatted text."""
|
| 198 |
-
sections = {}
|
| 199 |
-
current_section = None
|
| 200 |
-
current_content = []
|
| 201 |
-
|
| 202 |
-
for line in text.split("\n"):
|
| 203 |
-
line = line.strip()
|
| 204 |
-
|
| 205 |
-
# Check if line is a section header
|
| 206 |
-
if line.startswith("## "):
|
| 207 |
-
# Save previous section if exists
|
| 208 |
-
if current_section and current_content:
|
| 209 |
-
sections[current_section] = "\n".join(current_content).strip()
|
| 210 |
-
|
| 211 |
-
# Start new section
|
| 212 |
-
current_section = line[3:].strip()
|
| 213 |
-
current_content = []
|
| 214 |
-
elif current_section:
|
| 215 |
-
current_content.append(line)
|
| 216 |
-
|
| 217 |
-
# Save last section
|
| 218 |
-
if current_section and current_content:
|
| 219 |
-
sections[current_section] = "\n".join(current_content).strip()
|
| 220 |
-
|
| 221 |
-
return sections
|
| 222 |
-
|
| 223 |
-
def _extract_list_items(self, text: str) -> list[str]:
|
| 224 |
-
"""Extract list items from text (handles bullet points, numbered lists, etc.)"""
|
| 225 |
-
items = []
|
| 226 |
-
for line in text.split("\n"):
|
| 227 |
-
line = line.strip()
|
| 228 |
-
if line:
|
| 229 |
-
# Remove common list prefixes
|
| 230 |
-
if line.startswith("- "):
|
| 231 |
-
line = line[2:].strip()
|
| 232 |
-
elif line.startswith("* "):
|
| 233 |
-
line = line[2:].strip()
|
| 234 |
-
elif ". " in line and line.split(".")[0].isdigit():
|
| 235 |
-
line = line.split(".", 1)[1].strip()
|
| 236 |
-
|
| 237 |
-
if line:
|
| 238 |
-
items.append(line)
|
| 239 |
-
|
| 240 |
-
return items
|
| 241 |
-
|
| 242 |
-
def format_plan_as_markdown(self, result: PlanningResult) -> str:
|
| 243 |
-
"""
|
| 244 |
-
Format the planning result as a well-structured markdown document.
|
| 245 |
-
|
| 246 |
-
Args:
|
| 247 |
-
result: PlanningResult to format
|
| 248 |
-
|
| 249 |
-
Returns:
|
| 250 |
-
Markdown-formatted string
|
| 251 |
-
"""
|
| 252 |
-
|
| 253 |
-
markdown = f"""# Gradio Application Plan
|
| 254 |
-
|
| 255 |
-
## ๐ Action Plan
|
| 256 |
-
{result.action_plan}
|
| 257 |
-
|
| 258 |
-
## ๐ง Implementation Plan
|
| 259 |
-
{result.implementation_plan}
|
| 260 |
-
|
| 261 |
-
## ๐งช Testing Plan
|
| 262 |
-
{result.testing_plan}
|
| 263 |
-
|
| 264 |
-
## ๐จ Gradio Components
|
| 265 |
-
{chr(10).join([f"- {component}" for component in result.gradio_components])}
|
| 266 |
-
|
| 267 |
-
## โก Estimated Complexity
|
| 268 |
-
{result.estimated_complexity}
|
| 269 |
-
|
| 270 |
-
## ๐ฆ Dependencies
|
| 271 |
-
{chr(10).join([f"- {dep}" for dep in result.dependencies])}
|
| 272 |
-
"""
|
| 273 |
|
| 274 |
-
|
|
|
|
| 275 |
|
| 276 |
|
| 277 |
# Example usage and testing
|
|
@@ -280,9 +156,9 @@ if __name__ == "__main__":
|
|
| 280 |
agent = GradioPlanningAgent()
|
| 281 |
|
| 282 |
# Test with a simple calculator example
|
| 283 |
-
result = agent
|
| 284 |
"Write a simple calculator app that can perform basic arithmetic operations"
|
| 285 |
)
|
| 286 |
|
| 287 |
print("=== PLANNING RESULT ===")
|
| 288 |
-
print(
|
|
|
|
| 7 |
- Return an action, implementation and testing plan
|
| 8 |
"""
|
| 9 |
|
| 10 |
+
from smolagents import LiteLLMModel, ToolCallingAgent
|
|
|
|
|
|
|
| 11 |
|
| 12 |
from settings import settings
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
class GradioPlanningAgent:
|
| 16 |
"""
|
| 17 |
A specialized CodeAgent for planning Gradio applications.
|
|
|
|
| 36 |
api_key: API key (uses settings if None)
|
| 37 |
verbosity_level: Level of verbosity for agent output (uses settings if None)
|
| 38 |
"""
|
| 39 |
+
self.name = "planning_agent"
|
| 40 |
+
self.description = """Expert software architect specializing in Gradio \
|
| 41 |
+
application planning.
|
| 42 |
+
|
| 43 |
+
This agent creates comprehensive, detailed plans for building Gradio applications \
|
| 44 |
+
based on user requirements.
|
| 45 |
+
It provides:
|
| 46 |
+
- High-level action plans breaking down the implementation steps
|
| 47 |
+
- Detailed technical implementation plans using Python and Gradio
|
| 48 |
+
- Comprehensive testing strategies
|
| 49 |
+
- Analysis of required Gradio components and dependencies
|
| 50 |
+
- Complexity estimation for the project
|
| 51 |
+
|
| 52 |
+
The agent focuses purely on planning and architecture - no actual code \
|
| 53 |
+
implementation.
|
| 54 |
+
Perfect for getting structured, well-thought-out plans before development \
|
| 55 |
+
begins."""
|
| 56 |
+
|
| 57 |
# Use settings as defaults, but allow override
|
| 58 |
self.model_id = model_id or settings.model_id
|
| 59 |
self.api_base_url = api_base_url or settings.api_base_url
|
|
|
|
| 67 |
api_key=self.api_key,
|
| 68 |
)
|
| 69 |
|
| 70 |
+
self.agent = ToolCallingAgent(
|
| 71 |
+
model=self.model,
|
| 72 |
+
tools=[],
|
| 73 |
+
verbosity_level=verbosity_level,
|
| 74 |
+
name=self.name,
|
| 75 |
+
description=self.description,
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
self.system_prompt = """You are an expert software architect and Gradio \
|
| 79 |
application developer. Your role is to create comprehensive, detailed plans \
|
| 80 |
for building Gradio applications based on user requirements.
|
| 81 |
|
|
|
|
| 122 |
maintainable, user-friendly Gradio applications. Remember: NO CODE IMPLEMENTATION \
|
| 123 |
at this stage - only architectural planning and structural design."""
|
| 124 |
|
| 125 |
+
def __call__(self, task: str, **kwargs) -> str:
|
| 126 |
"""
|
| 127 |
+
Handle planning tasks as a managed agent.
|
| 128 |
|
| 129 |
Args:
|
| 130 |
+
task: The user's description of the application to build
|
| 131 |
+
**kwargs: Additional keyword arguments (ignored)
|
| 132 |
|
| 133 |
Returns:
|
| 134 |
+
String response containing the formatted planning result
|
| 135 |
"""
|
| 136 |
+
full_prompt = f"""{self.system_prompt}
|
| 137 |
|
|
|
|
|
|
|
| 138 |
Create a comprehensive plan for building the following Gradio application:
|
| 139 |
|
| 140 |
+
{task}
|
| 141 |
|
| 142 |
Please provide detailed ACTION, IMPLEMENTATION, and TESTING plans following the \
|
| 143 |
specified format. Consider all aspects of the application including UI/UX, \
|
| 144 |
+
functionality, error handling, and deployment. /no_think"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
+
try:
|
| 147 |
+
return self.agent.run(full_prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
+
except Exception as e:
|
| 150 |
+
return f"❌ Planning failed: {str(e)}"
|
| 151 |
|
| 152 |
|
| 153 |
# Example usage and testing
|
|
|
|
| 156 |
agent = GradioPlanningAgent()
|
| 157 |
|
| 158 |
# Test with a simple calculator example
|
| 159 |
+
result = agent(
|
| 160 |
"Write a simple calculator app that can perform basic arithmetic operations"
|
| 161 |
)
|
| 162 |
|
| 163 |
print("=== PLANNING RESULT ===")
|
| 164 |
+
print(result)
|
pyproject.toml
CHANGED
|
@@ -46,4 +46,6 @@ members = [
|
|
| 46 |
"sandbox/gradio_app",
|
| 47 |
"sandbox/sandbox/gradio_app",
|
| 48 |
"test_sandbox/test_project",
|
|
|
|
|
|
|
| 49 |
]
|
|
|
|
| 46 |
"sandbox/gradio_app",
|
| 47 |
"sandbox/sandbox/gradio_app",
|
| 48 |
"test_sandbox/test_project",
|
| 49 |
+
"sandbox/calculator_app",
|
| 50 |
+
"sandbox/gradio_calculator",
|
| 51 |
]
|
settings.py
CHANGED
|
@@ -23,6 +23,11 @@ class Settings:
|
|
| 23 |
self.api_base_url: str | None = os.getenv("API_BASE_URL")
|
| 24 |
self.api_key: str | None = os.getenv("API_KEY")
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# Coding Agent Settings
|
| 27 |
self.code_model_id: str = os.getenv("CODE_MODEL_ID", self.model_id)
|
| 28 |
self.coding_verbosity: int = int(os.getenv("CODING_VERBOSITY", "2"))
|
|
@@ -57,6 +62,14 @@ without a valid API key."
|
|
| 57 |
print(" Set it in your .env file or as an environment variable.")
|
| 58 |
print()
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
if self.planning_verbosity not in [0, 1, 2]:
|
| 61 |
print(
|
| 62 |
f"⚠️ Warning: PLANNING_VERBOSITY={self.planning_verbosity} is not \
|
|
@@ -92,6 +105,17 @@ in valid range [0, 1, 2]"
|
|
| 92 |
|
| 93 |
return config
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
def get_code_model_config(self) -> dict:
|
| 96 |
"""Get model configuration for the coding agent."""
|
| 97 |
config = {"model_id": self.code_model_id, "api_key": self.api_key}
|
|
@@ -111,6 +135,13 @@ in valid range [0, 1, 2]"
|
|
| 111 |
"debug": self.gradio_debug,
|
| 112 |
}
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
def get_planning_config(self) -> dict:
|
| 115 |
"""Get planning agent configuration."""
|
| 116 |
return {
|
|
@@ -147,6 +178,7 @@ in valid range [0, 1, 2]"
|
|
| 147 |
"""String representation of settings (excluding sensitive data)."""
|
| 148 |
return f"""Settings(
|
| 149 |
model_id='{self.model_id}',
|
|
|
|
| 150 |
code_model_id='{self.code_model_id}',
|
| 151 |
test_model_id='{self.test_model_id}',
|
| 152 |
api_key={'***' if self.api_key else 'None'},
|
|
@@ -154,6 +186,8 @@ in valid range [0, 1, 2]"
|
|
| 154 |
gradio_host='{self.gradio_host}',
|
| 155 |
gradio_port={self.gradio_port},
|
| 156 |
gradio_debug={self.gradio_debug},
|
|
|
|
|
|
|
| 157 |
planning_verbosity={self.planning_verbosity},
|
| 158 |
max_planning_steps={self.max_planning_steps},
|
| 159 |
coding_verbosity={self.coding_verbosity},
|
|
@@ -186,12 +220,18 @@ if __name__ == "__main__":
|
|
| 186 |
print("Model Config:")
|
| 187 |
print(settings.get_model_config())
|
| 188 |
print()
|
|
|
|
|
|
|
|
|
|
| 189 |
print("Code Model Config:")
|
| 190 |
print(settings.get_code_model_config())
|
| 191 |
print()
|
| 192 |
print("Gradio Config:")
|
| 193 |
print(settings.get_gradio_config())
|
| 194 |
print()
|
|
|
|
|
|
|
|
|
|
| 195 |
print("Planning Config:")
|
| 196 |
print(settings.get_planning_config())
|
| 197 |
print()
|
|
|
|
| 23 |
self.api_base_url: str | None = os.getenv("API_BASE_URL")
|
| 24 |
self.api_key: str | None = os.getenv("API_KEY")
|
| 25 |
|
| 26 |
+
# Manager Agent Settings
|
| 27 |
+
self.manager_model_id: str = os.getenv("MANAGER_MODEL_ID", self.model_id)
|
| 28 |
+
self.manager_verbosity: int = int(os.getenv("MANAGER_VERBOSITY", "1"))
|
| 29 |
+
self.max_manager_steps: int = int(os.getenv("MAX_MANAGER_STEPS", "15"))
|
| 30 |
+
|
| 31 |
# Coding Agent Settings
|
| 32 |
self.code_model_id: str = os.getenv("CODE_MODEL_ID", self.model_id)
|
| 33 |
self.coding_verbosity: int = int(os.getenv("CODING_VERBOSITY", "2"))
|
|
|
|
| 62 |
print(" Set it in your .env file or as an environment variable.")
|
| 63 |
print()
|
| 64 |
|
| 65 |
+
if self.manager_verbosity not in [0, 1, 2]:
|
| 66 |
+
print(
|
| 67 |
+
f"⚠️ Warning: MANAGER_VERBOSITY={self.manager_verbosity} is not \
|
| 68 |
+
in valid range [0, 1, 2]"
|
| 69 |
+
)
|
| 70 |
+
print(" Using default value of 1")
|
| 71 |
+
self.manager_verbosity = 1
|
| 72 |
+
|
| 73 |
if self.planning_verbosity not in [0, 1, 2]:
|
| 74 |
print(
|
| 75 |
f"⚠️ Warning: PLANNING_VERBOSITY={self.planning_verbosity} is not \
|
|
|
|
| 105 |
|
| 106 |
return config
|
| 107 |
|
| 108 |
+
def get_manager_model_config(self) -> dict:
    """Get model configuration for the manager agent.

    Returns:
        A dict that always contains ``model_id`` and ``api_key`` (the key may
        be None when unset) and contains ``api_base_url`` only when one is
        configured.
    """
    # api_key is always included here, so no second conditional assignment
    # is needed further down.
    config = {"model_id": self.manager_model_id, "api_key": self.api_key}

    if self.api_base_url:
        config["api_base_url"] = self.api_base_url

    return config
|
| 118 |
+
|
| 119 |
def get_code_model_config(self) -> dict:
|
| 120 |
"""Get model configuration for the coding agent."""
|
| 121 |
config = {"model_id": self.code_model_id, "api_key": self.api_key}
|
|
|
|
| 135 |
"debug": self.gradio_debug,
|
| 136 |
}
|
| 137 |
|
| 138 |
+
def get_manager_config(self) -> dict:
    """Return the manager agent's run options (verbosity and step limit)."""
    manager_options = {}
    manager_options["verbosity_level"] = self.manager_verbosity
    manager_options["max_steps"] = self.max_manager_steps
    return manager_options
|
| 144 |
+
|
| 145 |
def get_planning_config(self) -> dict:
|
| 146 |
"""Get planning agent configuration."""
|
| 147 |
return {
|
|
|
|
| 178 |
"""String representation of settings (excluding sensitive data)."""
|
| 179 |
return f"""Settings(
|
| 180 |
model_id='{self.model_id}',
|
| 181 |
+
manager_model_id='{self.manager_model_id}',
|
| 182 |
code_model_id='{self.code_model_id}',
|
| 183 |
test_model_id='{self.test_model_id}',
|
| 184 |
api_key={'***' if self.api_key else 'None'},
|
|
|
|
| 186 |
gradio_host='{self.gradio_host}',
|
| 187 |
gradio_port={self.gradio_port},
|
| 188 |
gradio_debug={self.gradio_debug},
|
| 189 |
+
manager_verbosity={self.manager_verbosity},
|
| 190 |
+
max_manager_steps={self.max_manager_steps},
|
| 191 |
planning_verbosity={self.planning_verbosity},
|
| 192 |
max_planning_steps={self.max_planning_steps},
|
| 193 |
coding_verbosity={self.coding_verbosity},
|
|
|
|
| 220 |
print("Model Config:")
|
| 221 |
print(settings.get_model_config())
|
| 222 |
print()
|
| 223 |
+
print("Manager Model Config:")
|
| 224 |
+
print(settings.get_manager_model_config())
|
| 225 |
+
print()
|
| 226 |
print("Code Model Config:")
|
| 227 |
print(settings.get_code_model_config())
|
| 228 |
print()
|
| 229 |
print("Gradio Config:")
|
| 230 |
print(settings.get_gradio_config())
|
| 231 |
print()
|
| 232 |
+
print("Manager Config:")
|
| 233 |
+
print(settings.get_manager_config())
|
| 234 |
+
print()
|
| 235 |
print("Planning Config:")
|
| 236 |
print(settings.get_planning_config())
|
| 237 |
print()
|
test_manager_agent.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Test cases for the Gradio Manager Agent.
|
| 3 |
+
|
| 4 |
+
This module contains unit tests and integration tests for the manager agent
|
| 5 |
+
functionality, including managed agent coordination and workflow testing.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import unittest
|
| 9 |
+
from unittest.mock import Mock, patch
|
| 10 |
+
|
| 11 |
+
from manager_agent import (
|
| 12 |
+
GradioManagerAgent,
|
| 13 |
+
ManagerResult,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class TestGradioManagerAgent(unittest.TestCase):
|
| 18 |
+
"""Test the main GradioManagerAgent class."""
|
| 19 |
+
|
| 20 |
+
def setUp(self):
|
| 21 |
+
"""Set up test fixtures."""
|
| 22 |
+
# Mock settings
|
| 23 |
+
self.mock_settings_patcher = patch("manager_agent.settings")
|
| 24 |
+
self.mock_settings = self.mock_settings_patcher.start()
|
| 25 |
+
|
| 26 |
+
# Set up mock settings
|
| 27 |
+
self.mock_settings.manager_model_id = "test-manager-model"
|
| 28 |
+
self.mock_settings.model_id = "test-model"
|
| 29 |
+
self.mock_settings.code_model_id = "test-code-model"
|
| 30 |
+
self.mock_settings.test_model_id = "test-test-model"
|
| 31 |
+
self.mock_settings.api_base_url = "http://test.api"
|
| 32 |
+
self.mock_settings.api_key = "test-key"
|
| 33 |
+
self.mock_settings.manager_verbosity = 1
|
| 34 |
+
self.mock_settings.planning_verbosity = 1
|
| 35 |
+
self.mock_settings.coding_verbosity = 1
|
| 36 |
+
self.mock_settings.testing_verbosity = 1
|
| 37 |
+
self.mock_settings.max_manager_steps = 10
|
| 38 |
+
self.mock_settings.max_coding_steps = 15
|
| 39 |
+
self.mock_settings.max_testing_steps = 10
|
| 40 |
+
|
| 41 |
+
def tearDown(self):
|
| 42 |
+
"""Clean up test fixtures."""
|
| 43 |
+
self.mock_settings_patcher.stop()
|
| 44 |
+
|
| 45 |
+
@patch("manager_agent.LiteLLMModel")
|
| 46 |
+
@patch("manager_agent.ToolCallingAgent")
|
| 47 |
+
@patch("manager_agent.GradioPlanningAgent")
|
| 48 |
+
@patch("manager_agent.GradioCodingAgent")
|
| 49 |
+
@patch("manager_agent.GradioTestingAgent")
|
| 50 |
+
def test_manager_agent_initialization(
|
| 51 |
+
self,
|
| 52 |
+
mock_testing_agent,
|
| 53 |
+
mock_coding_agent,
|
| 54 |
+
mock_planning_agent,
|
| 55 |
+
mock_tool_calling_agent,
|
| 56 |
+
mock_litellm_model,
|
| 57 |
+
):
|
| 58 |
+
"""Test manager agent initialization."""
|
| 59 |
+
# Mock the managed agents
|
| 60 |
+
mock_planning_instance = Mock()
|
| 61 |
+
mock_planning_instance.name = "planning_agent"
|
| 62 |
+
mock_planning_instance.description = "Planning agent"
|
| 63 |
+
mock_planning_agent.return_value = mock_planning_instance
|
| 64 |
+
|
| 65 |
+
mock_coding_instance = Mock()
|
| 66 |
+
mock_coding_instance.name = "coding_agent"
|
| 67 |
+
mock_coding_instance.description = "Coding agent"
|
| 68 |
+
mock_coding_agent.return_value = mock_coding_instance
|
| 69 |
+
|
| 70 |
+
mock_testing_instance = Mock()
|
| 71 |
+
mock_testing_instance.name = "testing_agent"
|
| 72 |
+
mock_testing_instance.description = "Testing agent"
|
| 73 |
+
mock_testing_agent.return_value = mock_testing_instance
|
| 74 |
+
|
| 75 |
+
# Create manager agent
|
| 76 |
+
manager = GradioManagerAgent()
|
| 77 |
+
|
| 78 |
+
# Verify initialization
|
| 79 |
+
self.assertIsInstance(manager, GradioManagerAgent)
|
| 80 |
+
self.assertEqual(manager.max_iterations, 3)
|
| 81 |
+
mock_litellm_model.assert_called_once()
|
| 82 |
+
mock_tool_calling_agent.assert_called_once()
|
| 83 |
+
|
| 84 |
+
@patch("manager_agent.LiteLLMModel")
|
| 85 |
+
@patch("manager_agent.ToolCallingAgent")
|
| 86 |
+
@patch("manager_agent.GradioPlanningAgent")
|
| 87 |
+
@patch("manager_agent.GradioCodingAgent")
|
| 88 |
+
@patch("manager_agent.GradioTestingAgent")
|
| 89 |
+
def test_develop_application_success(
|
| 90 |
+
self,
|
| 91 |
+
mock_testing_agent,
|
| 92 |
+
mock_coding_agent,
|
| 93 |
+
mock_planning_agent,
|
| 94 |
+
mock_tool_calling_agent,
|
| 95 |
+
mock_litellm_model,
|
| 96 |
+
):
|
| 97 |
+
"""Test successful application development workflow."""
|
| 98 |
+
# Mock the managed agents
|
| 99 |
+
mock_planning_instance = Mock()
|
| 100 |
+
mock_planning_instance.name = "planning_agent"
|
| 101 |
+
mock_planning_instance.description = "Planning agent"
|
| 102 |
+
mock_planning_agent.return_value = mock_planning_instance
|
| 103 |
+
|
| 104 |
+
mock_coding_instance = Mock()
|
| 105 |
+
mock_coding_instance.name = "coding_agent"
|
| 106 |
+
mock_coding_instance.description = "Coding agent"
|
| 107 |
+
mock_coding_agent.return_value = mock_coding_instance
|
| 108 |
+
|
| 109 |
+
mock_testing_instance = Mock()
|
| 110 |
+
mock_testing_instance.name = "testing_agent"
|
| 111 |
+
mock_testing_instance.description = "Testing agent"
|
| 112 |
+
mock_testing_agent.return_value = mock_testing_instance
|
| 113 |
+
|
| 114 |
+
# Mock the main agent
|
| 115 |
+
mock_agent_instance = Mock()
|
| 116 |
+
mock_agent_instance.run.return_value = "Workflow completed successfully"
|
| 117 |
+
mock_tool_calling_agent.return_value = mock_agent_instance
|
| 118 |
+
|
| 119 |
+
# Create manager and test workflow
|
| 120 |
+
manager = GradioManagerAgent()
|
| 121 |
+
result = manager.develop_application("Create a simple calculator")
|
| 122 |
+
|
| 123 |
+
# Verify the result
|
| 124 |
+
self.assertIsInstance(result, ManagerResult)
|
| 125 |
+
self.assertTrue(result.success)
|
| 126 |
+
self.assertEqual(result.iterations, 1)
|
| 127 |
+
self.assertIn("Workflow completed successfully", result.final_message)
|
| 128 |
+
|
| 129 |
+
@patch("manager_agent.LiteLLMModel")
@patch("manager_agent.ToolCallingAgent")
@patch("manager_agent.GradioPlanningAgent")
@patch("manager_agent.GradioCodingAgent")
@patch("manager_agent.GradioTestingAgent")
def test_develop_application_failure(
    self,
    mock_testing_agent,
    mock_coding_agent,
    mock_planning_agent,
    mock_tool_calling_agent,
    mock_litellm_model,
):
    """Check that an exception raised by the orchestrating agent is reported.

    The patched ``ToolCallingAgent`` instance raises from ``run``; the test
    expects ``develop_application`` to return a failed ``ManagerResult``
    (zero iterations, error text recorded) rather than propagate the error.
    """
    # Patches are injected bottom-up, so the decorator closest to ``def``
    # (GradioTestingAgent) supplies the first mock argument.
    # Give every patched sub-agent class an instance that carries a plain
    # ``name`` and ``description`` attribute.
    for agent_cls, agent_name, agent_description in (
        (mock_planning_agent, "planning_agent", "Planning agent"),
        (mock_coding_agent, "coding_agent", "Coding agent"),
        (mock_testing_agent, "testing_agent", "Testing agent"),
    ):
        stub = Mock()
        stub.name = agent_name
        stub.description = agent_description
        agent_cls.return_value = stub

    # The orchestrating agent blows up as soon as it is run.
    failing_agent = Mock()
    failing_agent.run.side_effect = Exception("Workflow failed")
    mock_tool_calling_agent.return_value = failing_agent

    # Exercise the workflow through the public entry point.
    outcome = GradioManagerAgent().develop_application("Create a simple calculator")

    # The failure must surface as data on the result object.
    self.assertIsInstance(outcome, ManagerResult)
    self.assertFalse(outcome.success)
    self.assertEqual(outcome.iterations, 0)
    self.assertIn("Manager workflow failed", outcome.final_message)
    self.assertIn("Workflow failed", outcome.error_messages)
|
| 174 |
+
|
| 175 |
+
def test_format_result_as_markdown_success(self):
    """Test formatting a successful result as markdown.

    Builds a successful ``ManagerResult`` with two iterations and checks
    that the rendered markdown contains the success heading, the status
    line, the iteration count and the final message.
    """
    result = ManagerResult(
        success=True,
        planning_result=None,
        coding_result=None,
        testing_result=None,
        iterations=2,
        final_message="All steps completed successfully",
        error_messages=[],
    )

    manager = GradioManagerAgent()
    markdown = manager.format_result_as_markdown(result)

    # The heading carries a check-mark emoji (U+2705) on success; the
    # literal must stay on one line or the string is unterminated.
    self.assertIn("Development Workflow ✅", markdown)
    self.assertIn("Status**: Success", markdown)
    self.assertIn("Iterations**: 2", markdown)
    self.assertIn("All steps completed successfully", markdown)
|
| 194 |
+
|
| 195 |
+
def test_format_result_as_markdown_failure(self):
    """Test formatting a failed result as markdown.

    Builds a failed ``ManagerResult`` carrying two error messages and checks
    that the rendered markdown contains the failure heading, the status
    line, the iteration count, the final message and both error messages.
    """
    result = ManagerResult(
        success=False,
        planning_result=None,
        coding_result=None,
        testing_result=None,
        iterations=1,
        final_message="Workflow failed at planning stage",
        error_messages=["Planning agent error", "Configuration issue"],
    )

    manager = GradioManagerAgent()
    markdown = manager.format_result_as_markdown(result)

    # The heading carries a cross-mark emoji (U+274C) on failure.
    self.assertIn("Development Workflow ❌", markdown)
    self.assertIn("Status**: Failed", markdown)
    self.assertIn("Iterations**: 1", markdown)
    self.assertIn("Workflow failed at planning stage", markdown)
    # Every recorded error message must be listed individually.
    self.assertIn("Planning agent error", markdown)
    self.assertIn("Configuration issue", markdown)
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
# Run this module's tests with unittest's command-line runner when the file
# is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|
test_testing_agent.py
CHANGED
|
@@ -16,7 +16,6 @@ from testing_agent import (
|
|
| 16 |
GradioTestingAgent,
|
| 17 |
TestingResult,
|
| 18 |
check_app_health,
|
| 19 |
-
create_gradio_testing_agent,
|
| 20 |
run_gradio_app,
|
| 21 |
setup_venv_with_uv,
|
| 22 |
stop_gradio_processes,
|
|
@@ -265,20 +264,5 @@ class TestGradioTestingAgent(unittest.TestCase):
|
|
| 265 |
self.assertIn("/tmp/test.png", report)
|
| 266 |
|
| 267 |
|
| 268 |
-
class TestTestingAgentFactory(unittest.TestCase):
|
| 269 |
-
"""Test the factory function for creating testing agents."""
|
| 270 |
-
|
| 271 |
-
@patch("testing_agent.GradioTestingAgent")
|
| 272 |
-
def test_create_gradio_testing_agent(self, mock_agent_class):
|
| 273 |
-
"""Test creating a testing agent with factory function."""
|
| 274 |
-
mock_agent = Mock()
|
| 275 |
-
mock_agent_class.return_value = mock_agent
|
| 276 |
-
|
| 277 |
-
agent = create_gradio_testing_agent()
|
| 278 |
-
|
| 279 |
-
self.assertEqual(agent, mock_agent)
|
| 280 |
-
mock_agent_class.assert_called_once_with()
|
| 281 |
-
|
| 282 |
-
|
| 283 |
if __name__ == "__main__":
|
| 284 |
unittest.main()
|
|
|
|
| 16 |
GradioTestingAgent,
|
| 17 |
TestingResult,
|
| 18 |
check_app_health,
|
|
|
|
| 19 |
run_gradio_app,
|
| 20 |
setup_venv_with_uv,
|
| 21 |
stop_gradio_processes,
|
|
|
|
| 264 |
self.assertIn("/tmp/test.png", report)
|
| 265 |
|
| 266 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
# Run this module's tests with unittest's command-line runner when the file
# is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()
|
testing_agent.py
CHANGED
|
@@ -12,79 +12,13 @@ This module provides a specialized testing agent that can:
|
|
| 12 |
import os
|
| 13 |
import subprocess
|
| 14 |
import time
|
| 15 |
-
from dataclasses import dataclass
|
| 16 |
from pathlib import Path
|
| 17 |
|
| 18 |
from smolagents import LiteLLMModel, ToolCallingAgent, tool
|
| 19 |
|
| 20 |
-
from coding_agent import CodingResult
|
| 21 |
from settings import settings
|
| 22 |
|
| 23 |
|
| 24 |
-
@dataclass
|
| 25 |
-
class TestingResult:
|
| 26 |
-
"""Result of the testing agent containing validation details."""
|
| 27 |
-
|
| 28 |
-
success: bool
|
| 29 |
-
project_path: str
|
| 30 |
-
setup_successful: bool
|
| 31 |
-
server_launched: bool
|
| 32 |
-
ui_accessible: bool
|
| 33 |
-
test_cases_passed: list[str]
|
| 34 |
-
test_cases_failed: list[str]
|
| 35 |
-
error_messages: list[str]
|
| 36 |
-
screenshots: list[str]
|
| 37 |
-
performance_metrics: dict[str, float]
|
| 38 |
-
logs: str
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
@tool
|
| 42 |
-
def setup_venv_with_uv(project_path: str) -> str:
|
| 43 |
-
"""
|
| 44 |
-
Set up a virtual environment using uv for the Gradio project.
|
| 45 |
-
|
| 46 |
-
Args:
|
| 47 |
-
project_path: Path to the Gradio project directory
|
| 48 |
-
|
| 49 |
-
Returns:
|
| 50 |
-
Status message indicating success or failure
|
| 51 |
-
"""
|
| 52 |
-
try:
|
| 53 |
-
# Change to project directory
|
| 54 |
-
original_cwd = os.getcwd()
|
| 55 |
-
project_dir = Path(project_path)
|
| 56 |
-
|
| 57 |
-
if not project_dir.exists():
|
| 58 |
-
return f"Error: Project directory {project_path} does not exist"
|
| 59 |
-
|
| 60 |
-
os.chdir(project_dir)
|
| 61 |
-
|
| 62 |
-
# Install dependencies using uv
|
| 63 |
-
result = subprocess.run(
|
| 64 |
-
["uv", "sync"],
|
| 65 |
-
capture_output=True,
|
| 66 |
-
text=True,
|
| 67 |
-
timeout=300, # 5 minutes timeout
|
| 68 |
-
)
|
| 69 |
-
|
| 70 |
-
os.chdir(original_cwd)
|
| 71 |
-
|
| 72 |
-
if result.returncode == 0:
|
| 73 |
-
return f"Successfully set up virtual environment for {project_path}"
|
| 74 |
-
else:
|
| 75 |
-
return f"Error setting up venv: {result.stderr}"
|
| 76 |
-
|
| 77 |
-
except subprocess.TimeoutExpired:
|
| 78 |
-
os.chdir(original_cwd)
|
| 79 |
-
return "Error: uv sync timed out after 5 minutes"
|
| 80 |
-
except FileNotFoundError:
|
| 81 |
-
os.chdir(original_cwd)
|
| 82 |
-
return "Error: uv command not found. Please install uv first."
|
| 83 |
-
except Exception as e:
|
| 84 |
-
os.chdir(original_cwd)
|
| 85 |
-
return f"Unexpected error: {str(e)}"
|
| 86 |
-
|
| 87 |
-
|
| 88 |
@tool
|
| 89 |
def run_gradio_app(project_path: str, timeout: int = 30) -> str:
|
| 90 |
"""
|
|
@@ -106,7 +40,7 @@ def run_gradio_app(project_path: str, timeout: int = 30) -> str:
|
|
| 106 |
|
| 107 |
# Start the Gradio app in background
|
| 108 |
process = subprocess.Popen(
|
| 109 |
-
["uv", "run", "
|
| 110 |
cwd=project_dir,
|
| 111 |
stdout=subprocess.PIPE,
|
| 112 |
stderr=subprocess.PIPE,
|
|
@@ -291,6 +225,83 @@ def stop_gradio_processes() -> str:
|
|
| 291 |
return f"Error stopping processes: {str(e)}"
|
| 292 |
|
| 293 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
class GradioTestingAgent:
|
| 295 |
"""
|
| 296 |
A specialized ToolCallingAgent for testing Gradio applications.
|
|
@@ -317,6 +328,21 @@ class GradioTestingAgent:
|
|
| 317 |
verbosity_level: Level of verbosity for agent output (uses settings if None)
|
| 318 |
max_steps: Maximum number of testing steps (uses settings if None)
|
| 319 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
# Use settings as defaults, but allow override
|
| 321 |
self.model_id = model_id or settings.test_model_id
|
| 322 |
self.api_base_url = api_base_url or settings.api_base_url
|
|
@@ -333,11 +359,11 @@ class GradioTestingAgent:
|
|
| 333 |
|
| 334 |
# Define the tools for testing
|
| 335 |
testing_tools = [
|
| 336 |
-
setup_venv_with_uv,
|
| 337 |
run_gradio_app,
|
| 338 |
check_app_health,
|
| 339 |
test_gradio_ui_basic,
|
| 340 |
stop_gradio_processes,
|
|
|
|
| 341 |
]
|
| 342 |
|
| 343 |
# Initialize the ToolCallingAgent
|
|
@@ -346,270 +372,139 @@ class GradioTestingAgent:
|
|
| 346 |
tools=testing_tools,
|
| 347 |
verbosity_level=verbosity_level,
|
| 348 |
max_steps=max_steps,
|
|
|
|
|
|
|
| 349 |
)
|
| 350 |
|
| 351 |
self.sandbox_path = Path("sandbox")
|
| 352 |
|
| 353 |
-
def
|
| 354 |
"""
|
| 355 |
-
|
| 356 |
|
| 357 |
Args:
|
| 358 |
-
|
|
|
|
| 359 |
|
| 360 |
Returns:
|
| 361 |
-
|
| 362 |
"""
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
success=False,
|
| 366 |
-
project_path=coding_result.project_path,
|
| 367 |
-
setup_successful=False,
|
| 368 |
-
server_launched=False,
|
| 369 |
-
ui_accessible=False,
|
| 370 |
-
test_cases_passed=[],
|
| 371 |
-
test_cases_failed=["Coding agent failed to create application"],
|
| 372 |
-
error_messages=coding_result.error_messages,
|
| 373 |
-
screenshots=[],
|
| 374 |
-
performance_metrics={},
|
| 375 |
-
logs="Testing skipped due to coding failure",
|
| 376 |
-
)
|
| 377 |
-
|
| 378 |
-
project_path = coding_result.project_path
|
| 379 |
-
|
| 380 |
-
# Create comprehensive test prompt
|
| 381 |
-
test_prompt = f"""
|
| 382 |
-
You are a specialized testing agent for Gradio applications. Your task is to \
|
| 383 |
-
thoroughly test the Gradio application located at: {project_path}
|
| 384 |
-
|
| 385 |
-
Please perform the following testing steps in order:
|
| 386 |
-
|
| 387 |
-
1. **Environment Setup**: Use setup_venv_with_uv to ensure the virtual environment \
|
| 388 |
-
is properly configured
|
| 389 |
-
2. **Application Launch**: Use run_gradio_app to start the Gradio application
|
| 390 |
-
3. **Health Check**: Use check_app_health to verify the application is responding
|
| 391 |
-
4. **UI Testing**: Use test_gradio_ui_basic to test the user interface components
|
| 392 |
-
5. **Cleanup**: Use stop_gradio_processes to clean up after testing
|
| 393 |
-
|
| 394 |
-
For each step, report:
|
| 395 |
-
- Whether the step succeeded or failed
|
| 396 |
-
- Any error messages encountered
|
| 397 |
-
- Performance observations (loading times, responsiveness)
|
| 398 |
-
- Screenshots taken (if any)
|
| 399 |
-
|
| 400 |
-
If any critical step fails, still attempt the remaining steps where possible to \
|
| 401 |
-
gather maximum diagnostic information.
|
| 402 |
-
|
| 403 |
-
The application should be a functional Gradio app with interactive components. Test for:
|
| 404 |
-
- Proper page loading
|
| 405 |
-
- Presence of Gradio components
|
| 406 |
-
- Interactive elements (buttons, inputs, etc.)
|
| 407 |
-
- Basic functionality
|
| 408 |
-
|
| 409 |
-
Provide a comprehensive summary of all test results at the end.
|
| 410 |
-
"""
|
| 411 |
-
|
| 412 |
-
try:
|
| 413 |
-
# Run the testing workflow
|
| 414 |
-
result = self.agent.run(test_prompt)
|
| 415 |
-
|
| 416 |
-
# Parse the agent's response to create structured result
|
| 417 |
-
return self._parse_testing_response(result, project_path)
|
| 418 |
-
|
| 419 |
-
except Exception as e:
|
| 420 |
-
return TestingResult(
|
| 421 |
-
success=False,
|
| 422 |
-
project_path=project_path,
|
| 423 |
-
setup_successful=False,
|
| 424 |
-
server_launched=False,
|
| 425 |
-
ui_accessible=False,
|
| 426 |
-
test_cases_passed=[],
|
| 427 |
-
test_cases_failed=["Testing agent execution failed"],
|
| 428 |
-
error_messages=[str(e)],
|
| 429 |
-
screenshots=[],
|
| 430 |
-
performance_metrics={},
|
| 431 |
-
logs=f"Testing agent error: {str(e)}",
|
| 432 |
-
)
|
| 433 |
-
|
| 434 |
-
def _parse_testing_response(
|
| 435 |
-
self, response: str, project_path: str
|
| 436 |
-
) -> TestingResult:
|
| 437 |
-
"""
|
| 438 |
-
Parse the agent's testing response into a structured TestingResult.
|
| 439 |
-
|
| 440 |
-
Args:
|
| 441 |
-
response: Raw response from the testing agent
|
| 442 |
-
project_path: Path to the tested project
|
| 443 |
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
"""
|
| 447 |
-
# Initialize default values
|
| 448 |
-
setup_successful = False
|
| 449 |
-
server_launched = False
|
| 450 |
-
ui_accessible = False
|
| 451 |
-
test_cases_passed = []
|
| 452 |
-
test_cases_failed = []
|
| 453 |
-
error_messages = []
|
| 454 |
-
screenshots = []
|
| 455 |
-
performance_metrics = {}
|
| 456 |
-
|
| 457 |
-
# Simple parsing logic based on common success/failure indicators
|
| 458 |
-
response_lower = response.lower()
|
| 459 |
-
|
| 460 |
-
# Check for setup success
|
| 461 |
-
if "successfully set up virtual environment" in response_lower:
|
| 462 |
-
setup_successful = True
|
| 463 |
-
test_cases_passed.append("Virtual environment setup")
|
| 464 |
-
elif "error setting up venv" in response_lower:
|
| 465 |
-
test_cases_failed.append("Virtual environment setup")
|
| 466 |
-
|
| 467 |
-
# Check for server launch
|
| 468 |
-
if "successfully started gradio app" in response_lower:
|
| 469 |
-
server_launched = True
|
| 470 |
-
test_cases_passed.append("Gradio application launch")
|
| 471 |
-
elif "error running gradio app" in response_lower:
|
| 472 |
-
test_cases_failed.append("Gradio application launch")
|
| 473 |
-
|
| 474 |
-
# Check for health status
|
| 475 |
-
if "application is healthy" in response_lower:
|
| 476 |
-
ui_accessible = True
|
| 477 |
-
test_cases_passed.append("Application health check")
|
| 478 |
-
elif "cannot connect to" in response_lower:
|
| 479 |
-
test_cases_failed.append("Application health check")
|
| 480 |
-
|
| 481 |
-
# Check for UI testing
|
| 482 |
-
if (
|
| 483 |
-
"page loaded successfully" in response_lower
|
| 484 |
-
and "gradio container found" in response_lower
|
| 485 |
-
):
|
| 486 |
-
test_cases_passed.append("UI component testing")
|
| 487 |
-
elif "error during ui testing" in response_lower:
|
| 488 |
-
test_cases_failed.append("UI component testing")
|
| 489 |
-
|
| 490 |
-
# Look for screenshots
|
| 491 |
-
if "screenshot saved" in response_lower:
|
| 492 |
-
screenshots.append("/tmp/gradio_test_screenshot.png")
|
| 493 |
-
|
| 494 |
-
# Extract performance metrics if mentioned
|
| 495 |
-
if "response time:" in response_lower:
|
| 496 |
-
# Simple regex to extract response time
|
| 497 |
-
import re
|
| 498 |
-
|
| 499 |
-
time_match = re.search(r"response time: ([\d.]+)s", response_lower)
|
| 500 |
-
if time_match:
|
| 501 |
-
performance_metrics["response_time_seconds"] = float(
|
| 502 |
-
time_match.group(1)
|
| 503 |
-
)
|
| 504 |
-
|
| 505 |
-
# Determine overall success
|
| 506 |
-
success = (
|
| 507 |
-
setup_successful
|
| 508 |
-
and server_launched
|
| 509 |
-
and ui_accessible
|
| 510 |
-
and len(test_cases_failed) == 0
|
| 511 |
-
)
|
| 512 |
-
|
| 513 |
-
return TestingResult(
|
| 514 |
-
success=success,
|
| 515 |
-
project_path=project_path,
|
| 516 |
-
setup_successful=setup_successful,
|
| 517 |
-
server_launched=server_launched,
|
| 518 |
-
ui_accessible=ui_accessible,
|
| 519 |
-
test_cases_passed=test_cases_passed,
|
| 520 |
-
test_cases_failed=test_cases_failed,
|
| 521 |
-
error_messages=error_messages,
|
| 522 |
-
screenshots=screenshots,
|
| 523 |
-
performance_metrics=performance_metrics,
|
| 524 |
-
logs=response,
|
| 525 |
-
)
|
| 526 |
-
|
| 527 |
-
def generate_test_report(self, testing_result: TestingResult) -> str:
|
| 528 |
-
"""
|
| 529 |
-
Generate a comprehensive test report in markdown format.
|
| 530 |
-
|
| 531 |
-
Args:
|
| 532 |
-
testing_result: The result from testing the application
|
| 533 |
-
|
| 534 |
-
Returns:
|
| 535 |
-
Markdown-formatted test report
|
| 536 |
-
"""
|
| 537 |
-
status_emoji = "โ
" if testing_result.success else "โ"
|
| 538 |
-
|
| 539 |
-
report = f"""
|
| 540 |
-
# Gradio Application Test Report {status_emoji}
|
| 541 |
-
|
| 542 |
-
## Summary
|
| 543 |
-
- **Project Path**: `{testing_result.project_path}`
|
| 544 |
-
- **Overall Success**: {testing_result.success}
|
| 545 |
-
- **Environment Setup**: {"โ
" if testing_result.setup_successful else "โ"}
|
| 546 |
-
- **Server Launch**: {"โ
" if testing_result.server_launched else "โ"}
|
| 547 |
-
- **UI Accessibility**: {"โ
" if testing_result.ui_accessible else "โ"}
|
| 548 |
-
|
| 549 |
-
## Test Cases
|
| 550 |
-
|
| 551 |
-
### Passed ({len(testing_result.test_cases_passed)})
|
| 552 |
-
{chr(10).join(f"- โ
{case}" for case in testing_result.test_cases_passed)}
|
| 553 |
-
|
| 554 |
-
### Failed ({len(testing_result.test_cases_failed)})
|
| 555 |
-
{chr(10).join(f"- โ {case}" for case in testing_result.test_cases_failed)}
|
| 556 |
-
|
| 557 |
-
## Performance Metrics
|
| 558 |
-
{chr(10).join(f"- **{key}**: {value}" for key, value in \
|
| 559 |
-
testing_result.performance_metrics.items()) if testing_result.performance_metrics else \
|
| 560 |
-
"No performance metrics collected"}
|
| 561 |
-
|
| 562 |
-
## Screenshots
|
| 563 |
-
{chr(10).join(f"- {screenshot}" for screenshot in testing_result.screenshots) \
|
| 564 |
-
if testing_result.screenshots else "No screenshots captured"}
|
| 565 |
-
|
| 566 |
-
## Error Messages
|
| 567 |
-
{chr(10).join(f"- {error}" for error in testing_result.error_messages) \
|
| 568 |
-
if testing_result.error_messages else "No errors reported"}
|
| 569 |
-
|
| 570 |
-
## Detailed Logs
|
| 571 |
```
|
| 572 |
-
{
|
| 573 |
```
|
| 574 |
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 585 |
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
"""
|
| 589 |
-
return GradioTestingAgent()
|
| 590 |
|
| 591 |
|
| 592 |
if __name__ == "__main__":
|
| 593 |
# Example usage
|
| 594 |
-
from coding_agent import
|
| 595 |
from planning_agent import GradioPlanningAgent
|
| 596 |
|
| 597 |
# Create agents
|
| 598 |
planning_agent = GradioPlanningAgent()
|
| 599 |
-
coding_agent =
|
| 600 |
-
testing_agent =
|
| 601 |
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
plan = planning_agent.plan_application(
|
| 605 |
-
"Create a simple calculator with basic arithmetic operations"
|
| 606 |
)
|
| 607 |
|
| 608 |
-
|
| 609 |
-
implementation = coding_agent.implement_application(plan)
|
| 610 |
|
| 611 |
-
|
| 612 |
-
test_results = testing_agent.test_application(implementation)
|
| 613 |
|
| 614 |
-
print("
|
| 615 |
-
print(
|
|
|
|
| 12 |
import os
|
| 13 |
import subprocess
|
| 14 |
import time
|
|
|
|
| 15 |
from pathlib import Path
|
| 16 |
|
| 17 |
from smolagents import LiteLLMModel, ToolCallingAgent, tool
|
| 18 |
|
|
|
|
| 19 |
from settings import settings
|
| 20 |
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
@tool
|
| 23 |
def run_gradio_app(project_path: str, timeout: int = 30) -> str:
|
| 24 |
"""
|
|
|
|
| 40 |
|
| 41 |
# Start the Gradio app in background
|
| 42 |
process = subprocess.Popen(
|
| 43 |
+
["uv", "run", "gradio", "app.py"],
|
| 44 |
cwd=project_dir,
|
| 45 |
stdout=subprocess.PIPE,
|
| 46 |
stderr=subprocess.PIPE,
|
|
|
|
| 225 |
return f"Error stopping processes: {str(e)}"
|
| 226 |
|
| 227 |
|
| 228 |
+
@tool
|
| 229 |
+
def uv_add_packages(project_path: str, packages: str) -> str:
    """
    Add missing packages to the project using uv add.

    Each package is added with its own ``uv add`` invocation so that one
    failing package does not prevent the others from being added. The
    command runs with the project directory as its working directory; the
    working directory of the calling process is never changed. Every failure
    is reported through the returned message instead of being raised.

    Args:
        project_path: Path to the project directory containing pyproject.toml
        packages: Space-separated list of package names to add \
(e.g., "requests pandas numpy")

    Returns:
        Status message indicating success or failure of adding packages
    """
    try:
        project_dir = Path(project_path)

        if not project_dir.exists():
            return f"Error: Project directory {project_path} does not exist"

        # uv add edits pyproject.toml, so refuse to run without one.
        if not (project_dir / "pyproject.toml").exists():
            return f"Error: pyproject.toml not found in {project_path}"

        # str.split() with no argument already drops surrounding and repeated
        # whitespace, so every entry is a non-empty package name.
        package_list = packages.split()
        if not package_list:
            return "Error: No packages specified to add"

        added_packages = []
        failed_packages = []

        for package in package_list:
            result = subprocess.run(
                ["uv", "add", package],
                # Scope the directory to the child process instead of calling
                # os.chdir(), which would affect the whole process.
                cwd=project_dir,
                capture_output=True,
                text=True,
                timeout=120,  # 2 minutes timeout per package
            )

            if result.returncode == 0:
                added_packages.append(package)
            else:
                failed_packages.append(f"{package} ({result.stderr.strip()})")

        # Prepare status message; at least one of the two lists is non-empty
        # because package_list is non-empty.
        status_parts = []
        if added_packages:
            status_parts.append(f"Successfully added: {', '.join(added_packages)}")
        if failed_packages:
            status_parts.append(f"Failed to add: {'; '.join(failed_packages)}")

        return "; ".join(status_parts)

    except subprocess.TimeoutExpired:
        return f"Error: uv add timed out while adding packages: {packages}"
    except FileNotFoundError:
        # Raised by subprocess.run when the uv executable cannot be found.
        return "Error: uv command not found. Please install uv first."
    except Exception as e:
        # Tool boundary: report unexpected problems as text, never raise.
        return f"Unexpected error adding packages: {str(e)}"
|
| 303 |
+
|
| 304 |
+
|
| 305 |
class GradioTestingAgent:
|
| 306 |
"""
|
| 307 |
A specialized ToolCallingAgent for testing Gradio applications.
|
|
|
|
| 328 |
verbosity_level: Level of verbosity for agent output (uses settings if None)
|
| 329 |
max_steps: Maximum number of testing steps (uses settings if None)
|
| 330 |
"""
|
| 331 |
+
self.name = "testing_agent"
|
| 332 |
+
self.description = """Expert QA engineer specializing in Gradio application \
|
| 333 |
+
testing and validation.
|
| 334 |
+
|
| 335 |
+
This agent thoroughly tests Gradio applications by:
|
| 336 |
+
- Setting up virtual environments using uv
|
| 337 |
+
- Launching and health-checking Gradio applications
|
| 338 |
+
- Performing basic UI testing with browser automation
|
| 339 |
+
- Validating functionality and responsiveness
|
| 340 |
+
- Generating comprehensive test reports with screenshots
|
| 341 |
+
- Providing detailed error analysis and debugging information
|
| 342 |
+
|
| 343 |
+
Returns structured test results indicating success/failure with specific details \
|
| 344 |
+
about what works and what needs fixing."""
|
| 345 |
+
|
| 346 |
# Use settings as defaults, but allow override
|
| 347 |
self.model_id = model_id or settings.test_model_id
|
| 348 |
self.api_base_url = api_base_url or settings.api_base_url
|
|
|
|
| 359 |
|
| 360 |
# Define the tools for testing
|
| 361 |
testing_tools = [
|
|
|
|
| 362 |
run_gradio_app,
|
| 363 |
check_app_health,
|
| 364 |
test_gradio_ui_basic,
|
| 365 |
stop_gradio_processes,
|
| 366 |
+
uv_add_packages,
|
| 367 |
]
|
| 368 |
|
| 369 |
# Initialize the ToolCallingAgent
|
|
|
|
| 372 |
tools=testing_tools,
|
| 373 |
verbosity_level=verbosity_level,
|
| 374 |
max_steps=max_steps,
|
| 375 |
+
name=self.name,
|
| 376 |
+
description=self.description,
|
| 377 |
)
|
| 378 |
|
| 379 |
self.sandbox_path = Path("sandbox")
|
| 380 |
|
| 381 |
+
def __call__(self, task: str, **kwargs) -> str:
    """
    Handle testing tasks as a managed agent.

    Wraps the incoming task in a QA prompt (testing protocol, sandbox
    constraints, report template, guidelines) and runs it through
    ``self.agent``. Exceptions raised while running the agent are caught and
    returned as an error string, so this call does not raise for them.

    Args:
        task: The coding result or task description
        **kwargs: Additional keyword arguments (ignored)

    Returns:
        String response containing the formatted testing result
    """
    # The prompt lines are intentionally flush-left: they are part of the
    # string literal, and a trailing backslash joins a wrapped line to the
    # next one without inserting a newline.
    full_prompt = f"""You are an expert QA engineer specializing in \
Gradio application testing and validation.

**CONTEXT:**
You received this message from an expert Python developer:
```
{task}
```

**YOUR MISSION:**
Perform comprehensive testing of the Gradio application and provide a detailed \
quality assurance report.

**TESTING PROTOCOL:**
1. **Application Launch**: Use `run_gradio_app` to start the application
2. **Dependency Management**: If missing packages are detected, use `uv_add_packages` \
to add them
3. **Health Check**: Use `check_app_health` to verify HTTP response
4. **UI Testing**: Use `test_gradio_ui_basic` for basic interface validation
5. **Cleanup**: Use `stop_gradio_processes` to clean up after testing

**IMPORTANT CONSTRAINTS:**
- You can ONLY access files in the `./sandbox/` directory
- All projects to test will be located in subdirectories of `./sandbox/`
- Use relative paths starting with `./sandbox/[project_name]`

**REPORT FORMAT:**
Structure your final report as follows:

## 🧪 GRADIO APPLICATION TEST REPORT

### 📋 Test Summary
- **Application**: [App name/purpose]
- **Test Status**: ✅ PASSED / ❌ FAILED / ⚠️ PARTIAL
- **Test Duration**: [Time taken]
- **Key Findings**: [Brief summary]

### 🔧 Environment Setup
- **Virtual Environment**: [Status and details]
- **Dependencies**: [Installation results]
- **Setup Issues**: [Any problems encountered]

### 🚀 Application Launch
- **Startup Status**: [Success/failure]
- **Server URL**: [Access URL if successful]
- **Launch Time**: [Time to start]
- **Startup Logs**: [Relevant output]

### 🏥 Health Check
- **HTTP Response**: [Status code and response time]
- **Accessibility**: [Can the app be reached]
- **Performance**: [Response times, any issues]

### 🖥️ User Interface Testing
- **Page Load**: [Success/failure]
- **Gradio Container**: [Found/not found]
- **Interactive Elements**: [Count and types]
- **UI Responsiveness**: [Any issues]
- **Screenshots**: [Paths to saved images]

### ⚠️ Issues Found
- [List any problems, bugs, or concerns]
- [Include severity levels: CRITICAL, HIGH, MEDIUM, LOW]
- [Provide specific error messages and context]

### ✅ Recommendations
- [Suggestions for improvements]
- [Required fixes for critical issues]
- [Performance optimization suggestions]

### 📊 Test Metrics
- **Total Tests**: [Number]
- **Passed**: [Number]
- **Failed**: [Number]
- **Success Rate**: [Percentage]

**TESTING GUIDELINES:**
- Always clean up processes after testing
- Capture screenshots when possible for documentation
- Report specific error messages, not just generic failures
- Distinguish between setup issues vs. application issues
- Test both functionality and user experience
- Provide actionable feedback for developers

**ERROR HANDLING:**
- If environment setup fails, provide specific uv/dependency guidance
- If missing packages are detected, use `uv_add_packages` to add them automatically
- If app won't start, analyze logs for root cause and check for import errors
- If UI testing fails, check if it's a browser/selenium issue vs. app issue
- Always attempt cleanup even if earlier steps fail

Begin testing now and provide your comprehensive report."""
    try:
        return self.agent.run(full_prompt)

    except Exception as e:
        # Report the failure as text so the caller always receives a string.
        return f"❌ Testing failed: {str(e)}"
|
|
|
|
|
|
|
| 489 |
|
| 490 |
|
| 491 |
if __name__ == "__main__":
    # Demo: run the plan -> code -> test pipeline once and print the report.
    from coding_agent import GradioCodingAgent
    from planning_agent import GradioPlanningAgent

    # Build all three agents up front, before any of them is invoked.
    planner = GradioPlanningAgent()
    coder = GradioCodingAgent()
    tester = GradioTestingAgent()

    # Each stage is called with the previous stage's return value.
    request = "Create a simple calculator with basic arithmetic operations /no_think"
    plan_text = planner(request)
    code_text = coder(plan_text)
    report = tester(code_text)

    print("=== TEST REPORT ===")
    print(report)
|
uv.lock
CHANGED
|
@@ -8,7 +8,7 @@ resolution-markers = [
|
|
| 8 |
|
| 9 |
[manifest]
|
| 10 |
members = [
|
| 11 |
-
"gradio-
|
| 12 |
"likable",
|
| 13 |
]
|
| 14 |
|
|
@@ -428,9 +428,9 @@ wheels = [
|
|
| 428 |
]
|
| 429 |
|
| 430 |
[[package]]
|
| 431 |
-
name = "gradio-
|
| 432 |
version = "0.1.0"
|
| 433 |
-
source = { virtual = "sandbox/
|
| 434 |
dependencies = [
|
| 435 |
{ name = "gradio" },
|
| 436 |
]
|
|
|
|
| 8 |
|
| 9 |
[manifest]
|
| 10 |
members = [
|
| 11 |
+
"gradio-calculator",
|
| 12 |
"likable",
|
| 13 |
]
|
| 14 |
|
|
|
|
| 428 |
]
|
| 429 |
|
| 430 |
[[package]]
|
| 431 |
+
name = "gradio-calculator"
|
| 432 |
version = "0.1.0"
|
| 433 |
+
source = { virtual = "sandbox/gradio_calculator" }
|
| 434 |
dependencies = [
|
| 435 |
{ name = "gradio" },
|
| 436 |
]
|