jens.luecke committed on
Commit
f224834
·
1 Parent(s): dabb6ef

Refactor application architecture to use a manager agent

Browse files

- Replaced individual coding and planning agents with a new `GradioManagerAgent` that orchestrates the entire development workflow.
- Updated `app.py` to utilize the manager agent for generating AI responses, streamlining the process from planning to implementation.
- Introduced a new `manager_agent.py` file to handle the coordination of planning, coding, and testing agents.
- Enhanced `settings.py` to include configurations for the manager agent.
- Updated tests to cover the new manager agent functionality and ensure reliability.
- Improved project structure and dependencies in `pyproject.toml` to reflect changes in the agent architecture.

Files changed (10) hide show
  1. app.py +13 -143
  2. coding_agent.py +161 -258
  3. manager_agent.py +172 -0
  4. planning_agent.py +42 -166
  5. pyproject.toml +2 -0
  6. settings.py +40 -0
  7. test_manager_agent.py +219 -0
  8. test_testing_agent.py +0 -16
  9. testing_agent.py +202 -307
  10. uv.lock +3 -3
app.py CHANGED
@@ -4,165 +4,35 @@ import sys
4
 
5
  import gradio as gr
6
 
7
- from coding_agent import GradioCodingAgent
8
- from planning_agent import GradioPlanningAgent
9
  from settings import settings
10
  from utils import load_file
11
 
12
  gr.NO_RELOAD = False
13
 
14
- # Initialize the agents globally
15
- planning_agent = None
16
- coding_agent = None
17
-
18
-
19
- def get_planning_agent():
20
- """Get or initialize the planning agent (lazy loading)."""
21
- global planning_agent
22
- if planning_agent is None:
23
- try:
24
- planning_agent = GradioPlanningAgent()
25
- except Exception as e:
26
- print(f"Error initializing planning agent: {e}")
27
- return None
28
- return planning_agent
29
-
30
-
31
- def get_coding_agent():
32
- """Get or initialize the coding agent (lazy loading)."""
33
- global coding_agent
34
- if coding_agent is None:
35
- try:
36
- coding_agent = GradioCodingAgent()
37
- except Exception as e:
38
- print(f"Error initializing coding agent: {e}")
39
- return None
40
- return coding_agent
41
-
42
-
43
- # Enhanced AI response using both planning and coding agents
44
- def ai_response_with_planning_and_coding(message, history):
45
- """Generate AI response using the planning agent for planning and \
46
  coding agent for implementation."""
47
 
48
- planning_agent_instance = get_planning_agent()
49
- coding_agent_instance = get_coding_agent()
50
 
51
- if planning_agent_instance is None:
52
  # Fallback to mock response if planning agent fails to initialize
53
  response = (
54
- "Sorry, the planning agent is not available. "
55
  "Please check your API_KEY environment variable."
56
  )
57
- history.append({"role": "user", "content": message})
58
- history.append({"role": "assistant", "content": response})
59
- return history, ""
60
-
61
- if coding_agent_instance is None:
62
- # Fallback if coding agent fails to initialize
63
- response = (
64
- "Sorry, the coding agent is not available. "
65
- "Planning is available but implementation will be limited."
66
- )
67
- history.append({"role": "user", "content": message})
68
  history.append({"role": "assistant", "content": response})
69
  return history, ""
70
 
71
  try:
72
- # Step 1: Use the planning agent for planning
73
- history.append({"role": "user", "content": message})
74
- history.append(
75
- {"role": "assistant", "content": "๐ŸŽฏ Starting to plan your application..."}
76
- )
77
-
78
- planning_result = planning_agent_instance.plan_application(message)
79
-
80
- # Format the planning response
81
- action_summary = (
82
- planning_result.action_plan[:300] + "..."
83
- if len(planning_result.action_plan) > 300
84
- else planning_result.action_plan
85
- )
86
-
87
- components_list = chr(10).join(
88
- [f"โ€ข {comp}" for comp in planning_result.gradio_components[:5]]
89
- )
90
- dependencies_list = chr(10).join(
91
- [f"โ€ข {dep}" for dep in planning_result.dependencies[:5]]
92
- )
93
-
94
- planning_response = f"""โœ… **Planning Complete!**
95
-
96
- **Complexity**: {planning_result.estimated_complexity}
97
-
98
- **Key Gradio Components Needed**:
99
- {components_list}
100
-
101
- **Dependencies Required**:
102
- {dependencies_list}
103
-
104
- **High-Level Action Plan**:
105
- {action_summary}
106
-
107
- ๐Ÿš€ **Now starting implementation...**"""
108
-
109
- history.append({"role": "assistant", "content": planning_response})
110
-
111
- # Step 2: Use the coding agent for implementation
112
- history.append(
113
- {
114
- "role": "assistant",
115
- "content": "โšก Implementing your application with proper \
116
- project structure...",
117
- }
118
- )
119
-
120
- coding_result = coding_agent_instance.iterative_implementation(planning_result)
121
-
122
- # Format the implementation response
123
- if coding_result.success:
124
- implementation_response = f"""โœ… **Implementation Complete!**
125
-
126
- **Project Created**: `{coding_result.project_path}`
127
- **Features Implemented**: {len(coding_result.implemented_features)} components
128
- **Status**: Ready to run!
129
-
130
- Your Gradio application has been created with:
131
- - Proper `uv` project structure
132
- - All required dependencies installed
133
- - Complete README.md with usage instructions
134
- - Functional app.py with all requested features
135
-
136
- You can view and test your app in the **Preview** tab, or check the code in \
137
- the **Code** tab.
138
-
139
- To run locally: `cd {coding_result.project_path} && uv run python app.py`"""
140
-
141
- if coding_result.remaining_tasks:
142
- implementation_response += f"\n\n**Remaining Tasks**: \
143
- {chr(10).join([f'โ€ข {task}' for task in coding_result.remaining_tasks])}"
144
-
145
- else:
146
- implementation_response = f"""โš ๏ธ **Implementation Partially Complete**
147
-
148
- **Project Path**: `{coding_result.project_path}`
149
- **Issues Encountered**: {len(coding_result.error_messages)} errors
150
-
151
- **Error Messages**:
152
- {chr(10).join([f'โ€ข {error}' for error in coding_result.error_messages])}
153
-
154
- **Remaining Tasks**:
155
- {chr(10).join([f'โ€ข {task}' for task in coding_result.remaining_tasks])}
156
-
157
- The project structure has been set up, but some features may need manual completion."""
158
-
159
- history.append({"role": "assistant", "content": implementation_response})
160
 
161
  except Exception as e:
162
- error_response = (
163
- f"I encountered an error during planning and implementation: {str(e)}. "
164
- "Let me try a simpler approach..."
165
- )
166
  history.append({"role": "assistant", "content": error_response})
167
 
168
  return history, ""
@@ -328,13 +198,13 @@ complete applications*"
328
  # Event handlers for chat - updated to use the combined planning and
329
  # coding function
330
  msg_input.submit(
331
- ai_response_with_planning_and_coding,
332
  inputs=[msg_input, chatbot],
333
  outputs=[chatbot, msg_input],
334
  )
335
 
336
  send_btn.click(
337
- ai_response_with_planning_and_coding,
338
  inputs=[msg_input, chatbot],
339
  outputs=[chatbot, msg_input],
340
  )
 
4
 
5
  import gradio as gr
6
 
7
+ from manager_agent import GradioManagerAgent
 
8
  from settings import settings
9
  from utils import load_file
10
 
11
  gr.NO_RELOAD = False
12
 
13
+
14
+ def generate_ai_response(message, history):
15
+ """Generate AI response using the manager agent for planning and \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  coding agent for implementation."""
17
 
18
+ history.append({"role": "user", "content": message})
19
+ manager_agent_instance = GradioManagerAgent()
20
 
21
+ if manager_agent_instance is None:
22
  # Fallback to mock response if planning agent fails to initialize
23
  response = (
24
+ "Sorry, the manager agent is not available. "
25
  "Please check your API_KEY environment variable."
26
  )
 
 
 
 
 
 
 
 
 
 
 
27
  history.append({"role": "assistant", "content": response})
28
  return history, ""
29
 
30
  try:
31
+ manager_result = manager_agent_instance(message)
32
+ history.append({"role": "assistant", "content": manager_result})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  except Exception as e:
35
+ error_response = f"I encountered an error: {str(e)}"
 
 
 
36
  history.append({"role": "assistant", "content": error_response})
37
 
38
  return history, ""
 
198
  # Event handlers for chat - updated to use the combined planning and
199
  # coding function
200
  msg_input.submit(
201
+ generate_ai_response,
202
  inputs=[msg_input, chatbot],
203
  outputs=[chatbot, msg_input],
204
  )
205
 
206
  send_btn.click(
207
+ generate_ai_response,
208
  inputs=[msg_input, chatbot],
209
  outputs=[chatbot, msg_input],
210
  )
coding_agent.py CHANGED
@@ -16,11 +16,9 @@ from dataclasses import dataclass
16
  from pathlib import Path
17
 
18
  from mcp import StdioServerParameters
19
- from smolagents import LiteLLMModel, MCPClient, ToolCallingAgent
20
 
21
- from planning_agent import PlanningResult
22
  from settings import settings
23
- from utils import load_file
24
 
25
 
26
  @dataclass
@@ -35,6 +33,72 @@ class CodingResult:
35
  final_app_code: str
36
 
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  class GradioCodingAgent:
39
  """
40
  A specialized CodeAgent for implementing Gradio applications.
@@ -61,6 +125,22 @@ class GradioCodingAgent:
61
  verbosity_level: Level of verbosity for agent output (uses settings if None)
62
  max_steps: Maximum number of coding steps (uses settings if None)
63
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  # Use settings as defaults, but allow override
65
  self.model_id = model_id or settings.code_model_id
66
  self.api_base_url = api_base_url or settings.api_base_url
@@ -86,14 +166,19 @@ class GradioCodingAgent:
86
 
87
  self.mcp_client = MCPClient(server_parameters)
88
 
89
- tool_collection = self.mcp_client.get_tools()
 
 
 
90
 
91
  # Initialize the CodeAgent with tools for file operations and project setup
92
  self.agent = ToolCallingAgent(
93
  model=self.model,
94
- tools=tool_collection,
95
  verbosity_level=verbosity_level,
96
  max_steps=max_steps,
 
 
97
  )
98
 
99
  self.sandbox_path = Path("sandbox")
@@ -121,255 +206,79 @@ class GradioCodingAgent:
121
  except Exception:
122
  pass
123
 
124
- def setup_project_structure(self, project_name: str = "gradio_app") -> bool:
125
- """
126
- Set up the initial project structure using uv.
127
-
128
- Args:
129
- project_name: Name of the project
130
-
131
- Returns:
132
- bool: True if setup was successful
133
- """
134
- try:
135
- # Ensure sandbox directory exists and is clean
136
- if self.sandbox_path.exists():
137
- shutil.rmtree(self.sandbox_path)
138
- self.sandbox_path.mkdir(exist_ok=True)
139
-
140
- # Change to sandbox directory
141
- os.chdir(self.sandbox_path)
142
-
143
- # Initialize with uv
144
- subprocess.run(
145
- ["uv", "init", project_name],
146
- capture_output=True,
147
- text=True,
148
- check=True,
149
- )
150
-
151
- # Change to project directory
152
- os.chdir(project_name)
153
-
154
- # Add gradio as a dependency
155
- subprocess.run(
156
- ["uv", "add", "gradio"],
157
- capture_output=True,
158
- text=True,
159
- check=True,
160
- )
161
-
162
- # Change back to workspace root
163
- os.chdir("../..")
164
-
165
- return True
166
-
167
- except subprocess.CalledProcessError as e:
168
- print(f"Error setting up project structure: {e}")
169
- print(f"stdout: {e.stdout}")
170
- print(f"stderr: {e.stderr}")
171
- return False
172
- except Exception as e:
173
- print(f"Unexpected error setting up project: {e}")
174
- return False
175
-
176
- def implement_application(self, planning_result: PlanningResult) -> CodingResult:
177
  """
178
- Implement the full Gradio application based on the planning result.
179
 
180
  Args:
181
- planning_result: The planning result from the planning agent
 
182
 
183
  Returns:
184
- CodingResult containing implementation details
185
  """
186
- # Set up project structure
187
- project_name = "gradio_app"
188
- if not self.setup_project_structure(project_name):
189
- return CodingResult(
190
- success=False,
191
- project_path="",
192
- implemented_features=[],
193
- remaining_tasks=["Failed to set up project structure"],
194
- error_messages=["Could not initialize uv project"],
195
- final_app_code="",
196
- )
197
-
198
- project_path = str(self.sandbox_path / project_name)
199
-
200
- # Create comprehensive prompt for implementation
201
- gradio_components = chr(10).join(
202
- [f"- {comp}" for comp in planning_result.gradio_components]
203
- )
204
- dependencies = chr(10).join(
205
- [f"- {dep}" for dep in planning_result.dependencies if dep != "gradio"]
206
- )
207
-
208
- # Create the user prompt for the specific implementation
209
- user_prompt = f"""You are an expert Python developer and Gradio \
210
- application architect.
211
-
212
- Your task is to implement a complete, working Gradio application based on \
213
- the provided plan.
214
-
215
- PROJECT SETUP:
216
- - You are working in the directory: {project_path}
217
- - The project has been initialized with `uv` and `gradio` is already installed
218
- - Use proper Python project structure with a main app.py file
219
- - Add any additional dependencies needed using `uv add package_name`
220
-
221
- IMPLEMENTATION REQUIREMENTS:
222
- 1. Create a complete, functional Gradio application in app.py
223
- 2. Follow the provided action plan and implementation plan exactly
224
- 3. Implement ALL gradio components mentioned in the plan
225
- 4. Add proper error handling and user feedback
226
- 5. Create a comprehensive README.md with usage instructions
227
- 6. Add all required dependencies to the project using `uv add`
228
- 7. Make sure the app can be run with `uv run python app.py`
229
- 8. Test the implementation and fix any issues
230
-
231
- QUALITY STANDARDS:
232
- - Write clean, well-documented code
233
- - Use proper type hints where appropriate
234
- - Follow Python best practices
235
- - Add docstrings to functions and classes
236
- - Handle edge cases and errors gracefully
237
- - Make the UI intuitive and user-friendly
238
- - When using multiline strings within multiline strings, properly escape them \
239
- using triple quotes
240
- Example: Instead of using f\"\"\"...\"\"\", use f'''...''' or escape inner quotes \
241
- like f\"\"\"...\\\"\\\"\\\"...\\\"\\\"\\\"...\"\"\"
242
-
243
- GRADIO COMPONENTS TO IMPLEMENT:
244
- {gradio_components}
245
-
246
- DEPENDENCIES TO ADD:
247
- {dependencies}
248
-
249
- ACTION PLAN TO FOLLOW:
250
- {planning_result.action_plan}
251
-
252
- IMPLEMENTATION PLAN TO FOLLOW:
253
- {planning_result.implementation_plan}
254
-
255
- TESTING PLAN TO CONSIDER:
256
- {planning_result.testing_plan}
257
-
258
- You must implement the complete application and ensure it works properly.
259
- Use subprocess to run `uv add` commands to install any needed packages.
260
- Create all necessary files and make sure the application runs without errors.
261
-
262
- Please implement the complete Gradio application based on the planning result.
263
-
264
- The application should be fully functional and implement all the features
265
- described in the plans.
266
-
267
- Working directory: {project_path}
268
-
269
- Please:
270
- 1. Start by creating/updating the README.md file with project description
271
- and usage instructions
272
- 2. Add any additional dependencies needed using `uv add package_name`
273
- 3. Create the complete app.py file with all the Gradio components and
274
- functionality
275
- 4. Test the implementation to ensure it works
276
- 5. Fix any issues that arise during testing
277
-
278
- Make sure the final application is complete and functional.
279
- /no_think
280
- """
281
 
282
  try:
283
- # Run the coding agent to implement the application
284
- self.agent.run(
285
- user_prompt,
286
- additional_args={
287
- "current_app_py": load_file(str(Path(project_path) / "app.py")),
288
- },
289
- )
290
-
291
- # Check if the implementation was successful
292
- app_file = Path(project_path) / "app.py"
293
- if app_file.exists():
294
- with open(app_file, encoding="utf-8") as f:
295
- final_app_code = f.read()
296
-
297
- return CodingResult(
298
- success=True,
299
- project_path=project_path,
300
- implemented_features=planning_result.gradio_components,
301
- remaining_tasks=[],
302
- error_messages=[],
303
- final_app_code=final_app_code,
304
- )
305
- else:
306
- return CodingResult(
307
- success=False,
308
- project_path=project_path,
309
- implemented_features=[],
310
- remaining_tasks=["Main app.py file was not created"],
311
- error_messages=["Implementation failed to create app.py"],
312
- final_app_code="",
313
- )
314
 
315
  except Exception as e:
316
- return CodingResult(
317
- success=False,
318
- project_path=project_path,
319
- implemented_features=[],
320
- remaining_tasks=["Complete implementation"],
321
- error_messages=[f"Coding agent error: {str(e)}"],
322
- final_app_code="",
323
- )
324
-
325
- def iterative_implementation(
326
- self, planning_result: PlanningResult, max_iterations: int = 3
327
- ) -> CodingResult:
328
- """
329
- Implement the application with iterative refinement.
330
-
331
- Args:
332
- planning_result: The planning result from the planning agent
333
- max_iterations: Maximum number of implementation iterations
334
-
335
- Returns:
336
- CodingResult containing final implementation details
337
- """
338
- last_result = None
339
-
340
- for iteration in range(max_iterations):
341
- print(f"๐Ÿ”„ Implementation iteration {iteration + 1}/{max_iterations}")
342
-
343
- # Implement or refine the application
344
- result = self.implement_application(planning_result)
345
-
346
- if result.success and not result.remaining_tasks:
347
- print(f"โœ… Implementation successful in {iteration + 1} iteration(s)")
348
- return result
349
-
350
- last_result = result
351
-
352
- if iteration < max_iterations - 1:
353
- print(f"โš ๏ธ Iteration {iteration + 1} incomplete. Refining...")
354
- # For subsequent iterations, we could modify the prompt to focus
355
- # on remaining tasks. This is a simplified version - in practice,
356
- # you'd want more sophisticated iteration logic
357
-
358
- print(f"โš ๏ธ Implementation completed with {max_iterations} iterations")
359
- return last_result or CodingResult(
360
- success=False,
361
- project_path="",
362
- implemented_features=[],
363
- remaining_tasks=["Complete implementation failed"],
364
- error_messages=["Maximum iterations reached without completion"],
365
- final_app_code="",
366
- )
367
-
368
-
369
- # Convenience function for the main app
370
- def create_gradio_coding_agent() -> GradioCodingAgent:
371
- """Create a GradioCodingAgent with default settings."""
372
- return GradioCodingAgent()
373
 
374
 
375
  if __name__ == "__main__":
@@ -378,17 +287,11 @@ if __name__ == "__main__":
378
 
379
  # Test with a simple planning result
380
  planning_agent = GradioPlanningAgent()
381
- planning_result = planning_agent.plan_application(
382
- "Create a simple text-to-text translator app"
383
- )
384
-
385
- # Create coding agent and implement
386
- coding_agent = create_gradio_coding_agent()
387
- coding_result = coding_agent.iterative_implementation(planning_result)
388
-
389
- print("Coding Result:")
390
- print(f"Success: {coding_result.success}")
391
- print(f"Project Path: {coding_result.project_path}")
392
- print(f"Implemented Features: {coding_result.implemented_features}")
393
- print(f"Remaining Tasks: {coding_result.remaining_tasks}")
394
- print(f"Error Messages: {coding_result.error_messages}")
 
16
  from pathlib import Path
17
 
18
  from mcp import StdioServerParameters
19
+ from smolagents import LiteLLMModel, MCPClient, ToolCallingAgent, tool
20
 
 
21
  from settings import settings
 
22
 
23
 
24
  @dataclass
 
33
  final_app_code: str
34
 
35
 
36
+ @tool
37
+ def setup_project_structure(project_name: str = "gradio_app") -> str:
38
+ """
39
+ Set up the initial project structure using uv.
40
+
41
+ Args:
42
+ project_name: Name of the project
43
+
44
+ Returns:
45
+ Status message indicating success or failure
46
+ """
47
+ try:
48
+ sandbox_path = Path("sandbox")
49
+
50
+ # Ensure sandbox directory exists and is clean
51
+ if sandbox_path.exists():
52
+ shutil.rmtree(sandbox_path)
53
+ sandbox_path.mkdir(exist_ok=True)
54
+
55
+ # Store original working directory
56
+ original_cwd = os.getcwd()
57
+
58
+ # Change to sandbox directory
59
+ os.chdir(sandbox_path)
60
+
61
+ # Initialize with uv
62
+ subprocess.run(
63
+ ["uv", "init", project_name],
64
+ capture_output=True,
65
+ text=True,
66
+ check=True,
67
+ )
68
+
69
+ # Change to project directory
70
+ os.chdir(project_name)
71
+
72
+ # Add gradio as a dependency
73
+ subprocess.run(
74
+ ["uv", "add", "gradio"],
75
+ capture_output=True,
76
+ text=True,
77
+ check=True,
78
+ )
79
+
80
+ # Change back to workspace root
81
+ os.chdir(original_cwd)
82
+
83
+ return f"Successfully set up project structure for {project_name} \
84
+ in sandbox/{project_name}"
85
+
86
+ except subprocess.CalledProcessError as e:
87
+ # Restore working directory on error
88
+ try:
89
+ os.chdir(original_cwd)
90
+ except NameError:
91
+ pass
92
+ return f"Error setting up project structure: {e.stderr}"
93
+ except Exception as e:
94
+ # Restore working directory on error
95
+ try:
96
+ os.chdir(original_cwd)
97
+ except NameError:
98
+ pass
99
+ return f"Unexpected error setting up project: {str(e)}"
100
+
101
+
102
  class GradioCodingAgent:
103
  """
104
  A specialized CodeAgent for implementing Gradio applications.
 
125
  verbosity_level: Level of verbosity for agent output (uses settings if None)
126
  max_steps: Maximum number of coding steps (uses settings if None)
127
  """
128
+ self.name = "coding_agent"
129
+ self.description = """Expert Python developer specializing in Gradio \
130
+ application implementation.
131
+
132
+ This agent takes planning results and creates complete, working Gradio \
133
+ applications with:
134
+ - Proper project structure using uv for package management
135
+ - Complete implementation of all planned features
136
+ - Working app.py file with functional Gradio interface
137
+ - Proper dependency management and documentation
138
+ - Error handling and iterative development approach
139
+
140
+ The agent only exits when the full plan is implemented successfully.
141
+ Handles complex applications and follows best practices for Python/Gradio \
142
+ development."""
143
+
144
  # Use settings as defaults, but allow override
145
  self.model_id = model_id or settings.code_model_id
146
  self.api_base_url = api_base_url or settings.api_base_url
 
166
 
167
  self.mcp_client = MCPClient(server_parameters)
168
 
169
+ # Get MCP tools and add our custom tools
170
+ mcp_tools = self.mcp_client.get_tools()
171
+ custom_tools = [setup_project_structure]
172
+ all_tools = list(mcp_tools) + custom_tools
173
 
174
  # Initialize the CodeAgent with tools for file operations and project setup
175
  self.agent = ToolCallingAgent(
176
  model=self.model,
177
+ tools=all_tools,
178
  verbosity_level=verbosity_level,
179
  max_steps=max_steps,
180
+ name=self.name,
181
+ description=self.description,
182
  )
183
 
184
  self.sandbox_path = Path("sandbox")
 
206
  except Exception:
207
  pass
208
 
209
+ def __call__(self, task: str, **kwargs) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  """
211
+ Handle coding tasks as a managed agent.
212
 
213
  Args:
214
+ task: The planning result or task description
215
+ **kwargs: Additional keyword arguments (ignored)
216
 
217
  Returns:
218
+ String response containing the formatted coding result
219
  """
220
+ full_prompt = f"""You are an expert Python developer specializing in \
221
+ Gradio application implementation.
222
+
223
+ Your mission is to implement a complete, working Gradio application based \
224
+ on the following architectural plan:
225
+
226
+ ```
227
+ {task}
228
+ ```
229
+
230
+ ## Implementation Guidelines:
231
+
232
+ ### 1. Project Setup
233
+ - ALWAYS start by calling setup_project_structure() to create the \
234
+ proper project structure
235
+ - Use uv for package management (already configured)
236
+ - The project will be created in ./sandbox/ directory
237
+
238
+ ### 2. Implementation Requirements
239
+ - Create a complete, functional Gradio application
240
+ - Implement ALL features described in the plan
241
+ - Write clean, well-documented Python code
242
+ - Follow best practices for Gradio development
243
+ - Ensure proper error handling and user feedback
244
+
245
+ ### 3. File Structure
246
+ - Create app.py as the main application file
247
+ - Add any necessary helper modules or utilities
248
+ - Include proper imports and dependencies
249
+ - Document code with comments and docstrings
250
+
251
+ ### 4. Gradio Interface Guidelines
252
+ - Create an intuitive and user-friendly interface
253
+ - Use appropriate Gradio components for each feature
254
+ - Implement proper input validation and error handling
255
+ - Ensure responsive design and good UX practices
256
+ - Add helpful descriptions and examples where needed
257
+
258
+ ### 5. Quality Standards
259
+ - Test your implementation thoroughly
260
+ - Handle edge cases and error scenarios
261
+ - Provide clear feedback to users
262
+ - Ensure the app runs without errors
263
+ - Follow Python coding standards (PEP 8)
264
+
265
+ ### 6. Completion Criteria
266
+ - All planned features are fully implemented
267
+ - The application runs successfully with `python app.py`
268
+ - Users can interact with all described functionality
269
+ - Code is clean, documented, and maintainable
270
+
271
+ Remember: You can ONLY access files in the ./sandbox directory.
272
+ Do not attempt to access files outside this sandbox environment.
273
+
274
+ Start by setting up the project structure, then implement each feature \
275
+ systematically until the complete application is ready."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
  try:
278
+ return self.agent.run(full_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
  except Exception as e:
281
+ return f"โŒ Implementation failed: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
 
283
 
284
  if __name__ == "__main__":
 
287
 
288
  # Test with a simple planning result
289
  planning_agent = GradioPlanningAgent()
290
+ planning_result = planning_agent("Create a simple calculator app")
291
+
292
+ # Create coding agent and implement using managed agent approach
293
+ coding_agent = GradioCodingAgent()
294
+ coding_result_str = coding_agent(planning_result)
295
+
296
+ print("=== CODING RESULT ===")
297
+ print(coding_result_str)
 
 
 
 
 
 
manager_agent.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Smolagents CodeAgent for managing a multi-agent development workflow.
3
+
4
+ This module provides a manager agent that orchestrates:
5
+ - Planning Agent: Creates comprehensive plans for Gradio applications
6
+ - Coding Agent: Implements the planned applications with proper project structure
7
+ - Testing Agent: Tests and validates the implemented applications
8
+
9
+ The manager follows this workflow:
10
+ 1. Receives user prompt
11
+ 2. Hands prompt to Planning Agent → gets PlanningResult
12
+ 3. Hands planning result to Coding Agent → gets CodingResult
13
+ 4. Hands coding result to Testing Agent → gets TestingResult
14
+ 5. If testing fails, hands errors back to Coding Agent for fixes
15
+ 6. Continues until testing passes or max iterations reached
16
+ """
17
+
18
+ from smolagents import CodeAgent, LiteLLMModel
19
+
20
+ from coding_agent import GradioCodingAgent
21
+ from planning_agent import GradioPlanningAgent
22
+ from settings import settings
23
+ from testing_agent import GradioTestingAgent
24
+
25
+
26
+ class GradioManagerAgent:
27
+ """
28
+ A manager agent that orchestrates the planning, coding, and testing workflow.
29
+
30
+ This agent coordinates the entire development process from initial planning
31
+ through implementation to final testing and validation.
32
+ """
33
+
34
+ def __init__(
35
+ self,
36
+ model_id: str | None = None,
37
+ api_base_url: str | None = None,
38
+ api_key: str | None = None,
39
+ verbosity_level: int | None = None,
40
+ max_steps: int | None = None,
41
+ max_iterations: int = 3,
42
+ ):
43
+ """
44
+ Initialize the Gradio Manager Agent.
45
+
46
+ Args:
47
+ model_id: Model ID to use for management (uses settings if None)
48
+ api_base_url: API base URL (uses settings if None)
49
+ api_key: API key (uses settings if None)
50
+ verbosity_level: Level of verbosity for agent output (uses settings if None)
51
+ max_steps: Maximum number of management steps (uses settings if None)
52
+ max_iterations: Maximum number of coding/testing iterations
53
+ """
54
+ self.name = "manager_agent"
55
+ self.description = """Expert development manager coordinating multi-agent \
56
+ Gradio application development.
57
+
58
+ This agent orchestrates a complete development workflow by managing:
59
+ - Planning Agent: Creates comprehensive application plans
60
+ - Coding Agent: Implements planned applications with proper structure
61
+ - Testing Agent: Validates and tests implemented applications
62
+
63
+ Coordinates iterative development cycles until applications are fully working \
64
+ and tested.
65
+ Provides comprehensive workflow management and detailed progress reporting."""
66
+
67
+ # Use settings as defaults, but allow override
68
+ self.model_id = model_id or settings.manager_model_id
69
+ self.api_base_url = api_base_url or settings.api_base_url
70
+ self.api_key = api_key or settings.api_key
71
+ verbosity_level = verbosity_level or settings.manager_verbosity
72
+ max_steps = max_steps or settings.max_manager_steps
73
+ self.max_iterations = max_iterations
74
+
75
+ # Initialize the language model
76
+ self.model = LiteLLMModel(
77
+ model_id=self.model_id,
78
+ api_base=self.api_base_url,
79
+ api_key=self.api_key,
80
+ )
81
+
82
+ # Create managed agent instances
83
+ self.planning_agent = GradioPlanningAgent()
84
+ self.coding_agent = GradioCodingAgent()
85
+ self.testing_agent = GradioTestingAgent()
86
+
87
+ # Initialize the main CodeAgent with the managed agents
88
+ self.agent = CodeAgent(
89
+ model=self.model,
90
+ tools=[], # No tools needed, only managed agents
91
+ managed_agents=[
92
+ self.planning_agent,
93
+ self.coding_agent,
94
+ self.testing_agent,
95
+ ],
96
+ verbosity_level=verbosity_level,
97
+ max_steps=max_steps,
98
+ name=self.name,
99
+ description=self.description,
100
+ )
101
+
102
+ def __call__(self, task: str, **kwargs) -> str:
103
+ """
104
+ Handle development management tasks as a managed agent.
105
+
106
+ Args:
107
+ task: The user's description of the application to build
108
+ **kwargs: Additional keyword arguments (ignored)
109
+
110
+ Returns:
111
+ String response containing the formatted workflow result
112
+ """
113
+ try:
114
+ # Run the development workflow
115
+ result = self.develop_application(task)
116
+
117
+ # Format the result for managed agent workflow
118
+ return self.format_result_as_markdown(result)
119
+
120
+ except Exception as e:
121
+ return f"โŒ Development workflow failed: {str(e)}"
122
+
123
+ def develop_application(self, prompt: str) -> str:
124
+ """
125
+ Manage the full development workflow from planning to testing.
126
+
127
+ Args:
128
+ prompt: User's description of the application to build
129
+
130
+ Returns:
131
+ String containing the complete workflow results
132
+ """
133
+ try:
134
+ # Create comprehensive task for the manager workflow
135
+ manager_task = f"""You are a development manager coordinating a \
136
+ team of specialists to build a Gradio application.
137
+
138
+ The user wants: {prompt}
139
+
140
+ Please coordinate the following workflow:
141
+
142
+ 1. **PLANNING PHASE**: Call the planning_agent to create a comprehensive \
143
+ plan for this application
144
+ 2. **IMPLEMENTATION PHASE**: Call the coding_agent with the planning results \
145
+ to implement the application
146
+ 3. **TESTING PHASE**: Call the testing_agent with the implementation results \
147
+ to test the application
148
+ 4. **ITERATION**: If testing fails, call the coding_agent again with the \
149
+ error details to fix issues
150
+ 5. **COMPLETION**: Continue until testing passes or maximum iterations reached
151
+
152
+ Start by calling the planning_agent with the user's request."""
153
+
154
+ # Run the coordinated workflow
155
+ result = self.agent.run(manager_task)
156
+
157
+ # Return successful result with agent's response
158
+ return str(result)
159
+
160
+ except Exception as e:
161
+ return f"Manager workflow failed: {str(e)}"
162
+
163
+
164
+ if __name__ == "__main__":
165
+ # Example usage
166
+ manager = GradioManagerAgent()
167
+
168
+ # Test the manager workflow using managed agent approach
169
+ result = manager("Create a simple calculator with basic arithmetic operations")
170
+
171
+ print("=== MANAGER RESULT ===")
172
+ print(result)
planning_agent.py CHANGED
@@ -7,25 +7,11 @@ This module provides a specialized planning agent that can:
7
  - Return an action, implementation and testing plan
8
  """
9
 
10
- from dataclasses import dataclass
11
-
12
- from smolagents import LiteLLMModel
13
 
14
  from settings import settings
15
 
16
 
17
- @dataclass
18
- class PlanningResult:
19
- """Result of the planning agent containing structured plans."""
20
-
21
- action_plan: str
22
- implementation_plan: str
23
- testing_plan: str
24
- gradio_components: list[str]
25
- estimated_complexity: str
26
- dependencies: list[str]
27
-
28
-
29
  class GradioPlanningAgent:
30
  """
31
  A specialized CodeAgent for planning Gradio applications.
@@ -50,6 +36,24 @@ class GradioPlanningAgent:
50
  api_key: API key (uses settings if None)
51
  verbosity_level: Level of verbosity for agent output (uses settings if None)
52
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Use settings as defaults, but allow override
54
  self.model_id = model_id or settings.model_id
55
  self.api_base_url = api_base_url or settings.api_base_url
@@ -63,7 +67,15 @@ class GradioPlanningAgent:
63
  api_key=self.api_key,
64
  )
65
 
66
- self.planning_prompt = """You are an expert software architect and Gradio \
 
 
 
 
 
 
 
 
67
  application developer. Your role is to create comprehensive, detailed plans \
68
  for building Gradio applications based on user requirements.
69
 
@@ -110,168 +122,32 @@ Be thorough, practical, and consider real-world constraints. Focus on creating \
110
  maintainable, user-friendly Gradio applications. Remember: NO CODE IMPLEMENTATION \
111
  at this stage - only architectural planning and structural design."""
112
 
113
- def plan_application(self, prompt: str) -> PlanningResult:
114
  """
115
- Create a comprehensive plan for a Gradio application based on the prompt.
116
 
117
  Args:
118
- prompt: Natural language description of the program to build
 
119
 
120
  Returns:
121
- PlanningResult containing structured plans
122
  """
 
123
 
124
- # Enhanced prompt for the agent
125
- user_prompt = f"""
126
  Create a comprehensive plan for building the following Gradio application:
127
 
128
- {prompt}
129
 
130
  Please provide detailed ACTION, IMPLEMENTATION, and TESTING plans following the \
131
  specified format. Consider all aspects of the application including UI/UX, \
132
- functionality, error handling, and deployment.
133
- """
134
-
135
- messages = [
136
- {"role": "system", "content": self.planning_prompt},
137
- {"role": "user", "content": user_prompt},
138
- ]
139
- response = self.model.generate(messages)
140
-
141
- # Parse the response into structured result
142
- return self._parse_planning_response(response.content)
143
-
144
- def _parse_planning_response(self, response: str) -> PlanningResult:
145
- """
146
- Parse the agent's response into a structured PlanningResult.
147
-
148
- Args:
149
- response: Raw response from the planning agent
150
-
151
- Returns:
152
- Structured PlanningResult
153
- """
154
 
155
- # Initialize default values
156
- action_plan = ""
157
- implementation_plan = ""
158
- testing_plan = ""
159
- gradio_components = []
160
- estimated_complexity = "Medium"
161
- dependencies = ["gradio"]
162
-
163
- # Parse sections from the response
164
- sections = self._extract_sections(response)
165
-
166
- action_plan = sections.get("ACTION PLAN", "")
167
- implementation_plan = sections.get("IMPLEMENTATION PLAN", "")
168
- testing_plan = sections.get("TESTING PLAN", "")
169
-
170
- # Parse gradio components list
171
- components_text = sections.get("GRADIO COMPONENTS", "")
172
- if components_text:
173
- gradio_components = self._extract_list_items(components_text)
174
-
175
- # Parse complexity
176
- complexity_text = sections.get("ESTIMATED COMPLEXITY", "")
177
- if complexity_text:
178
- estimated_complexity = complexity_text.strip()
179
-
180
- # Parse dependencies
181
- deps_text = sections.get("DEPENDENCIES", "")
182
- if deps_text:
183
- dependencies = ["gradio"] + self._extract_list_items(deps_text)
184
- # Remove duplicates while preserving order
185
- dependencies = list(dict.fromkeys(dependencies))
186
-
187
- return PlanningResult(
188
- action_plan=action_plan,
189
- implementation_plan=implementation_plan,
190
- testing_plan=testing_plan,
191
- gradio_components=gradio_components,
192
- estimated_complexity=estimated_complexity,
193
- dependencies=dependencies,
194
- )
195
-
196
- def _extract_sections(self, text: str) -> dict[str, str]:
197
- """Extract sections from markdown-formatted text."""
198
- sections = {}
199
- current_section = None
200
- current_content = []
201
-
202
- for line in text.split("\n"):
203
- line = line.strip()
204
-
205
- # Check if line is a section header
206
- if line.startswith("## "):
207
- # Save previous section if exists
208
- if current_section and current_content:
209
- sections[current_section] = "\n".join(current_content).strip()
210
-
211
- # Start new section
212
- current_section = line[3:].strip()
213
- current_content = []
214
- elif current_section:
215
- current_content.append(line)
216
-
217
- # Save last section
218
- if current_section and current_content:
219
- sections[current_section] = "\n".join(current_content).strip()
220
-
221
- return sections
222
-
223
- def _extract_list_items(self, text: str) -> list[str]:
224
- """Extract list items from text (handles bullet points, numbered lists, etc.)"""
225
- items = []
226
- for line in text.split("\n"):
227
- line = line.strip()
228
- if line:
229
- # Remove common list prefixes
230
- if line.startswith("- "):
231
- line = line[2:].strip()
232
- elif line.startswith("* "):
233
- line = line[2:].strip()
234
- elif ". " in line and line.split(".")[0].isdigit():
235
- line = line.split(".", 1)[1].strip()
236
-
237
- if line:
238
- items.append(line)
239
-
240
- return items
241
-
242
- def format_plan_as_markdown(self, result: PlanningResult) -> str:
243
- """
244
- Format the planning result as a well-structured markdown document.
245
-
246
- Args:
247
- result: PlanningResult to format
248
-
249
- Returns:
250
- Markdown-formatted string
251
- """
252
-
253
- markdown = f"""# Gradio Application Plan
254
-
255
- ## 📋 Action Plan
256
- {result.action_plan}
257
-
258
- ## 🔧 Implementation Plan
259
- {result.implementation_plan}
260
-
261
- ## 🧪 Testing Plan
262
- {result.testing_plan}
263
-
264
- ## 🎨 Gradio Components
265
- {chr(10).join([f"- {component}" for component in result.gradio_components])}
266
-
267
- ## ⚡ Estimated Complexity
268
- {result.estimated_complexity}
269
-
270
- ## 📦 Dependencies
271
- {chr(10).join([f"- {dep}" for dep in result.dependencies])}
272
- """
273
 
274
- return markdown
 
275
 
276
 
277
  # Example usage and testing
@@ -280,9 +156,9 @@ if __name__ == "__main__":
280
  agent = GradioPlanningAgent()
281
 
282
  # Test with a simple calculator example
283
- result = agent.plan_application(
284
  "Write a simple calculator app that can perform basic arithmetic operations"
285
  )
286
 
287
  print("=== PLANNING RESULT ===")
288
- print(agent.format_plan_as_markdown(result))
 
7
  - Return an action, implementation and testing plan
8
  """
9
 
10
+ from smolagents import LiteLLMModel, ToolCallingAgent
 
 
11
 
12
  from settings import settings
13
 
14
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  class GradioPlanningAgent:
16
  """
17
  A specialized CodeAgent for planning Gradio applications.
 
36
  api_key: API key (uses settings if None)
37
  verbosity_level: Level of verbosity for agent output (uses settings if None)
38
  """
39
+ self.name = "planning_agent"
40
+ self.description = """Expert software architect specializing in Gradio \
41
+ application planning.
42
+
43
+ This agent creates comprehensive, detailed plans for building Gradio applications \
44
+ based on user requirements.
45
+ It provides:
46
+ - High-level action plans breaking down the implementation steps
47
+ - Detailed technical implementation plans using Python and Gradio
48
+ - Comprehensive testing strategies
49
+ - Analysis of required Gradio components and dependencies
50
+ - Complexity estimation for the project
51
+
52
+ The agent focuses purely on planning and architecture - no actual code \
53
+ implementation.
54
+ Perfect for getting structured, well-thought-out plans before development \
55
+ begins."""
56
+
57
  # Use settings as defaults, but allow override
58
  self.model_id = model_id or settings.model_id
59
  self.api_base_url = api_base_url or settings.api_base_url
 
67
  api_key=self.api_key,
68
  )
69
 
70
+ self.agent = ToolCallingAgent(
71
+ model=self.model,
72
+ tools=[],
73
+ verbosity_level=verbosity_level,
74
+ name=self.name,
75
+ description=self.description,
76
+ )
77
+
78
+ self.system_prompt = """You are an expert software architect and Gradio \
79
  application developer. Your role is to create comprehensive, detailed plans \
80
  for building Gradio applications based on user requirements.
81
 
 
122
  maintainable, user-friendly Gradio applications. Remember: NO CODE IMPLEMENTATION \
123
  at this stage - only architectural planning and structural design."""
124
 
125
+ def __call__(self, task: str, **kwargs) -> str:
126
  """
127
+ Handle planning tasks as a managed agent.
128
 
129
  Args:
130
+ task: The user's description of the application to build
131
+ **kwargs: Additional keyword arguments (ignored)
132
 
133
  Returns:
134
+ String response containing the formatted planning result
135
  """
136
+ full_prompt = f"""{self.system_prompt}
137
 
 
 
138
  Create a comprehensive plan for building the following Gradio application:
139
 
140
+ {task}
141
 
142
  Please provide detailed ACTION, IMPLEMENTATION, and TESTING plans following the \
143
  specified format. Consider all aspects of the application including UI/UX, \
144
+ functionality, error handling, and deployment. /no_think"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
+ try:
147
+ return self.agent.run(full_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
+ except Exception as e:
150
+ return f"❌ Planning failed: {str(e)}"
151
 
152
 
153
  # Example usage and testing
 
156
  agent = GradioPlanningAgent()
157
 
158
  # Test with a simple calculator example
159
+ result = agent(
160
  "Write a simple calculator app that can perform basic arithmetic operations"
161
  )
162
 
163
  print("=== PLANNING RESULT ===")
164
+ print(result)
pyproject.toml CHANGED
@@ -46,4 +46,6 @@ members = [
46
  "sandbox/gradio_app",
47
  "sandbox/sandbox/gradio_app",
48
  "test_sandbox/test_project",
 
 
49
  ]
 
46
  "sandbox/gradio_app",
47
  "sandbox/sandbox/gradio_app",
48
  "test_sandbox/test_project",
49
+ "sandbox/calculator_app",
50
+ "sandbox/gradio_calculator",
51
  ]
settings.py CHANGED
@@ -23,6 +23,11 @@ class Settings:
23
  self.api_base_url: str | None = os.getenv("API_BASE_URL")
24
  self.api_key: str | None = os.getenv("API_KEY")
25
 
 
 
 
 
 
26
  # Coding Agent Settings
27
  self.code_model_id: str = os.getenv("CODE_MODEL_ID", self.model_id)
28
  self.coding_verbosity: int = int(os.getenv("CODING_VERBOSITY", "2"))
@@ -57,6 +62,14 @@ without a valid API key."
57
  print(" Set it in your .env file or as an environment variable.")
58
  print()
59
 
 
 
 
 
 
 
 
 
60
  if self.planning_verbosity not in [0, 1, 2]:
61
  print(
62
  f"⚠️ Warning: PLANNING_VERBOSITY={self.planning_verbosity} is not \
@@ -92,6 +105,17 @@ in valid range [0, 1, 2]"
92
 
93
  return config
94
 
 
 
 
 
 
 
 
 
 
 
 
95
  def get_code_model_config(self) -> dict:
96
  """Get model configuration for the coding agent."""
97
  config = {"model_id": self.code_model_id, "api_key": self.api_key}
@@ -111,6 +135,13 @@ in valid range [0, 1, 2]"
111
  "debug": self.gradio_debug,
112
  }
113
 
 
 
 
 
 
 
 
114
  def get_planning_config(self) -> dict:
115
  """Get planning agent configuration."""
116
  return {
@@ -147,6 +178,7 @@ in valid range [0, 1, 2]"
147
  """String representation of settings (excluding sensitive data)."""
148
  return f"""Settings(
149
  model_id='{self.model_id}',
 
150
  code_model_id='{self.code_model_id}',
151
  test_model_id='{self.test_model_id}',
152
  api_key={'***' if self.api_key else 'None'},
@@ -154,6 +186,8 @@ in valid range [0, 1, 2]"
154
  gradio_host='{self.gradio_host}',
155
  gradio_port={self.gradio_port},
156
  gradio_debug={self.gradio_debug},
 
 
157
  planning_verbosity={self.planning_verbosity},
158
  max_planning_steps={self.max_planning_steps},
159
  coding_verbosity={self.coding_verbosity},
@@ -186,12 +220,18 @@ if __name__ == "__main__":
186
  print("Model Config:")
187
  print(settings.get_model_config())
188
  print()
 
 
 
189
  print("Code Model Config:")
190
  print(settings.get_code_model_config())
191
  print()
192
  print("Gradio Config:")
193
  print(settings.get_gradio_config())
194
  print()
 
 
 
195
  print("Planning Config:")
196
  print(settings.get_planning_config())
197
  print()
 
23
  self.api_base_url: str | None = os.getenv("API_BASE_URL")
24
  self.api_key: str | None = os.getenv("API_KEY")
25
 
26
+ # Manager Agent Settings
27
+ self.manager_model_id: str = os.getenv("MANAGER_MODEL_ID", self.model_id)
28
+ self.manager_verbosity: int = int(os.getenv("MANAGER_VERBOSITY", "1"))
29
+ self.max_manager_steps: int = int(os.getenv("MAX_MANAGER_STEPS", "15"))
30
+
31
  # Coding Agent Settings
32
  self.code_model_id: str = os.getenv("CODE_MODEL_ID", self.model_id)
33
  self.coding_verbosity: int = int(os.getenv("CODING_VERBOSITY", "2"))
 
62
  print(" Set it in your .env file or as an environment variable.")
63
  print()
64
 
65
+ if self.manager_verbosity not in [0, 1, 2]:
66
+ print(
67
+ f"⚠️ Warning: MANAGER_VERBOSITY={self.manager_verbosity} is not \
68
+ in valid range [0, 1, 2]"
69
+ )
70
+ print(" Using default value of 1")
71
+ self.manager_verbosity = 1
72
+
73
  if self.planning_verbosity not in [0, 1, 2]:
74
  print(
75
  f"⚠️ Warning: PLANNING_VERBOSITY={self.planning_verbosity} is not \
 
105
 
106
  return config
107
 
108
+ def get_manager_model_config(self) -> dict:
109
+ """Get model configuration for the manager agent."""
110
+ config = {"model_id": self.manager_model_id, "api_key": self.api_key}
111
+
112
+ if self.api_base_url:
113
+ config["api_base_url"] = self.api_base_url
114
+ if self.api_key:
115
+ config["api_key"] = self.api_key
116
+
117
+ return config
118
+
119
  def get_code_model_config(self) -> dict:
120
  """Get model configuration for the coding agent."""
121
  config = {"model_id": self.code_model_id, "api_key": self.api_key}
 
135
  "debug": self.gradio_debug,
136
  }
137
 
138
+ def get_manager_config(self) -> dict:
139
+ """Get manager agent configuration."""
140
+ return {
141
+ "verbosity_level": self.manager_verbosity,
142
+ "max_steps": self.max_manager_steps,
143
+ }
144
+
145
  def get_planning_config(self) -> dict:
146
  """Get planning agent configuration."""
147
  return {
 
178
  """String representation of settings (excluding sensitive data)."""
179
  return f"""Settings(
180
  model_id='{self.model_id}',
181
+ manager_model_id='{self.manager_model_id}',
182
  code_model_id='{self.code_model_id}',
183
  test_model_id='{self.test_model_id}',
184
  api_key={'***' if self.api_key else 'None'},
 
186
  gradio_host='{self.gradio_host}',
187
  gradio_port={self.gradio_port},
188
  gradio_debug={self.gradio_debug},
189
+ manager_verbosity={self.manager_verbosity},
190
+ max_manager_steps={self.max_manager_steps},
191
  planning_verbosity={self.planning_verbosity},
192
  max_planning_steps={self.max_planning_steps},
193
  coding_verbosity={self.coding_verbosity},
 
220
  print("Model Config:")
221
  print(settings.get_model_config())
222
  print()
223
+ print("Manager Model Config:")
224
+ print(settings.get_manager_model_config())
225
+ print()
226
  print("Code Model Config:")
227
  print(settings.get_code_model_config())
228
  print()
229
  print("Gradio Config:")
230
  print(settings.get_gradio_config())
231
  print()
232
+ print("Manager Config:")
233
+ print(settings.get_manager_config())
234
+ print()
235
  print("Planning Config:")
236
  print(settings.get_planning_config())
237
  print()
test_manager_agent.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Test cases for the Gradio Manager Agent.
3
+
4
+ This module contains unit tests and integration tests for the manager agent
5
+ functionality, including managed agent coordination and workflow testing.
6
+ """
7
+
8
+ import unittest
9
+ from unittest.mock import Mock, patch
10
+
11
+ from manager_agent import (
12
+ GradioManagerAgent,
13
+ ManagerResult,
14
+ )
15
+
16
+
17
+ class TestGradioManagerAgent(unittest.TestCase):
18
+ """Test the main GradioManagerAgent class."""
19
+
20
+ def setUp(self):
21
+ """Set up test fixtures."""
22
+ # Mock settings
23
+ self.mock_settings_patcher = patch("manager_agent.settings")
24
+ self.mock_settings = self.mock_settings_patcher.start()
25
+
26
+ # Set up mock settings
27
+ self.mock_settings.manager_model_id = "test-manager-model"
28
+ self.mock_settings.model_id = "test-model"
29
+ self.mock_settings.code_model_id = "test-code-model"
30
+ self.mock_settings.test_model_id = "test-test-model"
31
+ self.mock_settings.api_base_url = "http://test.api"
32
+ self.mock_settings.api_key = "test-key"
33
+ self.mock_settings.manager_verbosity = 1
34
+ self.mock_settings.planning_verbosity = 1
35
+ self.mock_settings.coding_verbosity = 1
36
+ self.mock_settings.testing_verbosity = 1
37
+ self.mock_settings.max_manager_steps = 10
38
+ self.mock_settings.max_coding_steps = 15
39
+ self.mock_settings.max_testing_steps = 10
40
+
41
+ def tearDown(self):
42
+ """Clean up test fixtures."""
43
+ self.mock_settings_patcher.stop()
44
+
45
+ @patch("manager_agent.LiteLLMModel")
46
+ @patch("manager_agent.ToolCallingAgent")
47
+ @patch("manager_agent.GradioPlanningAgent")
48
+ @patch("manager_agent.GradioCodingAgent")
49
+ @patch("manager_agent.GradioTestingAgent")
50
+ def test_manager_agent_initialization(
51
+ self,
52
+ mock_testing_agent,
53
+ mock_coding_agent,
54
+ mock_planning_agent,
55
+ mock_tool_calling_agent,
56
+ mock_litellm_model,
57
+ ):
58
+ """Test manager agent initialization."""
59
+ # Mock the managed agents
60
+ mock_planning_instance = Mock()
61
+ mock_planning_instance.name = "planning_agent"
62
+ mock_planning_instance.description = "Planning agent"
63
+ mock_planning_agent.return_value = mock_planning_instance
64
+
65
+ mock_coding_instance = Mock()
66
+ mock_coding_instance.name = "coding_agent"
67
+ mock_coding_instance.description = "Coding agent"
68
+ mock_coding_agent.return_value = mock_coding_instance
69
+
70
+ mock_testing_instance = Mock()
71
+ mock_testing_instance.name = "testing_agent"
72
+ mock_testing_instance.description = "Testing agent"
73
+ mock_testing_agent.return_value = mock_testing_instance
74
+
75
+ # Create manager agent
76
+ manager = GradioManagerAgent()
77
+
78
+ # Verify initialization
79
+ self.assertIsInstance(manager, GradioManagerAgent)
80
+ self.assertEqual(manager.max_iterations, 3)
81
+ mock_litellm_model.assert_called_once()
82
+ mock_tool_calling_agent.assert_called_once()
83
+
84
+ @patch("manager_agent.LiteLLMModel")
85
+ @patch("manager_agent.ToolCallingAgent")
86
+ @patch("manager_agent.GradioPlanningAgent")
87
+ @patch("manager_agent.GradioCodingAgent")
88
+ @patch("manager_agent.GradioTestingAgent")
89
+ def test_develop_application_success(
90
+ self,
91
+ mock_testing_agent,
92
+ mock_coding_agent,
93
+ mock_planning_agent,
94
+ mock_tool_calling_agent,
95
+ mock_litellm_model,
96
+ ):
97
+ """Test successful application development workflow."""
98
+ # Mock the managed agents
99
+ mock_planning_instance = Mock()
100
+ mock_planning_instance.name = "planning_agent"
101
+ mock_planning_instance.description = "Planning agent"
102
+ mock_planning_agent.return_value = mock_planning_instance
103
+
104
+ mock_coding_instance = Mock()
105
+ mock_coding_instance.name = "coding_agent"
106
+ mock_coding_instance.description = "Coding agent"
107
+ mock_coding_agent.return_value = mock_coding_instance
108
+
109
+ mock_testing_instance = Mock()
110
+ mock_testing_instance.name = "testing_agent"
111
+ mock_testing_instance.description = "Testing agent"
112
+ mock_testing_agent.return_value = mock_testing_instance
113
+
114
+ # Mock the main agent
115
+ mock_agent_instance = Mock()
116
+ mock_agent_instance.run.return_value = "Workflow completed successfully"
117
+ mock_tool_calling_agent.return_value = mock_agent_instance
118
+
119
+ # Create manager and test workflow
120
+ manager = GradioManagerAgent()
121
+ result = manager.develop_application("Create a simple calculator")
122
+
123
+ # Verify the result
124
+ self.assertIsInstance(result, ManagerResult)
125
+ self.assertTrue(result.success)
126
+ self.assertEqual(result.iterations, 1)
127
+ self.assertIn("Workflow completed successfully", result.final_message)
128
+
129
+ @patch("manager_agent.LiteLLMModel")
130
+ @patch("manager_agent.ToolCallingAgent")
131
+ @patch("manager_agent.GradioPlanningAgent")
132
+ @patch("manager_agent.GradioCodingAgent")
133
+ @patch("manager_agent.GradioTestingAgent")
134
+ def test_develop_application_failure(
135
+ self,
136
+ mock_testing_agent,
137
+ mock_coding_agent,
138
+ mock_planning_agent,
139
+ mock_tool_calling_agent,
140
+ mock_litellm_model,
141
+ ):
142
+ """Test application development workflow failure handling."""
143
+ # Mock the managed agents
144
+ mock_planning_instance = Mock()
145
+ mock_planning_instance.name = "planning_agent"
146
+ mock_planning_instance.description = "Planning agent"
147
+ mock_planning_agent.return_value = mock_planning_instance
148
+
149
+ mock_coding_instance = Mock()
150
+ mock_coding_instance.name = "coding_agent"
151
+ mock_coding_instance.description = "Coding agent"
152
+ mock_coding_agent.return_value = mock_coding_instance
153
+
154
+ mock_testing_instance = Mock()
155
+ mock_testing_instance.name = "testing_agent"
156
+ mock_testing_instance.description = "Testing agent"
157
+ mock_testing_agent.return_value = mock_testing_instance
158
+
159
+ # Mock the main agent to raise an exception
160
+ mock_agent_instance = Mock()
161
+ mock_agent_instance.run.side_effect = Exception("Workflow failed")
162
+ mock_tool_calling_agent.return_value = mock_agent_instance
163
+
164
+ # Create manager and test workflow
165
+ manager = GradioManagerAgent()
166
+ result = manager.develop_application("Create a simple calculator")
167
+
168
+ # Verify the error handling
169
+ self.assertIsInstance(result, ManagerResult)
170
+ self.assertFalse(result.success)
171
+ self.assertEqual(result.iterations, 0)
172
+ self.assertIn("Manager workflow failed", result.final_message)
173
+ self.assertIn("Workflow failed", result.error_messages)
174
+
175
+ def test_format_result_as_markdown_success(self):
176
+ """Test formatting a successful result as markdown."""
177
+ result = ManagerResult(
178
+ success=True,
179
+ planning_result=None,
180
+ coding_result=None,
181
+ testing_result=None,
182
+ iterations=2,
183
+ final_message="All steps completed successfully",
184
+ error_messages=[],
185
+ )
186
+
187
+ manager = GradioManagerAgent()
188
+ markdown = manager.format_result_as_markdown(result)
189
+
190
+ self.assertIn("Development Workflow ✅", markdown)
191
+ self.assertIn("Status**: Success", markdown)
192
+ self.assertIn("Iterations**: 2", markdown)
193
+ self.assertIn("All steps completed successfully", markdown)
194
+
195
+ def test_format_result_as_markdown_failure(self):
196
+ """Test formatting a failed result as markdown."""
197
+ result = ManagerResult(
198
+ success=False,
199
+ planning_result=None,
200
+ coding_result=None,
201
+ testing_result=None,
202
+ iterations=1,
203
+ final_message="Workflow failed at planning stage",
204
+ error_messages=["Planning agent error", "Configuration issue"],
205
+ )
206
+
207
+ manager = GradioManagerAgent()
208
+ markdown = manager.format_result_as_markdown(result)
209
+
210
+ self.assertIn("Development Workflow ❌", markdown)
211
+ self.assertIn("Status**: Failed", markdown)
212
+ self.assertIn("Iterations**: 1", markdown)
213
+ self.assertIn("Workflow failed at planning stage", markdown)
214
+ self.assertIn("Planning agent error", markdown)
215
+ self.assertIn("Configuration issue", markdown)
216
+
217
+
218
+ if __name__ == "__main__":
219
+ unittest.main()
test_testing_agent.py CHANGED
@@ -16,7 +16,6 @@ from testing_agent import (
16
  GradioTestingAgent,
17
  TestingResult,
18
  check_app_health,
19
- create_gradio_testing_agent,
20
  run_gradio_app,
21
  setup_venv_with_uv,
22
  stop_gradio_processes,
@@ -265,20 +264,5 @@ class TestGradioTestingAgent(unittest.TestCase):
265
  self.assertIn("/tmp/test.png", report)
266
 
267
 
268
- class TestTestingAgentFactory(unittest.TestCase):
269
- """Test the factory function for creating testing agents."""
270
-
271
- @patch("testing_agent.GradioTestingAgent")
272
- def test_create_gradio_testing_agent(self, mock_agent_class):
273
- """Test creating a testing agent with factory function."""
274
- mock_agent = Mock()
275
- mock_agent_class.return_value = mock_agent
276
-
277
- agent = create_gradio_testing_agent()
278
-
279
- self.assertEqual(agent, mock_agent)
280
- mock_agent_class.assert_called_once_with()
281
-
282
-
283
  if __name__ == "__main__":
284
  unittest.main()
 
16
  GradioTestingAgent,
17
  TestingResult,
18
  check_app_health,
 
19
  run_gradio_app,
20
  setup_venv_with_uv,
21
  stop_gradio_processes,
 
264
  self.assertIn("/tmp/test.png", report)
265
 
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  if __name__ == "__main__":
268
  unittest.main()
testing_agent.py CHANGED
@@ -12,79 +12,13 @@ This module provides a specialized testing agent that can:
12
  import os
13
  import subprocess
14
  import time
15
- from dataclasses import dataclass
16
  from pathlib import Path
17
 
18
  from smolagents import LiteLLMModel, ToolCallingAgent, tool
19
 
20
- from coding_agent import CodingResult
21
  from settings import settings
22
 
23
 
24
- @dataclass
25
- class TestingResult:
26
- """Result of the testing agent containing validation details."""
27
-
28
- success: bool
29
- project_path: str
30
- setup_successful: bool
31
- server_launched: bool
32
- ui_accessible: bool
33
- test_cases_passed: list[str]
34
- test_cases_failed: list[str]
35
- error_messages: list[str]
36
- screenshots: list[str]
37
- performance_metrics: dict[str, float]
38
- logs: str
39
-
40
-
41
- @tool
42
- def setup_venv_with_uv(project_path: str) -> str:
43
- """
44
- Set up a virtual environment using uv for the Gradio project.
45
-
46
- Args:
47
- project_path: Path to the Gradio project directory
48
-
49
- Returns:
50
- Status message indicating success or failure
51
- """
52
- try:
53
- # Change to project directory
54
- original_cwd = os.getcwd()
55
- project_dir = Path(project_path)
56
-
57
- if not project_dir.exists():
58
- return f"Error: Project directory {project_path} does not exist"
59
-
60
- os.chdir(project_dir)
61
-
62
- # Install dependencies using uv
63
- result = subprocess.run(
64
- ["uv", "sync"],
65
- capture_output=True,
66
- text=True,
67
- timeout=300, # 5 minutes timeout
68
- )
69
-
70
- os.chdir(original_cwd)
71
-
72
- if result.returncode == 0:
73
- return f"Successfully set up virtual environment for {project_path}"
74
- else:
75
- return f"Error setting up venv: {result.stderr}"
76
-
77
- except subprocess.TimeoutExpired:
78
- os.chdir(original_cwd)
79
- return "Error: uv sync timed out after 5 minutes"
80
- except FileNotFoundError:
81
- os.chdir(original_cwd)
82
- return "Error: uv command not found. Please install uv first."
83
- except Exception as e:
84
- os.chdir(original_cwd)
85
- return f"Unexpected error: {str(e)}"
86
-
87
-
88
  @tool
89
  def run_gradio_app(project_path: str, timeout: int = 30) -> str:
90
  """
@@ -106,7 +40,7 @@ def run_gradio_app(project_path: str, timeout: int = 30) -> str:
106
 
107
  # Start the Gradio app in background
108
  process = subprocess.Popen(
109
- ["uv", "run", "python", "app.py"],
110
  cwd=project_dir,
111
  stdout=subprocess.PIPE,
112
  stderr=subprocess.PIPE,
@@ -291,6 +225,83 @@ def stop_gradio_processes() -> str:
291
  return f"Error stopping processes: {str(e)}"
292
 
293
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  class GradioTestingAgent:
295
  """
296
  A specialized ToolCallingAgent for testing Gradio applications.
@@ -317,6 +328,21 @@ class GradioTestingAgent:
317
  verbosity_level: Level of verbosity for agent output (uses settings if None)
318
  max_steps: Maximum number of testing steps (uses settings if None)
319
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  # Use settings as defaults, but allow override
321
  self.model_id = model_id or settings.test_model_id
322
  self.api_base_url = api_base_url or settings.api_base_url
@@ -333,11 +359,11 @@ class GradioTestingAgent:
333
 
334
  # Define the tools for testing
335
  testing_tools = [
336
- setup_venv_with_uv,
337
  run_gradio_app,
338
  check_app_health,
339
  test_gradio_ui_basic,
340
  stop_gradio_processes,
 
341
  ]
342
 
343
  # Initialize the ToolCallingAgent
@@ -346,270 +372,139 @@ class GradioTestingAgent:
346
  tools=testing_tools,
347
  verbosity_level=verbosity_level,
348
  max_steps=max_steps,
 
 
349
  )
350
 
351
  self.sandbox_path = Path("sandbox")
352
 
353
- def test_application(self, coding_result: CodingResult) -> TestingResult:
354
  """
355
- Test the Gradio application created by the coding agent.
356
 
357
  Args:
358
- coding_result: The result from the coding agent
 
359
 
360
  Returns:
361
- TestingResult containing comprehensive test information
362
  """
363
- if not coding_result.success:
364
- return TestingResult(
365
- success=False,
366
- project_path=coding_result.project_path,
367
- setup_successful=False,
368
- server_launched=False,
369
- ui_accessible=False,
370
- test_cases_passed=[],
371
- test_cases_failed=["Coding agent failed to create application"],
372
- error_messages=coding_result.error_messages,
373
- screenshots=[],
374
- performance_metrics={},
375
- logs="Testing skipped due to coding failure",
376
- )
377
-
378
- project_path = coding_result.project_path
379
-
380
- # Create comprehensive test prompt
381
- test_prompt = f"""
382
- You are a specialized testing agent for Gradio applications. Your task is to \
383
- thoroughly test the Gradio application located at: {project_path}
384
-
385
- Please perform the following testing steps in order:
386
-
387
- 1. **Environment Setup**: Use setup_venv_with_uv to ensure the virtual environment \
388
- is properly configured
389
- 2. **Application Launch**: Use run_gradio_app to start the Gradio application
390
- 3. **Health Check**: Use check_app_health to verify the application is responding
391
- 4. **UI Testing**: Use test_gradio_ui_basic to test the user interface components
392
- 5. **Cleanup**: Use stop_gradio_processes to clean up after testing
393
-
394
- For each step, report:
395
- - Whether the step succeeded or failed
396
- - Any error messages encountered
397
- - Performance observations (loading times, responsiveness)
398
- - Screenshots taken (if any)
399
-
400
- If any critical step fails, still attempt the remaining steps where possible to \
401
- gather maximum diagnostic information.
402
-
403
- The application should be a functional Gradio app with interactive components. Test for:
404
- - Proper page loading
405
- - Presence of Gradio components
406
- - Interactive elements (buttons, inputs, etc.)
407
- - Basic functionality
408
-
409
- Provide a comprehensive summary of all test results at the end.
410
- """
411
-
412
- try:
413
- # Run the testing workflow
414
- result = self.agent.run(test_prompt)
415
-
416
- # Parse the agent's response to create structured result
417
- return self._parse_testing_response(result, project_path)
418
-
419
- except Exception as e:
420
- return TestingResult(
421
- success=False,
422
- project_path=project_path,
423
- setup_successful=False,
424
- server_launched=False,
425
- ui_accessible=False,
426
- test_cases_passed=[],
427
- test_cases_failed=["Testing agent execution failed"],
428
- error_messages=[str(e)],
429
- screenshots=[],
430
- performance_metrics={},
431
- logs=f"Testing agent error: {str(e)}",
432
- )
433
-
434
- def _parse_testing_response(
435
- self, response: str, project_path: str
436
- ) -> TestingResult:
437
- """
438
- Parse the agent's testing response into a structured TestingResult.
439
-
440
- Args:
441
- response: Raw response from the testing agent
442
- project_path: Path to the tested project
443
 
444
- Returns:
445
- Structured TestingResult
446
- """
447
- # Initialize default values
448
- setup_successful = False
449
- server_launched = False
450
- ui_accessible = False
451
- test_cases_passed = []
452
- test_cases_failed = []
453
- error_messages = []
454
- screenshots = []
455
- performance_metrics = {}
456
-
457
- # Simple parsing logic based on common success/failure indicators
458
- response_lower = response.lower()
459
-
460
- # Check for setup success
461
- if "successfully set up virtual environment" in response_lower:
462
- setup_successful = True
463
- test_cases_passed.append("Virtual environment setup")
464
- elif "error setting up venv" in response_lower:
465
- test_cases_failed.append("Virtual environment setup")
466
-
467
- # Check for server launch
468
- if "successfully started gradio app" in response_lower:
469
- server_launched = True
470
- test_cases_passed.append("Gradio application launch")
471
- elif "error running gradio app" in response_lower:
472
- test_cases_failed.append("Gradio application launch")
473
-
474
- # Check for health status
475
- if "application is healthy" in response_lower:
476
- ui_accessible = True
477
- test_cases_passed.append("Application health check")
478
- elif "cannot connect to" in response_lower:
479
- test_cases_failed.append("Application health check")
480
-
481
- # Check for UI testing
482
- if (
483
- "page loaded successfully" in response_lower
484
- and "gradio container found" in response_lower
485
- ):
486
- test_cases_passed.append("UI component testing")
487
- elif "error during ui testing" in response_lower:
488
- test_cases_failed.append("UI component testing")
489
-
490
- # Look for screenshots
491
- if "screenshot saved" in response_lower:
492
- screenshots.append("/tmp/gradio_test_screenshot.png")
493
-
494
- # Extract performance metrics if mentioned
495
- if "response time:" in response_lower:
496
- # Simple regex to extract response time
497
- import re
498
-
499
- time_match = re.search(r"response time: ([\d.]+)s", response_lower)
500
- if time_match:
501
- performance_metrics["response_time_seconds"] = float(
502
- time_match.group(1)
503
- )
504
-
505
- # Determine overall success
506
- success = (
507
- setup_successful
508
- and server_launched
509
- and ui_accessible
510
- and len(test_cases_failed) == 0
511
- )
512
-
513
- return TestingResult(
514
- success=success,
515
- project_path=project_path,
516
- setup_successful=setup_successful,
517
- server_launched=server_launched,
518
- ui_accessible=ui_accessible,
519
- test_cases_passed=test_cases_passed,
520
- test_cases_failed=test_cases_failed,
521
- error_messages=error_messages,
522
- screenshots=screenshots,
523
- performance_metrics=performance_metrics,
524
- logs=response,
525
- )
526
-
527
- def generate_test_report(self, testing_result: TestingResult) -> str:
528
- """
529
- Generate a comprehensive test report in markdown format.
530
-
531
- Args:
532
- testing_result: The result from testing the application
533
-
534
- Returns:
535
- Markdown-formatted test report
536
- """
537
- status_emoji = "✅" if testing_result.success else "❌"
538
-
539
- report = f"""
540
- # Gradio Application Test Report {status_emoji}
541
-
542
- ## Summary
543
- - **Project Path**: `{testing_result.project_path}`
544
- - **Overall Success**: {testing_result.success}
545
- - **Environment Setup**: {"✅" if testing_result.setup_successful else "❌"}
546
- - **Server Launch**: {"✅" if testing_result.server_launched else "❌"}
547
- - **UI Accessibility**: {"✅" if testing_result.ui_accessible else "❌"}
548
-
549
- ## Test Cases
550
-
551
- ### Passed ({len(testing_result.test_cases_passed)})
552
- {chr(10).join(f"- ✅ {case}" for case in testing_result.test_cases_passed)}
553
-
554
- ### Failed ({len(testing_result.test_cases_failed)})
555
- {chr(10).join(f"- ❌ {case}" for case in testing_result.test_cases_failed)}
556
-
557
- ## Performance Metrics
558
- {chr(10).join(f"- **{key}**: {value}" for key, value in \
559
- testing_result.performance_metrics.items()) if testing_result.performance_metrics else \
560
- "No performance metrics collected"}
561
-
562
- ## Screenshots
563
- {chr(10).join(f"- {screenshot}" for screenshot in testing_result.screenshots) \
564
- if testing_result.screenshots else "No screenshots captured"}
565
-
566
- ## Error Messages
567
- {chr(10).join(f"- {error}" for error in testing_result.error_messages) \
568
- if testing_result.error_messages else "No errors reported"}
569
-
570
- ## Detailed Logs
571
  ```
572
- {testing_result.logs}
573
  ```
574
 
575
- ---
576
- *Report generated by GradioTestingAgent*
577
- """
578
-
579
- return report.strip()
580
-
581
-
582
- def create_gradio_testing_agent() -> GradioTestingAgent:
583
- """
584
- Create a Gradio testing agent with default settings.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
585
 
586
- Returns:
587
- Configured GradioTestingAgent instance
588
- """
589
- return GradioTestingAgent()
590
 
591
 
592
  if __name__ == "__main__":
593
  # Example usage
594
- from coding_agent import create_gradio_coding_agent
595
  from planning_agent import GradioPlanningAgent
596
 
597
  # Create agents
598
  planning_agent = GradioPlanningAgent()
599
- coding_agent = create_gradio_coding_agent()
600
- testing_agent = create_gradio_testing_agent()
601
 
602
- # Example workflow
603
- print("Planning a simple calculator app...")
604
- plan = planning_agent.plan_application(
605
- "Create a simple calculator with basic arithmetic operations"
606
  )
607
 
608
- print("Implementing the application...")
609
- implementation = coding_agent.implement_application(plan)
610
 
611
- print("Testing the application...")
612
- test_results = testing_agent.test_application(implementation)
613
 
614
- print("Test Report:")
615
- print(testing_agent.generate_test_report(test_results))
 
12
  import os
13
  import subprocess
14
  import time
 
15
  from pathlib import Path
16
 
17
  from smolagents import LiteLLMModel, ToolCallingAgent, tool
18
 
 
19
  from settings import settings
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  @tool
23
  def run_gradio_app(project_path: str, timeout: int = 30) -> str:
24
  """
 
40
 
41
  # Start the Gradio app in background
42
  process = subprocess.Popen(
43
+ ["uv", "run", "gradio", "app.py"],
44
  cwd=project_dir,
45
  stdout=subprocess.PIPE,
46
  stderr=subprocess.PIPE,
 
225
  return f"Error stopping processes: {str(e)}"
226
 
227
 
228
+ @tool
229
+ def uv_add_packages(project_path: str, packages: str) -> str:
230
+ """
231
+ Add missing packages to the project using uv add.
232
+
233
+ Args:
234
+ project_path: Path to the project directory containing pyproject.toml
235
+ packages: Space-separated list of package names to add \
236
+ (e.g., "requests pandas numpy")
237
+
238
+ Returns:
239
+ Status message indicating success or failure of adding packages
240
+ """
241
+ try:
242
+ # Change to project directory
243
+ original_cwd = os.getcwd()
244
+ project_dir = Path(project_path)
245
+
246
+ if not project_dir.exists():
247
+ return f"Error: Project directory {project_path} does not exist"
248
+
249
+ # Check if pyproject.toml exists
250
+ pyproject_file = project_dir / "pyproject.toml"
251
+ if not pyproject_file.exists():
252
+ return f"Error: pyproject.toml not found in {project_path}"
253
+
254
+ os.chdir(project_dir)
255
+
256
+ # Split packages and add them one by one for better error handling
257
+ package_list = packages.strip().split()
258
+ if not package_list:
259
+ return "Error: No packages specified to add"
260
+
261
+ added_packages = []
262
+ failed_packages = []
263
+
264
+ for package in package_list:
265
+ if not package.strip():
266
+ continue
267
+
268
+ result = subprocess.run(
269
+ ["uv", "add", package.strip()],
270
+ capture_output=True,
271
+ text=True,
272
+ timeout=120, # 2 minutes timeout per package
273
+ )
274
+
275
+ if result.returncode == 0:
276
+ added_packages.append(package.strip())
277
+ else:
278
+ failed_packages.append(f"{package.strip()} ({result.stderr.strip()})")
279
+
280
+ os.chdir(original_cwd)
281
+
282
+ # Prepare status message
283
+ status_parts = []
284
+ if added_packages:
285
+ status_parts.append(f"Successfully added: {', '.join(added_packages)}")
286
+ if failed_packages:
287
+ status_parts.append(f"Failed to add: {'; '.join(failed_packages)}")
288
+
289
+ if not status_parts:
290
+ return "No packages were processed"
291
+
292
+ return "; ".join(status_parts)
293
+
294
+ except subprocess.TimeoutExpired:
295
+ os.chdir(original_cwd)
296
+ return f"Error: uv add timed out while adding packages: {packages}"
297
+ except FileNotFoundError:
298
+ os.chdir(original_cwd)
299
+ return "Error: uv command not found. Please install uv first."
300
+ except Exception as e:
301
+ os.chdir(original_cwd)
302
+ return f"Unexpected error adding packages: {str(e)}"
303
+
304
+
305
  class GradioTestingAgent:
306
  """
307
  A specialized ToolCallingAgent for testing Gradio applications.
 
328
  verbosity_level: Level of verbosity for agent output (uses settings if None)
329
  max_steps: Maximum number of testing steps (uses settings if None)
330
  """
331
+ self.name = "testing_agent"
332
+ self.description = """Expert QA engineer specializing in Gradio application \
333
+ testing and validation.
334
+
335
+ This agent thoroughly tests Gradio applications by:
336
+ - Setting up virtual environments using uv
337
+ - Launching and health-checking Gradio applications
338
+ - Performing basic UI testing with browser automation
339
+ - Validating functionality and responsiveness
340
+ - Generating comprehensive test reports with screenshots
341
+ - Providing detailed error analysis and debugging information
342
+
343
+ Returns structured test results indicating success/failure with specific details \
344
+ about what works and what needs fixing."""
345
+
346
  # Use settings as defaults, but allow override
347
  self.model_id = model_id or settings.test_model_id
348
  self.api_base_url = api_base_url or settings.api_base_url
 
359
 
360
  # Define the tools for testing
361
  testing_tools = [
 
362
  run_gradio_app,
363
  check_app_health,
364
  test_gradio_ui_basic,
365
  stop_gradio_processes,
366
+ uv_add_packages,
367
  ]
368
 
369
  # Initialize the ToolCallingAgent
 
372
  tools=testing_tools,
373
  verbosity_level=verbosity_level,
374
  max_steps=max_steps,
375
+ name=self.name,
376
+ description=self.description,
377
  )
378
 
379
  self.sandbox_path = Path("sandbox")
380
 
381
+ def __call__(self, task: str, **kwargs) -> str:
382
  """
383
+ Handle testing tasks as a managed agent.
384
 
385
  Args:
386
+ task: The coding result or task description
387
+ **kwargs: Additional keyword arguments (ignored)
388
 
389
  Returns:
390
+ String response containing the formatted testing result
391
  """
392
+ full_prompt = f"""You are an expert QA engineer specializing in \
393
+ Gradio application testing and validation.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
+ **CONTEXT:**
396
+ You received this message from an expert Python developer:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  ```
398
+ {task}
399
  ```
400
 
401
+ **YOUR MISSION:**
402
+ Perform comprehensive testing of the Gradio application and provide a detailed \
403
+ quality assurance report.
404
+
405
+ **TESTING PROTOCOL:**
406
+ 1. **Application Launch**: Use `run_gradio_app` to start the application
407
+ 2. **Dependency Management**: If missing packages are detected, use `uv_add_packages` \
408
+ to add them
409
+ 3. **Health Check**: Use `check_app_health` to verify HTTP response
410
+ 4. **UI Testing**: Use `test_gradio_ui_basic` for basic interface validation
411
+ 5. **Cleanup**: Use `stop_gradio_processes` to clean up after testing
412
+
413
+ **IMPORTANT CONSTRAINTS:**
414
+ - You can ONLY access files in the `./sandbox/` directory
415
+ - All projects to test will be located in subdirectories of `./sandbox/`
416
+ - Use relative paths starting with `./sandbox/[project_name]`
417
+
418
+ **REPORT FORMAT:**
419
+ Structure your final report as follows:
420
+
421
+ ## 🧪 GRADIO APPLICATION TEST REPORT
422
+
423
+ ### 📋 Test Summary
424
+ - **Application**: [App name/purpose]
425
+ - **Test Status**: ✅ PASSED / ❌ FAILED / ⚠️ PARTIAL
426
+ - **Test Duration**: [Time taken]
427
+ - **Key Findings**: [Brief summary]
428
+
429
+ ### 🔧 Environment Setup
430
+ - **Virtual Environment**: [Status and details]
431
+ - **Dependencies**: [Installation results]
432
+ - **Setup Issues**: [Any problems encountered]
433
+
434
+ ### 🚀 Application Launch
435
+ - **Startup Status**: [Success/failure]
436
+ - **Server URL**: [Access URL if successful]
437
+ - **Launch Time**: [Time to start]
438
+ - **Startup Logs**: [Relevant output]
439
+
440
+ ### 🏥 Health Check
441
+ - **HTTP Response**: [Status code and response time]
442
+ - **Accessibility**: [Can the app be reached]
443
+ - **Performance**: [Response times, any issues]
444
+
445
+ ### 🖥️ User Interface Testing
446
+ - **Page Load**: [Success/failure]
447
+ - **Gradio Container**: [Found/not found]
448
+ - **Interactive Elements**: [Count and types]
449
+ - **UI Responsiveness**: [Any issues]
450
+ - **Screenshots**: [Paths to saved images]
451
+
452
+ ### ⚠️ Issues Found
453
+ - [List any problems, bugs, or concerns]
454
+ - [Include severity levels: CRITICAL, HIGH, MEDIUM, LOW]
455
+ - [Provide specific error messages and context]
456
+
457
+ ### ✅ Recommendations
458
+ - [Suggestions for improvements]
459
+ - [Required fixes for critical issues]
460
+ - [Performance optimization suggestions]
461
+
462
+ ### 📊 Test Metrics
463
+ - **Total Tests**: [Number]
464
+ - **Passed**: [Number]
465
+ - **Failed**: [Number]
466
+ - **Success Rate**: [Percentage]
467
+
468
+ **TESTING GUIDELINES:**
469
+ - Always clean up processes after testing
470
+ - Capture screenshots when possible for documentation
471
+ - Report specific error messages, not just generic failures
472
+ - Distinguish between setup issues vs. application issues
473
+ - Test both functionality and user experience
474
+ - Provide actionable feedback for developers
475
+
476
+ **ERROR HANDLING:**
477
+ - If environment setup fails, provide specific uv/dependency guidance
478
+ - If missing packages are detected, use `uv_add_packages` to add them automatically
479
+ - If app won't start, analyze logs for root cause and check for import errors
480
+ - If UI testing fails, check if it's a browser/selenium issue vs. app issue
481
+ - Always attempt cleanup even if earlier steps fail
482
+
483
+ Begin testing now and provide your comprehensive report."""
484
+ try:
485
+ return self.agent.run(full_prompt)
486
 
487
+ except Exception as e:
488
+ return f"❌ Testing failed: {str(e)}"
 
 
489
 
490
 
491
  if __name__ == "__main__":
492
  # Example usage
493
+ from coding_agent import GradioCodingAgent
494
  from planning_agent import GradioPlanningAgent
495
 
496
  # Create agents
497
  planning_agent = GradioPlanningAgent()
498
+ coding_agent = GradioCodingAgent()
499
+ testing_agent = GradioTestingAgent()
500
 
501
+ plan_result = planning_agent(
502
+ "Create a simple calculator with basic arithmetic operations /no_think"
 
 
503
  )
504
 
505
+ implementation_result = coding_agent(plan_result)
 
506
 
507
+ test_result = testing_agent(implementation_result)
 
508
 
509
+ print("=== TEST REPORT ===")
510
+ print(test_result)
uv.lock CHANGED
@@ -8,7 +8,7 @@ resolution-markers = [
8
 
9
  [manifest]
10
  members = [
11
- "gradio-app",
12
  "likable",
13
  ]
14
 
@@ -428,9 +428,9 @@ wheels = [
428
  ]
429
 
430
  [[package]]
431
- name = "gradio-app"
432
  version = "0.1.0"
433
- source = { virtual = "sandbox/gradio_app" }
434
  dependencies = [
435
  { name = "gradio" },
436
  ]
 
8
 
9
  [manifest]
10
  members = [
11
+ "gradio-calculator",
12
  "likable",
13
  ]
14
 
 
428
  ]
429
 
430
  [[package]]
431
+ name = "gradio-calculator"
432
  version = "0.1.0"
433
+ source = { virtual = "sandbox/gradio_calculator" }
434
  dependencies = [
435
  { name = "gradio" },
436
  ]