"""
Tests for parallel agent execution via asyncio.gather.
These tests verify:
1. All three agents can be instantiated independently
2. Each agent has the correct name and loads its prompt
3. Agent prompts don't overlap (security != performance != style)
4. asyncio.gather runs agents concurrently
5. If one agent fails, the others still succeed
Why parallel execution matters:
- Sequential: 3 agents × ~5 seconds each = ~15 seconds total
- Parallel: max(~5s, ~5s, ~5s) = ~5 seconds total (3x faster)
- We use asyncio.gather() which runs coroutines concurrently
- If one agent raises an exception, gather() can be configured to
continue or cancel the others. We handle exceptions inside each
agent's review() method, so gather() always succeeds.
"""
import asyncio
import pytest
from app.agents.performance_agent import PerformanceAgent
from app.agents.security_agent import SecurityAgent
from app.agents.style_agent import StyleAgent
# ─── Agent Identity Tests ─────────────────────────────────────────────────
class TestAgentIdentities:
    """Identity and prompt checks for the security, performance and style agents."""

    def test_all_agents_have_unique_names(self):
        """The three agents must report three different, expected names."""
        agents = (SecurityAgent(), PerformanceAgent(), StyleAgent())
        observed = {agent.agent_name for agent in agents}
        assert observed == {"security", "performance", "style"}

    def test_all_agents_load_prompts(self):
        """Every agent must expose a system prompt longer than 100 characters."""
        for agent_class in (SecurityAgent, PerformanceAgent, StyleAgent):
            agent = agent_class()
            assert len(agent.system_prompt) > 100, f"{agent.agent_name} prompt is too short"

    def test_prompts_are_domain_specific(self):
        """Every prompt must contain vocabulary from its own review domain."""
        # Build all three agents up front so a constructor failure surfaces
        # before any prompt assertion runs.
        security = SecurityAgent()
        performance = PerformanceAgent()
        style = StyleAgent()

        # Security vocabulary: CWE identifiers plus vulnerability/injection.
        sec_prompt = security.system_prompt
        assert "CWE" in sec_prompt
        assert any(
            term in sec_prompt.lower() for term in ("vulnerability", "injection")
        )

        # Performance vocabulary: N+1 queries and complexity wording.
        perf_prompt = performance.system_prompt
        assert "N+1" in perf_prompt or "n+1" in perf_prompt.lower()
        assert "O(n" in perf_prompt or "quadratic" in perf_prompt.lower()

        # Style vocabulary: naming plus readability/maintainability.
        style_prompt = style.system_prompt
        assert "naming" in style_prompt.lower()
        assert any(
            term in style_prompt.lower() for term in ("readability", "maintainability")
        )

    def test_prompts_have_scope_boundaries(self):
        """Every prompt must contain scope-limiting wording."""
        security = SecurityAgent()
        performance = PerformanceAgent()
        style = StyleAgent()

        # Checked in the order security, performance, style; each prompt must
        # contain either an explicit exclusion phrase or the word "only".
        for agent in (security, performance, style):
            lowered = agent.system_prompt.lower()
            assert "do not comment on" in lowered or "only" in lowered
# ─── Parallel Execution Tests ─────────────────────────────────────────────
class TestParallelExecution:
    """Checks the asyncio.gather behaviour that the parallel agent run relies on.

    Both tests use local stand-in coroutines instead of real agents, so they
    exercise asyncio.gather itself.
    """

    @pytest.mark.asyncio
    async def test_gather_runs_concurrently(self):
        """
        asyncio.gather should run tasks concurrently, not sequentially.

        We simulate this with sleep-based tasks — if they run in parallel,
        total time should be ~max(durations), not sum(durations).
        """
        # Function-scope import: only this test needs the clock.
        import time

        async def slow_task(name: str, duration: float) -> str:
            """Sleep for `duration` seconds, then return `name`."""
            await asyncio.sleep(duration)
            return name

        # perf_counter() is monotonic and high resolution. time.time() follows
        # the wall clock, which can be adjusted mid-test and would distort (or
        # even make negative) the measured interval.
        start = time.perf_counter()
        results = await asyncio.gather(
            slow_task("security", 0.1),
            slow_task("performance", 0.1),
            slow_task("style", 0.1),
        )
        elapsed = time.perf_counter() - start

        assert set(results) == {"security", "performance", "style"}
        # If parallel: ~0.1s. If sequential: ~0.3s. Allow generous margin.
        assert elapsed < 0.25, f"Tasks took {elapsed:.2f}s — should be parallel (~0.1s)"

    @pytest.mark.asyncio
    async def test_gather_handles_partial_failure(self):
        """
        If one agent fails, the others should still return results.

        The failing stand-in catches its own exception and returns [], so
        asyncio.gather() never sees the exception. All three calls succeed.
        """

        async def success_task() -> list:
            """Return a single fake finding."""
            return [{"finding": "real"}]

        async def failing_task() -> list:
            """Raise internally, swallow the error, and return no findings."""
            # Simulates what BaseAgent.review() does on failure
            try:
                raise Exception("Groq API timeout")
            except Exception:
                return []  # Graceful degradation

        results = await asyncio.gather(
            success_task(),
            failing_task(),
            success_task(),
        )

        # gather() returns results in the order the awaitables were passed.
        assert len(results) == 3
        assert len(results[0]) == 1  # First agent succeeded
        assert len(results[1]) == 0  # Second agent failed gracefully
        assert len(results[2]) == 1  # Third agent succeeded
|