ninja-code-guard / tests /unit /test_performance_agent.py
NinjainPJs's picture
Fix all ruff lint issues — 0 errors, 92 tests passing
b9da50c
"""
Tests for the Performance Agent and radon tool.
These tests verify:
1. PerformanceAgent identifies as "performance" and loads its prompt
2. Radon correctly detects high-complexity functions
3. Radon handles non-Python files and empty input gracefully
4. The agent converts LLM output to Finding objects correctly
5. The agent handles LLM failures without crashing
Testing approach:
- Radon tests use REAL Radon execution on synthetic code (it's fast and local)
- LLM tests use mocks (we don't want to burn Groq API quota in CI)
- Conversion tests verify the base_agent → Finding pipeline
"""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from app.agents.base_agent import AgentFindings, FindingOutput
from app.agents.performance_agent import PerformanceAgent
from app.github.client import PRData
from app.tools.radon_tool import run_radon
# ─── Fixtures ──────────────────────────────────────────────────────────────
@pytest.fixture
def sample_pr_data():
    """Return a PRData for a PR that adds a quadratic nested loop to app.py.

    The sample module is written out once and both the ``file_contents``
    entry and the unified-diff body are derived from it, so the two views
    of the change cannot drift apart. The nested blocks are indented so the
    sample is syntactically valid Python.
    """
    source_lines = [
        'def process_users(users):',
        '    result = []',
        '    for u in users:',
        '        for item in users:',
        '            if u["id"] == item["id"]:',
        '                result.append(u)',
        '    return result',
    ]
    file_text = "".join(f"{line}\n" for line in source_lines)
    # Every line of the sample is an addition, so each diff line is simply
    # "+" followed by the corresponding source line.
    added_lines = "".join(f"+{line}\n" for line in source_lines)

    return PRData(
        repo_full_name="ninjacode911/codeguard-test",
        pr_number=4,
        commit_sha="abc123",
        title="Add user processing",
        diff="diff --git a/app.py b/app.py\n" + added_lines,
        changed_files=[{"filename": "app.py", "status": "modified"}],
        file_contents={"app.py": file_text},
    )
@pytest.fixture
def mock_perf_findings():
    """Return an AgentFindings holding one canned performance finding.

    Stands in for structured LLM output so conversion tests can run without
    calling the model. The single finding covers lines 3-6 of ``app.py``,
    has severity "high", category "quadratic_loop" and no CWE id.
    """
    quadratic_loop = FindingOutput(
        file_path="app.py",
        line_start=3,
        line_end=6,
        severity="high",
        category="quadratic_loop",
        # The superscript two is written as a real "²" character.
        title="O(n²) nested loop in process_users",
        description=(
            "Nested loop iterates over the same list twice, resulting in "
            "O(n²) time complexity. With 10K users this takes 100M iterations."
        ),
        suggested_fix=(
            "seen = set()\n"
            "result = [u for u in users if u['id'] not in seen and not seen.add(u['id'])]"
        ),
        cwe_id=None,
        confidence=0.90,
    )
    return AgentFindings(findings=[quadratic_loop])
# ─── PerformanceAgent Tests ───────────────────────────────────────────────
class TestPerformanceAgent:
    """Checks PerformanceAgent's name, prompt, finding conversion and LLM-failure path."""

    def test_agent_name(self):
        """The agent must report its name as 'performance'."""
        assert PerformanceAgent().agent_name == "performance"

    def test_system_prompt_loads(self):
        """The system prompt must be non-trivial and mention performance and N+1."""
        prompt_text = PerformanceAgent().system_prompt
        lowered = prompt_text.lower()
        assert len(prompt_text) > 100
        assert "performance" in lowered
        # A case-insensitive match accepts both "N+1" and "n+1".
        assert "n+1" in lowered

    def test_conversion_produces_performance_findings(self, mock_perf_findings):
        """Converting LLM output must yield one finding tagged agent='performance'."""
        converted = PerformanceAgent()._convert_to_findings(mock_perf_findings)
        assert len(converted) == 1

        finding = converted[0]
        assert finding.agent == "performance"
        assert finding.severity == "high"
        assert finding.category == "quadratic_loop"
        # Performance issues don't carry CWE IDs.
        assert finding.cwe_id is None

    @pytest.mark.asyncio
    async def test_review_handles_llm_failure(self, sample_pr_data):
        """When the LLM chain raises, review() must return [] instead of propagating."""
        failing_chain = AsyncMock(side_effect=Exception("Groq rate limit"))
        # Piping into or out of the structured LLM (| on either side) yields
        # the failing chain.
        structured_llm = MagicMock(
            __ror__=MagicMock(return_value=failing_chain),
            __or__=MagicMock(return_value=failing_chain),
        )

        with patch("app.agents.base_agent.ChatGroq") as chat_groq_cls:
            fake_llm = MagicMock()
            fake_llm.with_structured_output.return_value = structured_llm
            chat_groq_cls.return_value = fake_llm

            agent = PerformanceAgent()
            # Static analysis is stubbed out so only the LLM path is exercised.
            with patch.object(agent, "run_static_analysis", return_value=""):
                findings = await agent.review(sample_pr_data)

        assert findings == []
# ─── Radon Tool Tests ─────────────────────────────────────────────────────
class TestRadonTool:
    """Tests for run_radon, executed against small synthetic source files."""

    @pytest.mark.asyncio
    async def test_detects_high_complexity(self):
        """Radon should flag functions with cyclomatic complexity > 10."""
        # An if/elif chain with eleven conditions → many branches → high complexity.
        complex_code = (
            "def complex_func(a, b, c, d, e, f, g, h, i, j, k):\n"
            " if a: return 1\n"
            " elif b: return 2\n"
            " elif c: return 3\n"
            " elif d: return 4\n"
            " elif e: return 5\n"
            " elif f: return 6\n"
            " elif g: return 7\n"
            " elif h: return 8\n"
            " elif i: return 9\n"
            " elif j: return 10\n"
            " elif k: return 11\n"
            " else: return 0\n"
        )
        result = await run_radon({"complex.py": complex_code})

        # Empty output presumably means Radon could not run (e.g. it is not
        # installed) — confirm against radon_tool. Report that as an explicit
        # skip so the test never passes without checking anything.
        if not result:
            pytest.skip("run_radon returned no output; radon is probably not installed")

        # Radon should find this function and report it.
        assert "complex_func" in result or "complexity" in result.lower()

    @pytest.mark.asyncio
    async def test_returns_empty_for_simple_code(self):
        """Simple code (low complexity) should produce no output."""
        simple_code = "def add(a, b):\n return a + b\n"
        result = await run_radon({"simple.py": simple_code})
        # Simple function has complexity 1 (grade A) — should not be flagged.
        assert result == ""

    @pytest.mark.asyncio
    async def test_skips_non_python_files(self):
        """Radon should ignore non-Python files."""
        non_python_files = {
            "style.css": "body { color: red; }",
            "README.md": "# Hello",
        }
        result = await run_radon(non_python_files)
        assert result == ""

    @pytest.mark.asyncio
    async def test_handles_empty_input(self):
        """Empty file dict should return empty string."""
        result = await run_radon({})
        assert result == ""