Spaces:
Running
Running
File size: 7,721 Bytes
4b445f6 b9da50c 4b445f6 b9da50c 4b445f6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | """
Tests for the Style Agent and Ruff linter tool.
These tests verify:
1. StyleAgent identifies as "style" and loads its prompt
2. Ruff correctly detects lint issues (unused imports, etc.)
3. Ruff handles non-Python files and empty input gracefully
4. The agent converts LLM output to Finding objects correctly
5. The agent handles LLM failures without crashing
Ruff is an extremely fast Python linter written in Rust. It replaces
flake8, isort, pycodestyle, and dozens of other tools. Tests use REAL
Ruff execution on synthetic code — it runs in milliseconds.
"""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from app.agents.base_agent import AgentFindings, FindingOutput
from app.agents.style_agent import StyleAgent
from app.github.client import PRData
from app.tools.linter_tool import run_ruff
# ─── Fixtures ─────────────────────────────────────────────────────────────
@pytest.fixture
def sample_pr_data():
    """Return a PRData for a PR that adds ``util.py`` containing style issues.

    The sample module imports ``os`` and ``json`` without using them and
    defines a function with the non-descriptive name ``x``. The source is
    indented with four spaces so that it is syntactically valid Python, and
    the diff body is generated from that same text so the ``diff`` and
    ``file_contents`` fields cannot drift apart.
    """
    util_source = (
        "import os\n"
        "import json\n"
        "\n"
        "def x(a, b):\n"
        "    t = []\n"
        "    for i in a:\n"
        "        if i in b:\n"
        "            t.append(i)\n"
        "    return t\n"
    )
    # Every line of the newly added file appears in the diff prefixed by "+".
    added_lines = "".join(
        f"+{line}" for line in util_source.splitlines(keepends=True)
    )
    return PRData(
        repo_full_name="ninjacode911/codeguard-test",
        pr_number=4,
        commit_sha="abc123",
        title="Add utility function",
        diff="diff --git a/util.py b/util.py\n" + added_lines,
        changed_files=[{"filename": "util.py", "status": "added"}],
        file_contents={"util.py": util_source},
    )
@pytest.fixture
def mock_style_findings():
    """Provide canned structured LLM output holding two style findings.

    The first finding is a low-severity unused import on line 1 of
    ``util.py``; the second is a medium-severity naming issue spanning
    lines 4-9. Neither carries a CWE identifier.
    """
    unused_import = FindingOutput(
        file_path="util.py",
        line_start=1,
        line_end=1,
        severity="low",
        category="unused_import",
        title="Unused import 'os'",
        description="The 'os' module is imported but never used in the file.",
        suggested_fix="Remove the import: delete 'import os'",
        cwe_id=None,
        confidence=0.95,
    )
    vague_name = FindingOutput(
        file_path="util.py",
        line_start=4,
        line_end=9,
        severity="medium",
        category="naming",
        title="Non-descriptive function name 'x'",
        description=(
            "Function name 'x' doesn't describe what the function does. "
            "It computes the intersection of two lists."
        ),
        suggested_fix="def find_common_elements(list_a, list_b):",
        cwe_id=None,
        confidence=0.85,
    )
    return AgentFindings(findings=[unused_import, vague_name])
# ─── StyleAgent Tests ─────────────────────────────────────────────────────
class TestStyleAgent:
    """Checks StyleAgent's name, prompt, finding conversion and LLM-failure path."""

    def test_agent_name(self):
        """The agent reports 'style' as its name."""
        assert StyleAgent().agent_name == "style"

    def test_system_prompt_loads(self):
        """The system prompt is non-trivial and mentions style-related topics."""
        prompt_text = StyleAgent().system_prompt
        lowered = prompt_text.lower()

        assert len(prompt_text) > 100
        assert any(word in lowered for word in ("style", "maintainability"))
        assert "naming" in lowered

    def test_conversion_produces_style_findings(self, mock_style_findings):
        """Converting the mocked LLM output yields two findings tagged 'style'."""
        converted = StyleAgent()._convert_to_findings(mock_style_findings)

        assert len(converted) == 2
        for finding in converted:
            assert finding.agent == "style"

        first = converted[0]
        second = converted[1]
        assert (first.severity, first.category) == ("low", "unused_import")
        assert (second.severity, second.category) == ("medium", "naming")
        # Style findings are not security weaknesses, so no CWE is attached.
        assert first.cwe_id is None
        assert second.cwe_id is None

    @pytest.mark.asyncio
    async def test_review_handles_llm_failure(self, sample_pr_data):
        """A failing LLM chain makes review() return an empty list, not raise."""
        failing_chain = AsyncMock(side_effect=Exception("Groq API timeout"))
        # Piping the structured LLM in either direction yields the failing chain.
        structured_llm = MagicMock(
            __ror__=MagicMock(return_value=failing_chain),
            __or__=MagicMock(return_value=failing_chain),
        )

        with patch("app.agents.base_agent.ChatGroq") as mock_chat_groq:
            llm_instance = MagicMock()
            llm_instance.with_structured_output.return_value = structured_llm
            mock_chat_groq.return_value = llm_instance

            agent = StyleAgent()
            # Stub out static analysis so only the LLM path is exercised.
            with patch.object(agent, "run_static_analysis", return_value=""):
                findings = await agent.review(sample_pr_data)

        assert findings == []
# ─── Ruff Tool Tests ──────────────────────────────────────────────────────
class TestRuffTool:
    """Tests that exercise ``run_ruff`` against small synthetic inputs.

    Two tests feed code with known violations. For those, an empty report
    is treated as "Ruff unavailable" (presumably ``run_ruff`` returns an
    empty string when the linter cannot be run -- confirm against the
    tool) and the test is reported as skipped rather than silently passing.
    """

    @staticmethod
    def _skip_if_no_report(result):
        """Skip the current test when ``run_ruff`` produced no output."""
        if not result:
            pytest.skip(
                "run_ruff returned no output for code with known violations; "
                "Ruff is probably not installed"
            )

    @pytest.mark.asyncio
    async def test_detects_unused_imports(self):
        """Ruff should detect unused imports (F401)."""
        code_with_unused = (
            "import os\n"
            "import json\n"
            "\n"
            "def hello():\n"
            " return 'world'\n"
        )
        files = {"app.py": code_with_unused}
        result = await run_ruff(files)

        # Make the "no Ruff" case visible in the report instead of a green pass.
        self._skip_if_no_report(result)
        assert "F401" in result  # Unused import rule code
        assert "os" in result or "json" in result

    @pytest.mark.asyncio
    async def test_clean_code_returns_empty(self):
        """Code with no lint issues should return empty string."""
        clean_code = "def add(a: int, b: int) -> int:\n return a + b\n"
        files = {"clean.py": clean_code}
        result = await run_ruff(files)
        assert result == ""

    @pytest.mark.asyncio
    async def test_skips_non_python_files(self):
        """Ruff should ignore non-Python files."""
        files = {
            "index.html": "<h1>Hello</h1>",
            "style.css": "body { color: red; }",
        }
        result = await run_ruff(files)
        assert result == ""

    @pytest.mark.asyncio
    async def test_handles_empty_input(self):
        """Empty file dict should return empty string."""
        result = await run_ruff({})
        assert result == ""

    @pytest.mark.asyncio
    async def test_caps_output_at_20_issues(self):
        """Output should cap at 20 issues to avoid prompt bloat."""
        # Generate code with many unused imports (30 > the 20-issue cap).
        many_imports = "\n".join(f"import module_{i}" for i in range(30))
        code = many_imports + "\n\ndef main():\n pass\n"
        files = {"many_imports.py": code}
        result = await run_ruff(files)

        self._skip_if_no_report(result)
        lines = result.strip().split("\n")
        # Header + 20 issues + an "and X more" line is ~22 lines; allow slack.
        assert len(lines) <= 25
|