# Source: ninja-code-guard/tests/unit/test_synthesizer.py
# Last commit by NinjainPJs (b9da50c):
#   Fix all ruff lint issues — 0 errors, 92 tests passing
"""
Tests for the Synthesizer Agent and Health Score calculator.
These tests verify:
1. Deduplication merges findings on the same file+line+category
2. Security agent's finding is kept as primary when merging; the highest severity wins
3. Health Score formula applies correct penalties
4. Recommendation logic (block/request_changes/approve)
5. Executive summary generation
6. Ranking puts critical findings first
"""
from app.agents.synthesizer import (
deduplicate_findings,
generate_executive_summary,
rank_findings,
synthesize,
)
from app.models.findings import Finding
from app.services.health_score import calculate_health_score, determine_recommendation
def _make_finding(agent="security", severity="high", file_path="app.py",
                  line_start=5, category="test", confidence=0.9, **kwargs):
    """Build a ``Finding`` for tests, defaulting every field not supplied.

    Optional overrides are read from ``kwargs``: ``line_end``, ``title``,
    ``description``, ``suggested_fix`` and ``cwe_id``. Any other extra keyword
    is ignored.
    """
    # Fallbacks that depend on other arguments: a single-line span and a
    # title derived from the category.
    line_end = kwargs.get("line_end", line_start)
    title = kwargs.get("title", f"Test {category}")

    description = kwargs.get("description", "Test finding description.")
    suggested_fix = kwargs.get("suggested_fix", "")
    cwe_id = kwargs.get("cwe_id")

    return Finding(
        agent=agent,
        file_path=file_path,
        line_start=line_start,
        line_end=line_end,
        severity=severity,
        category=category,
        title=title,
        description=description,
        suggested_fix=suggested_fix,
        cwe_id=cwe_id,
        confidence=confidence,
    )
class TestDeduplication:
    """Tests for ``deduplicate_findings``: merge key, precedence and merged fields."""

    def test_no_duplicates_unchanged(self):
        """Findings on different lines should not be deduplicated."""
        findings = [
            _make_finding(line_start=5, category="sql_injection"),
            _make_finding(line_start=10, category="xss"),
        ]
        result = deduplicate_findings(findings)
        assert len(result) == 2

    def test_same_line_same_category_merged(self):
        """Two agents flagging same line+category should produce one finding."""
        findings = [
            _make_finding(agent="security", line_start=5, severity="critical",
                          category="sql_injection"),
            _make_finding(agent="performance", line_start=5, severity="high",
                          category="sql_injection"),
        ]
        result = deduplicate_findings(findings)
        assert len(result) == 1

    def test_same_line_different_category_kept(self):
        """Two agents flagging same line but different categories should both be kept."""
        findings = [
            _make_finding(agent="security", line_start=5, category="sql_injection"),
            _make_finding(agent="style", line_start=5, category="naming"),
        ]
        result = deduplicate_findings(findings)
        assert len(result) == 2

    def test_security_takes_precedence(self):
        """When merging same category, security agent's finding should be kept as primary."""
        # Security is listed second so the test cannot pass merely because
        # the first finding in the input was kept.
        findings = [
            _make_finding(agent="style", line_start=5, category="sql_injection"),
            _make_finding(agent="security", line_start=5, category="sql_injection"),
        ]
        result = deduplicate_findings(findings)
        assert len(result) == 1
        assert result[0].agent == "security"

    def test_max_severity_wins(self):
        """Merged finding should use the maximum severity from all agents."""
        findings = [
            _make_finding(agent="security", line_start=5, severity="medium"),
            _make_finding(agent="performance", line_start=5, severity="critical"),
        ]
        result = deduplicate_findings(findings)
        # Confirm the merge happened before inspecting the merged finding;
        # otherwise result[0] could be an unmerged input.
        assert len(result) == 1
        assert result[0].severity == "critical"

    def test_merged_description_mentions_other_agents(self):
        """Merged finding should note which other agents also flagged it."""
        findings = [
            _make_finding(agent="security", line_start=5),
            _make_finding(agent="performance", line_start=5),
        ]
        result = deduplicate_findings(findings)
        # Confirm the merge happened before inspecting the merged finding.
        assert len(result) == 1
        assert "performance" in result[0].description.lower()
class TestRanking:
    """Tests for the ordering produced by ``rank_findings``."""

    def test_critical_before_low(self):
        """The critical finding must lead the ranking and the low one must trail it."""
        unordered = [
            _make_finding(severity="low", line_start=1),
            _make_finding(severity="critical", line_start=2),
            _make_finding(severity="medium", line_start=3),
        ]
        ordered = rank_findings(unordered)
        first, last = ordered[0], ordered[-1]
        assert first.severity == "critical"
        assert last.severity == "low"

    def test_same_severity_sorted_by_confidence(self):
        """With equal severity, the more confident finding is ranked first."""
        less_sure = _make_finding(severity="high", confidence=0.5, line_start=1)
        more_sure = _make_finding(severity="high", confidence=0.95, line_start=2)
        ordered = rank_findings([less_sure, more_sure])
        assert ordered[0].confidence == 0.95
class TestHealthScore:
    """Tests for ``calculate_health_score``: penalties, confidence weighting, clamping."""

    def test_no_findings_returns_100(self):
        """Empty findings should give perfect score."""
        assert calculate_health_score([]) == 100

    def test_one_critical_drops_significantly(self):
        """One critical finding should drop score by ~25 points."""
        findings = [_make_finding(severity="critical", confidence=1.0)]
        score = calculate_health_score(findings)
        assert 70 <= score <= 80  # 100 - 25*1.0 = 75

    def test_low_confidence_penalizes_less(self):
        """Low-confidence findings should penalize less."""
        high_conf = [_make_finding(severity="high", confidence=1.0)]
        low_conf = [_make_finding(severity="high", confidence=0.3)]
        assert calculate_health_score(low_conf) > calculate_health_score(high_conf)

    def test_score_never_below_zero(self):
        """Score should be clamped to 0 minimum."""
        # Ten critical findings are expected to exceed the whole 100-point budget.
        findings = [_make_finding(severity="critical") for _ in range(10)]
        assert calculate_health_score(findings) == 0

    def test_score_never_above_100(self):
        """Score should be clamped to 100 maximum."""
        assert calculate_health_score([]) == 100
        # Also check the bound on a non-empty, low-penalty input so this test
        # is not just a copy of test_no_findings_returns_100.
        mild = [_make_finding(severity="low", confidence=0.3)]
        assert 0 <= calculate_health_score(mild) <= 100
class TestRecommendation:
    """Tests for the verdict returned by ``determine_recommendation``."""

    def test_critical_finding_blocks(self):
        """A critical finding yields 'block'."""
        verdict = determine_recommendation([_make_finding(severity="critical")], 50)
        assert verdict == "block"

    def test_low_score_requests_changes(self):
        """A medium finding with a score of 30 yields 'request_changes'."""
        verdict = determine_recommendation([_make_finding(severity="medium")], 30)
        assert verdict == "request_changes"

    def test_healthy_pr_approves(self):
        """A low finding with a score of 90 yields 'approve'."""
        verdict = determine_recommendation([_make_finding(severity="low")], 90)
        assert verdict == "approve"

    def test_no_findings_approves(self):
        """No findings and a score of 100 yields 'approve'."""
        verdict = determine_recommendation([], 100)
        assert verdict == "approve"
class TestExecutiveSummary:
    """Tests for the text produced by ``generate_executive_summary``."""

    def test_no_findings_positive_summary(self):
        """With no findings the summary should say 'no issues' or 'clean'."""
        lowered = generate_executive_summary([], 100, "approve").lower()
        assert any(phrase in lowered for phrase in ("no issues", "clean"))

    def test_summary_includes_counts(self):
        """The summary should contain the finding count and the word 'critical'."""
        critical = _make_finding(severity="critical")
        high = _make_finding(severity="high", line_start=10)
        summary = generate_executive_summary([critical, high], 50, "block")
        assert "2" in summary
        assert "critical" in summary.lower()
class TestSynthesize:
    """End-to-end tests for ``synthesize`` across the three agent inputs."""

    def test_full_synthesis_pipeline(self):
        """One finding per agent on distinct lines should yield a complete review."""
        security_findings = [_make_finding(agent="security", severity="critical", line_start=5)]
        performance_findings = [_make_finding(agent="performance", severity="high", line_start=10)]
        style_findings = [_make_finding(agent="style", severity="low", line_start=15)]

        review = synthesize(security_findings, performance_findings, style_findings)

        assert 0 <= review.health_score <= 100
        counts = (review.critical_count, review.high_count, review.low_count)
        assert counts == (1, 1, 1)
        assert review.recommendation == "block"  # Has critical
        assert len(review.findings) == 3
        assert len(review.executive_summary) > 0

    def test_synthesis_with_duplicates(self):
        """Two agents flagging the same line+category should collapse to one finding."""
        shared = {"line_start": 5, "category": "sql_injection"}
        security_findings = [_make_finding(agent="security", **shared)]
        performance_findings = [_make_finding(agent="performance", **shared)]

        review = synthesize(security_findings, performance_findings, [])

        assert len(review.findings) == 1  # Deduplicated (same line + category)

    def test_synthesis_empty_input(self):
        """No findings from any agent should give a perfect, approved, empty review."""
        review = synthesize([], [], [])
        assert review.health_score == 100
        assert review.recommendation == "approve"
        assert len(review.findings) == 0