"""
Tests for the Synthesizer Agent and Health Score calculator.

These tests verify:
1. Deduplication merges findings on the same file+line
2. Security agent takes precedence in severity conflicts
3. Health Score formula applies correct penalties
4. Recommendation logic (block/request_changes/approve)
5. Executive summary generation
6. Ranking puts critical findings first
"""

from app.agents.synthesizer import (
    deduplicate_findings,
    generate_executive_summary,
    rank_findings,
    synthesize,
)
from app.models.findings import Finding
from app.services.health_score import calculate_health_score, determine_recommendation


def _make_finding(agent="security", severity="high", file_path="app.py",
                  line_start=5, category="test", confidence=0.9, **kwargs):
    """Helper to create Finding objects with sensible defaults.

    Optional keyword overrides read from ``kwargs``: ``line_end`` (defaults
    to ``line_start``), ``title`` (defaults to ``f"Test {category}"``),
    ``description``, ``suggested_fix`` and ``cwe_id``.

    NOTE: any other keyword is silently ignored, so a misspelled argument
    name falls back to the default instead of raising.
    """
    return Finding(
        agent=agent,
        file_path=file_path,
        line_start=line_start,
        line_end=kwargs.get("line_end", line_start),
        severity=severity,
        category=category,
        title=kwargs.get("title", f"Test {category}"),
        description=kwargs.get("description", "Test finding description."),
        suggested_fix=kwargs.get("suggested_fix", ""),
        cwe_id=kwargs.get("cwe_id"),
        confidence=confidence,
    )


class TestDeduplication:
    """Tests for ``deduplicate_findings``."""

    def test_no_duplicates_unchanged(self):
        """Findings on different lines should not be deduplicated."""
        findings = [
            _make_finding(line_start=5, category="sql_injection"),
            _make_finding(line_start=10, category="xss"),
        ]
        result = deduplicate_findings(findings)
        assert len(result) == 2

    def test_same_line_same_category_merged(self):
        """Two agents flagging same line+category should produce one finding."""
        findings = [
            _make_finding(agent="security", line_start=5, severity="critical",
                          category="sql_injection"),
            _make_finding(agent="performance", line_start=5, severity="high",
                          category="sql_injection"),
        ]
        result = deduplicate_findings(findings)
        assert len(result) == 1

    def test_same_line_different_category_kept(self):
        """Two agents flagging same line but different categories should both be kept."""
        findings = [
            _make_finding(agent="security", line_start=5, category="sql_injection"),
            _make_finding(agent="style", line_start=5, category="naming"),
        ]
        result = deduplicate_findings(findings)
        assert len(result) == 2

    def test_security_takes_precedence(self):
        """When merging same category, security agent's finding should be kept as primary."""
        # The style finding is listed first so the test cannot pass merely
        # because the first input wins.
        findings = [
            _make_finding(agent="style", line_start=5, category="sql_injection"),
            _make_finding(agent="security", line_start=5, category="sql_injection"),
        ]
        result = deduplicate_findings(findings)
        assert len(result) == 1
        assert result[0].agent == "security"

    def test_max_severity_wins(self):
        """Merged finding should use the maximum severity from all agents."""
        findings = [
            _make_finding(agent="security", line_start=5, severity="medium"),
            _make_finding(agent="performance", line_start=5, severity="critical"),
        ]
        result = deduplicate_findings(findings)
        # Confirm the merge happened before inspecting the survivor; otherwise
        # an unmerged list could satisfy the severity check by ordering alone.
        assert len(result) == 1
        assert result[0].severity == "critical"

    def test_merged_description_mentions_other_agents(self):
        """Merged finding should note which other agents also flagged it."""
        findings = [
            _make_finding(agent="security", line_start=5),
            _make_finding(agent="performance", line_start=5),
        ]
        result = deduplicate_findings(findings)
        assert "performance" in result[0].description.lower()


class TestRanking:
    """Tests for ``rank_findings``."""

    def test_critical_before_low(self):
        """Critical findings should appear before low findings."""
        findings = [
            _make_finding(severity="low", line_start=1),
            _make_finding(severity="critical", line_start=2),
            _make_finding(severity="medium", line_start=3),
        ]
        ranked = rank_findings(findings)
        assert ranked[0].severity == "critical"
        assert ranked[-1].severity == "low"

    def test_same_severity_sorted_by_confidence(self):
        """Within same severity, higher confidence comes first."""
        findings = [
            _make_finding(severity="high", confidence=0.5, line_start=1),
            _make_finding(severity="high", confidence=0.95, line_start=2),
        ]
        ranked = rank_findings(findings)
        assert ranked[0].confidence == 0.95


class TestHealthScore:
    """Tests for ``calculate_health_score``."""

    def test_no_findings_returns_100(self):
        """Empty findings should give perfect score."""
        assert calculate_health_score([]) == 100

    def test_one_critical_drops_significantly(self):
        """One critical finding should drop score by ~25 points."""
        findings = [_make_finding(severity="critical", confidence=1.0)]
        score = calculate_health_score(findings)
        assert 70 <= score <= 80  # 100 - 25*1.0 = 75

    def test_low_confidence_penalizes_less(self):
        """Low-confidence findings should penalize less."""
        high_conf = [_make_finding(severity="high", confidence=1.0)]
        low_conf = [_make_finding(severity="high", confidence=0.3)]
        assert calculate_health_score(low_conf) > calculate_health_score(high_conf)

    def test_score_never_below_zero(self):
        """Score should be clamped to 0 minimum."""
        findings = [_make_finding(severity="critical") for _ in range(10)]
        assert calculate_health_score(findings) == 0

    def test_score_never_above_100(self):
        """Score should be clamped to 100 maximum."""
        assert calculate_health_score([]) == 100
        # Also check a non-empty input so this test is not just a duplicate
        # of test_no_findings_returns_100.
        mild = [_make_finding(severity="low", confidence=0.1)]
        assert 0 <= calculate_health_score(mild) <= 100


class TestRecommendation:
    """Tests for ``determine_recommendation``."""

    def test_critical_finding_blocks(self):
        """Any critical finding should result in 'block'."""
        findings = [_make_finding(severity="critical")]
        assert determine_recommendation(findings, 50) == "block"

    def test_low_score_requests_changes(self):
        """Score below 50 should request changes."""
        findings = [_make_finding(severity="medium")]
        assert determine_recommendation(findings, 30) == "request_changes"

    def test_healthy_pr_approves(self):
        """High score with no critical/high findings should approve."""
        findings = [_make_finding(severity="low")]
        assert determine_recommendation(findings, 90) == "approve"

    def test_no_findings_approves(self):
        """No findings should approve."""
        assert determine_recommendation([], 100) == "approve"


class TestExecutiveSummary:
    """Tests for ``generate_executive_summary``."""

    def test_no_findings_positive_summary(self):
        """Empty findings should produce a positive summary."""
        summary = generate_executive_summary([], 100, "approve")
        assert "no issues" in summary.lower() or "clean" in summary.lower()

    def test_summary_includes_counts(self):
        """Summary should mention finding counts."""
        findings = [
            _make_finding(severity="critical"),
            _make_finding(severity="high", line_start=10),
        ]
        summary = generate_executive_summary(findings, 50, "block")
        assert "2" in summary
        assert "critical" in summary.lower()


class TestSynthesize:
    """End-to-end tests for ``synthesize``."""

    def test_full_synthesis_pipeline(self):
        """Full synthesize() should return a valid SynthesizedReview."""
        sec = [_make_finding(agent="security", severity="critical", line_start=5)]
        perf = [_make_finding(agent="performance", severity="high", line_start=10)]
        style = [_make_finding(agent="style", severity="low", line_start=15)]

        review = synthesize(sec, perf, style)

        assert review.health_score >= 0
        assert review.health_score <= 100
        assert review.critical_count == 1
        assert review.high_count == 1
        assert review.low_count == 1
        assert review.recommendation == "block"  # Has critical
        assert len(review.findings) == 3
        assert len(review.executive_summary) > 0

    def test_synthesis_with_duplicates(self):
        """Synthesis should deduplicate findings on same line+category."""
        sec = [_make_finding(agent="security", line_start=5, category="sql_injection")]
        perf = [_make_finding(agent="performance", line_start=5, category="sql_injection")]
        style = []

        review = synthesize(sec, perf, style)

        assert len(review.findings) == 1  # Deduplicated (same line + category)

    def test_synthesis_empty_input(self):
        """Empty input from all agents should produce clean review."""
        review = synthesize([], [], [])
        assert review.health_score == 100
        assert review.recommendation == "approve"
        assert len(review.findings) == 0