Spaces:
Running
Running
| """ | |
| Tests for the Synthesizer Agent and Health Score calculator. | |
| These tests verify: | |
| 1. Deduplication merges findings on the same file+line | |
| 2. Security agent takes precedence in severity conflicts | |
| 3. Health Score formula applies correct penalties | |
| 4. Recommendation logic (block/request_changes/approve) | |
| 5. Executive summary generation | |
| 6. Ranking puts critical findings first | |
| """ | |
| from app.agents.synthesizer import ( | |
| deduplicate_findings, | |
| generate_executive_summary, | |
| rank_findings, | |
| synthesize, | |
| ) | |
| from app.models.findings import Finding | |
| from app.services.health_score import calculate_health_score, determine_recommendation | |
| def _make_finding(agent="security", severity="high", file_path="app.py", | |
| line_start=5, category="test", confidence=0.9, **kwargs): | |
| """Helper to create Finding objects with sensible defaults.""" | |
| return Finding( | |
| agent=agent, | |
| file_path=file_path, | |
| line_start=line_start, | |
| line_end=kwargs.get("line_end", line_start), | |
| severity=severity, | |
| category=category, | |
| title=kwargs.get("title", f"Test {category}"), | |
| description=kwargs.get("description", "Test finding description."), | |
| suggested_fix=kwargs.get("suggested_fix", ""), | |
| cwe_id=kwargs.get("cwe_id"), | |
| confidence=confidence, | |
| ) | |
| class TestDeduplication: | |
| def test_no_duplicates_unchanged(self): | |
| """Findings on different lines should not be deduplicated.""" | |
| findings = [ | |
| _make_finding(line_start=5, category="sql_injection"), | |
| _make_finding(line_start=10, category="xss"), | |
| ] | |
| result = deduplicate_findings(findings) | |
| assert len(result) == 2 | |
| def test_same_line_same_category_merged(self): | |
| """Two agents flagging same line+category should produce one finding.""" | |
| findings = [ | |
| _make_finding(agent="security", line_start=5, severity="critical", category="sql_injection"), | |
| _make_finding(agent="performance", line_start=5, severity="high", category="sql_injection"), | |
| ] | |
| result = deduplicate_findings(findings) | |
| assert len(result) == 1 | |
| def test_same_line_different_category_kept(self): | |
| """Two agents flagging same line but different categories should both be kept.""" | |
| findings = [ | |
| _make_finding(agent="security", line_start=5, category="sql_injection"), | |
| _make_finding(agent="style", line_start=5, category="naming"), | |
| ] | |
| result = deduplicate_findings(findings) | |
| assert len(result) == 2 | |
| def test_security_takes_precedence(self): | |
| """When merging same category, security agent's finding should be kept as primary.""" | |
| findings = [ | |
| _make_finding(agent="style", line_start=5, category="sql_injection"), | |
| _make_finding(agent="security", line_start=5, category="sql_injection"), | |
| ] | |
| result = deduplicate_findings(findings) | |
| assert len(result) == 1 | |
| assert result[0].agent == "security" | |
| def test_max_severity_wins(self): | |
| """Merged finding should use the maximum severity from all agents.""" | |
| findings = [ | |
| _make_finding(agent="security", line_start=5, severity="medium"), | |
| _make_finding(agent="performance", line_start=5, severity="critical"), | |
| ] | |
| result = deduplicate_findings(findings) | |
| assert result[0].severity == "critical" | |
| def test_merged_description_mentions_other_agents(self): | |
| """Merged finding should note which other agents also flagged it.""" | |
| findings = [ | |
| _make_finding(agent="security", line_start=5), | |
| _make_finding(agent="performance", line_start=5), | |
| ] | |
| result = deduplicate_findings(findings) | |
| assert "performance" in result[0].description.lower() | |
| class TestRanking: | |
| def test_critical_before_low(self): | |
| """Critical findings should appear before low findings.""" | |
| findings = [ | |
| _make_finding(severity="low", line_start=1), | |
| _make_finding(severity="critical", line_start=2), | |
| _make_finding(severity="medium", line_start=3), | |
| ] | |
| ranked = rank_findings(findings) | |
| assert ranked[0].severity == "critical" | |
| assert ranked[-1].severity == "low" | |
| def test_same_severity_sorted_by_confidence(self): | |
| """Within same severity, higher confidence comes first.""" | |
| findings = [ | |
| _make_finding(severity="high", confidence=0.5, line_start=1), | |
| _make_finding(severity="high", confidence=0.95, line_start=2), | |
| ] | |
| ranked = rank_findings(findings) | |
| assert ranked[0].confidence == 0.95 | |
| class TestHealthScore: | |
| def test_no_findings_returns_100(self): | |
| """Empty findings should give perfect score.""" | |
| assert calculate_health_score([]) == 100 | |
| def test_one_critical_drops_significantly(self): | |
| """One critical finding should drop score by ~25 points.""" | |
| findings = [_make_finding(severity="critical", confidence=1.0)] | |
| score = calculate_health_score(findings) | |
| assert 70 <= score <= 80 # 100 - 25*1.0 = 75 | |
| def test_low_confidence_penalizes_less(self): | |
| """Low-confidence findings should penalize less.""" | |
| high_conf = [_make_finding(severity="high", confidence=1.0)] | |
| low_conf = [_make_finding(severity="high", confidence=0.3)] | |
| assert calculate_health_score(low_conf) > calculate_health_score(high_conf) | |
| def test_score_never_below_zero(self): | |
| """Score should be clamped to 0 minimum.""" | |
| findings = [_make_finding(severity="critical") for _ in range(10)] | |
| assert calculate_health_score(findings) == 0 | |
| def test_score_never_above_100(self): | |
| """Score should be clamped to 100 maximum.""" | |
| assert calculate_health_score([]) == 100 | |
| class TestRecommendation: | |
| def test_critical_finding_blocks(self): | |
| """Any critical finding should result in 'block'.""" | |
| findings = [_make_finding(severity="critical")] | |
| assert determine_recommendation(findings, 50) == "block" | |
| def test_low_score_requests_changes(self): | |
| """Score below 50 should request changes.""" | |
| findings = [_make_finding(severity="medium")] | |
| assert determine_recommendation(findings, 30) == "request_changes" | |
| def test_healthy_pr_approves(self): | |
| """High score with no critical/high findings should approve.""" | |
| findings = [_make_finding(severity="low")] | |
| assert determine_recommendation(findings, 90) == "approve" | |
| def test_no_findings_approves(self): | |
| """No findings should approve.""" | |
| assert determine_recommendation([], 100) == "approve" | |
| class TestExecutiveSummary: | |
| def test_no_findings_positive_summary(self): | |
| """Empty findings should produce a positive summary.""" | |
| summary = generate_executive_summary([], 100, "approve") | |
| assert "no issues" in summary.lower() or "clean" in summary.lower() | |
| def test_summary_includes_counts(self): | |
| """Summary should mention finding counts.""" | |
| findings = [ | |
| _make_finding(severity="critical"), | |
| _make_finding(severity="high", line_start=10), | |
| ] | |
| summary = generate_executive_summary(findings, 50, "block") | |
| assert "2" in summary | |
| assert "critical" in summary.lower() | |
| class TestSynthesize: | |
| def test_full_synthesis_pipeline(self): | |
| """Full synthesize() should return a valid SynthesizedReview.""" | |
| sec = [_make_finding(agent="security", severity="critical", line_start=5)] | |
| perf = [_make_finding(agent="performance", severity="high", line_start=10)] | |
| style = [_make_finding(agent="style", severity="low", line_start=15)] | |
| review = synthesize(sec, perf, style) | |
| assert review.health_score >= 0 | |
| assert review.health_score <= 100 | |
| assert review.critical_count == 1 | |
| assert review.high_count == 1 | |
| assert review.low_count == 1 | |
| assert review.recommendation == "block" # Has critical | |
| assert len(review.findings) == 3 | |
| assert len(review.executive_summary) > 0 | |
| def test_synthesis_with_duplicates(self): | |
| """Synthesis should deduplicate findings on same line+category.""" | |
| sec = [_make_finding(agent="security", line_start=5, category="sql_injection")] | |
| perf = [_make_finding(agent="performance", line_start=5, category="sql_injection")] | |
| style = [] | |
| review = synthesize(sec, perf, style) | |
| assert len(review.findings) == 1 # Deduplicated (same line + category) | |
| def test_synthesis_empty_input(self): | |
| """Empty input from all agents should produce clean review.""" | |
| review = synthesize([], [], []) | |
| assert review.health_score == 100 | |
| assert review.recommendation == "approve" | |
| assert len(review.findings) == 0 | |