"""Tests for the grading logic.""" import pytest from server.graders import grade_review, grade_review_with_breakdown, GradeBreakdown from server.tasks import TASKS, get_task class TestGradeReview: """Test the deterministic grade_review function.""" def test_perfect_score_easy(self): task = get_task("task_easy") score = grade_review( ["null_pointer", "missing_return"], "Null dereference risk and missing return statement.", task, "medium", ) # base=1.0 + quality=0.10 + severity=0.05 = 1.0 (clamped) assert score >= 0.95 def test_perfect_score_medium(self): task = get_task("task_medium") score = grade_review( ["sql_injection", "hardcoded_secret"], "SQL injection via f-string. Hardcoded secret key in plaintext.", task, "high", ) assert score >= 0.95 def test_perfect_score_hard(self): task = get_task("task_hard") score = grade_review( ["race_condition", "improper_error_handling", "timing_attack"], "Non-atomic race condition. Bare except swallows errors. Timing attack via non-constant-time comparison.", task, "critical", ) assert score >= 0.95 def test_empty_submission_scores_zero(self): task = get_task("task_easy") score = grade_review([], "", task) assert score == 0.0 def test_no_issues_scores_zero(self): task = get_task("task_easy") score = grade_review([], "Everything looks fine.", task) assert score == 0.0 def test_partial_recall(self): task = get_task("task_easy") score = grade_review(["null_pointer"], "Found null issue.", task) # base = 1/2 = 0.5 assert 0.4 <= score <= 0.7 def test_false_positive_penalty(self): task = get_task("task_easy") score_clean = grade_review(["null_pointer"], "Null check missing.", task) score_fp = grade_review( ["null_pointer", "sql_injection"], "Null check missing.", task, ) # False positive should reduce score assert score_fp < score_clean def test_quality_bonus_with_keywords(self): task = get_task("task_easy") score_no_kw = grade_review(["null_pointer"], "Found an issue.", task) score_kw = grade_review( ["null_pointer"], "Null dereference — the .get() call may return None without a check.", task, ) assert score_kw >= score_no_kw def test_severity_bonus(self): task = get_task("task_medium") score_wrong = grade_review( ["sql_injection"], "Issues found.", task, "low" ) score_correct = grade_review( ["sql_injection"], "Issues found.", task, "high" ) assert score_correct > score_wrong def test_all_false_positives_score_zero(self): task = get_task("task_easy") score = grade_review( ["sql_injection", "race_condition", "timing_attack"], "Multiple issues.", task, ) assert score == 0.0 def test_score_clamped_to_one(self): task = get_task("task_easy") score = grade_review( ["null_pointer", "missing_return"], "Null None check missing return statement.", task, "medium", ) assert score <= 1.0 def test_score_clamped_to_zero(self): task = get_task("task_hard") score = grade_review( ["null_pointer", "missing_return", "sql_injection", "hardcoded_secret"], "Wrong issues.", task, ) assert score >= 0.0 class TestGradeBreakdown: """Test the grade_review_with_breakdown function.""" def test_breakdown_fields(self): task = get_task("task_easy") bd = grade_review_with_breakdown( ["null_pointer", "sql_injection"], "Null issue found.", task, ) assert isinstance(bd, GradeBreakdown) assert "null_pointer" in bd.correctly_found assert "missing_return" in bd.missed assert "sql_injection" in bd.false_positives def test_severity_correct_flag(self): task = get_task("task_medium") bd = grade_review_with_breakdown( ["sql_injection"], "SQL injection.", task, "high" ) assert bd.severity_correct is True bd_wrong = grade_review_with_breakdown( ["sql_injection"], "SQL injection.", task, "low" ) assert bd_wrong.severity_correct is False class TestTaskCoverage: """Test that all tasks are properly configured.""" def test_all_tasks_exist(self): expected = {"task_extra_easy", "task_easy", "task_medium", "task_hard", "task_expert"} assert set(TASKS.keys()) == expected def test_all_tasks_have_planted_issues(self): for task_id, task in TASKS.items(): assert len(task.planted_issues) > 0, f"{task_id} has no planted issues" def test_difficulty_progression(self): difficulties = [TASKS[t].difficulty for t in TASKS] assert "extra_easy" in difficulties assert "easy" in difficulties assert "medium" in difficulties assert "hard" in difficulties assert "expert" in difficulties def test_planted_issue_count_increases(self): counts = {t: len(TASKS[t].planted_issues) for t in TASKS} assert counts["task_extra_easy"] <= counts["task_easy"] assert counts["task_easy"] <= counts["task_medium"] assert counts["task_medium"] <= counts["task_hard"] assert counts["task_hard"] <= counts["task_expert"] def test_get_task_fallback(self): task = get_task("nonexistent_task") assert task.task_id == "task_easy"