from __future__ import annotations

from code_security_auditor_env.models import CodeSecurityAction
from code_security_auditor_env.server.grader import evaluate_finding
from code_security_auditor_env.server.security_environment import (
    CodeSecurityAuditorEnvironment,
)
from code_security_auditor_env.server.tasks import get_task


def _reward_in_unit_interval(observation) -> bool:
    """Return True when the observation's reward, read as a float, is in [0, 1].

    A falsy reward (for example ``None``) is treated as ``0.0`` before the
    range check.
    """
    reward = float(observation.reward or 0.0)
    return 0.0 <= reward <= 1.0


def test_grader_deterministic_easy_match() -> None:
    """Grade the same finding twice and require equal, confirmed results.

    The finding mirrors the first vulnerability listed on the "easy" task
    (same filename, type, severity and line), so the grader is expected to
    report a confirmed match with a component score inside [0, 1].
    """
    easy_task = get_task("easy")
    target = easy_task.vulnerabilities[0]

    def grade_target():
        # A fresh ``matched_already`` list is created on every call so the
        # two evaluations never share mutable state.
        return evaluate_finding(
            task=easy_task,
            filename=target.filename,
            vuln_type=target.vuln_type,
            severity=target.severity,
            line_start=target.line,
            line_end=target.line,
            confidence=0.8,
            matched_already=[],
        )

    first_result = grade_target()
    second_result = grade_target()

    assert first_result == second_result
    assert first_result.is_confirmed_match
    assert 0.0 <= first_result.component_score <= 1.0


def test_env_final_score_in_unit_interval() -> None:
    """Run a short "easy" episode and check each step's reward is in [0, 1].

    The episode is: reset, inspect one file, submit one finding, then submit
    the final report. The last observation must also be flagged as done.
    """
    environment = CodeSecurityAuditorEnvironment(default_task_id="easy")

    observation = environment.reset(task_id="easy")
    assert observation.task_id == "easy"

    inspect_action = CodeSecurityAction(
        action_type="inspect_file",
        filename="app/routes.py",
    )
    observation = environment.step(inspect_action)
    assert _reward_in_unit_interval(observation)

    finding_action = CodeSecurityAction(
        action_type="submit_finding",
        filename="app/routes.py",
        line_start=8,
        vuln_type="sql_injection",
        severity="high",
        confidence=0.85,
        evidence="user id interpolated in SQL",
        summary="SQL injection in get_user",
    )
    observation = environment.step(finding_action)
    assert _reward_in_unit_interval(observation)

    report_action = CodeSecurityAction(action_type="submit_final_report")
    observation = environment.step(report_action)
    assert observation.done is True
    assert _reward_in_unit_interval(observation)