Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from code_security_auditor_env.models import CodeSecurityAction | |
| from code_security_auditor_env.server.grader import evaluate_finding | |
| from code_security_auditor_env.server.security_environment import CodeSecurityAuditorEnvironment | |
| from code_security_auditor_env.server.tasks import get_task | |
def test_grader_deterministic_easy_match() -> None:
    """Grade the first "easy" vulnerability twice and compare the results.

    The finding is built from the task's own first vulnerability entry
    (same filename, type, severity and line), so the test expects the two
    evaluations to be equal, to be flagged as a confirmed match, and to
    carry a component score inside the closed interval [0, 1].
    """
    easy_task = get_task("easy")
    target = easy_task.vulnerabilities[0]

    def _grade():
        # A fresh ``matched_already`` list is built on every call so the
        # two evaluations never share a list object.
        return evaluate_finding(
            task=easy_task,
            filename=target.filename,
            vuln_type=target.vuln_type,
            severity=target.severity,
            line_start=target.line,
            line_end=target.line,
            confidence=0.8,
            matched_already=[],
        )

    first_result = _grade()
    second_result = _grade()

    assert first_result == second_result
    assert first_result.is_confirmed_match
    assert 0.0 <= first_result.component_score <= 1.0
def test_env_final_score_in_unit_interval() -> None:
    """Run a short "easy" episode and check each step's reward is in [0, 1].

    The episode is: reset, inspect one file, submit one finding, then
    submit the final report. After the final report the observation must
    report ``done is True``.
    """

    def _assert_reward_in_unit_interval(observation) -> None:
        # A falsy reward (e.g. ``None``) is treated as 0.0 before the
        # bounds check.
        assert 0.0 <= float(observation.reward or 0.0) <= 1.0

    environment = CodeSecurityAuditorEnvironment(default_task_id="easy")

    observation = environment.reset(task_id="easy")
    assert observation.task_id == "easy"

    inspect_action = CodeSecurityAction(
        action_type="inspect_file",
        filename="app/routes.py",
    )
    observation = environment.step(inspect_action)
    _assert_reward_in_unit_interval(observation)

    finding_action = CodeSecurityAction(
        action_type="submit_finding",
        filename="app/routes.py",
        line_start=8,
        vuln_type="sql_injection",
        severity="high",
        confidence=0.85,
        evidence="user id interpolated in SQL",
        summary="SQL injection in get_user",
    )
    observation = environment.step(finding_action)
    _assert_reward_in_unit_interval(observation)

    final_action = CodeSecurityAction(action_type="submit_final_report")
    observation = environment.step(final_action)
    assert observation.done is True
    _assert_reward_in_unit_interval(observation)