Spaces:
Sleeping
Sleeping
File size: 2,002 Bytes
8c391c7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | from __future__ import annotations
from code_security_auditor_env.models import CodeSecurityAction
from code_security_auditor_env.server.grader import evaluate_finding
from code_security_auditor_env.server.security_environment import CodeSecurityAuditorEnvironment
from code_security_auditor_env.server.tasks import get_task
def test_grader_deterministic_easy_match() -> None:
    """Grade the first vulnerability of the "easy" task twice and compare.

    The finding submitted to the grader copies the filename, vulnerability
    type, severity and line of the task's first listed vulnerability. Both
    evaluations must compare equal, be flagged as a confirmed match, and
    report a component score inside the closed interval [0, 1].
    """
    easy_task = get_task("easy")
    target = easy_task.vulnerabilities[0]

    def grade_once():
        # Build a fresh ``matched_already`` list per call so the two
        # evaluations never share a list object.
        return evaluate_finding(
            task=easy_task,
            filename=target.filename,
            vuln_type=target.vuln_type,
            severity=target.severity,
            line_start=target.line,
            line_end=target.line,
            confidence=0.8,
            matched_already=[],
        )

    first_result = grade_once()
    second_result = grade_once()

    assert first_result == second_result
    assert first_result.is_confirmed_match
    assert 0.0 <= first_result.component_score <= 1.0
def test_env_final_score_in_unit_interval() -> None:
    """Run a short "easy" episode and check every reward lies in [0, 1].

    The episode resets the environment, inspects ``app/routes.py``, submits
    one SQL-injection finding against that file, and then submits the final
    report. After each step the observation's reward (treated as 0.0 when
    falsy) must be within the unit interval, and the last observation must
    be marked done.
    """
    environment = CodeSecurityAuditorEnvironment(default_task_id="easy")

    observation = environment.reset(task_id="easy")
    assert observation.task_id == "easy"

    # Step 1: look at the file before reporting anything about it.
    inspect_action = CodeSecurityAction(
        action_type="inspect_file",
        filename="app/routes.py",
    )
    observation = environment.step(inspect_action)
    assert 0.0 <= float(observation.reward or 0.0) <= 1.0

    # Step 2: report a single finding in the inspected file.
    finding_action = CodeSecurityAction(
        action_type="submit_finding",
        filename="app/routes.py",
        line_start=8,
        vuln_type="sql_injection",
        severity="high",
        confidence=0.85,
        evidence="user id interpolated in SQL",
        summary="SQL injection in get_user",
    )
    observation = environment.step(finding_action)
    assert 0.0 <= float(observation.reward or 0.0) <= 1.0

    # Step 3: close the episode with the final report.
    final_action = CodeSecurityAction(action_type="submit_final_report")
    observation = environment.step(final_action)
    assert observation.done is True
    assert 0.0 <= float(observation.reward or 0.0) <= 1.0
|