File size: 2,002 Bytes
8c391c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from __future__ import annotations

from code_security_auditor_env.models import CodeSecurityAction
from code_security_auditor_env.server.grader import evaluate_finding
from code_security_auditor_env.server.security_environment import CodeSecurityAuditorEnvironment
from code_security_auditor_env.server.tasks import get_task


def test_grader_deterministic_easy_match() -> None:
    """Check that grading the same finding twice gives equal, valid results.

    The first vulnerability listed on the "easy" task is submitted to
    ``evaluate_finding`` two times with identical arguments. The two
    evaluations must compare equal, the result must be flagged as a
    confirmed match, and its component score must lie in the closed
    interval [0, 1].
    """
    easy_task = get_task("easy")
    target = easy_task.vulnerabilities[0]

    def grade_once():
        # A new empty ``matched_already`` list is built on every call so the
        # two evaluations never share the same list object.
        return evaluate_finding(
            task=easy_task,
            filename=target.filename,
            vuln_type=target.vuln_type,
            severity=target.severity,
            line_start=target.line,
            line_end=target.line,
            confidence=0.8,
            matched_already=[],
        )

    first_result = grade_once()
    second_result = grade_once()

    assert first_result == second_result
    assert first_result.is_confirmed_match
    assert 0.0 <= first_result.component_score <= 1.0


def test_env_final_score_in_unit_interval() -> None:
    """Walk one short "easy" episode and check every reward is in [0, 1].

    The episode resets the environment, inspects ``app/routes.py``, submits
    one SQL-injection finding against that file, and then submits the final
    report. After each of the three steps the observation's reward must lie
    in the closed interval [0, 1]; the last observation must also report
    ``done`` as ``True``.
    """

    def reward_of(observation) -> float:
        # A falsy reward (such as None) is read as 0.0 before the conversion.
        return float(observation.reward or 0.0)

    environment = CodeSecurityAuditorEnvironment(default_task_id="easy")
    initial = environment.reset(task_id="easy")
    assert initial.task_id == "easy"

    inspect_action = CodeSecurityAction(action_type="inspect_file", filename="app/routes.py")
    after_inspect = environment.step(inspect_action)
    assert 0.0 <= reward_of(after_inspect) <= 1.0

    finding_fields = {
        "action_type": "submit_finding",
        "filename": "app/routes.py",
        "line_start": 8,
        "vuln_type": "sql_injection",
        "severity": "high",
        "confidence": 0.85,
        "evidence": "user id interpolated in SQL",
        "summary": "SQL injection in get_user",
    }
    after_finding = environment.step(CodeSecurityAction(**finding_fields))
    assert 0.0 <= reward_of(after_finding) <= 1.0

    report_action = CodeSecurityAction(action_type="submit_final_report")
    final = environment.step(report_action)
    assert final.done is True
    assert 0.0 <= reward_of(final) <= 1.0