AIMLxDIV committed on
Commit
f60853b
·
unverified ·
2 Parent(s): 264345fe04b6ee

Merge pull request #19 from DsThakurRawat/add-grader-security

Browse files
codereview_env/graders/security_grader.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from codereview_env.models import Scenario, ActionRecord, Category, Severity
3
+ from codereview_env.graders.grader_utils import find_best_match, keyword_overlap
4
+
5
def severity_to_int(sev: Severity) -> int:
    """Map a ``Severity`` level to its ordinal rank.

    Ranks run from 1 (``Severity.LOW``) up to 4 (``Severity.CRITICAL``).
    Any value that is not one of the four known levels is ranked 0.
    """
    # Ordered from least to most severe; the 1-based position is the rank.
    ordered_levels = (
        Severity.LOW,
        Severity.MEDIUM,
        Severity.HIGH,
        Severity.CRITICAL,
    )
    rank_by_level = {
        level: rank for rank, level in enumerate(ordered_levels, start=1)
    }
    return rank_by_level.get(sev, 0)
13
+
14
def grade_security_audit(scenario: Scenario, history: List[ActionRecord]) -> float:
    """Grade a security audit against the scenario's ground-truth issues.

    Only actions in ``history`` whose category is ``Category.SECURITY`` are
    considered. Each such action that ``find_best_match`` pairs with a
    ground-truth issue earns ``0.7 * severity_score + 0.3 * keyword_score``:

    * ``severity_score`` starts at 1.0 and drops by 0.3 for every level of
      difference between the reported and the ground-truth severity,
      floored at 0.0.
    * ``keyword_score`` is ``keyword_overlap(action.body, match.keywords)``.

    The summed credit is divided by the number of ground-truth issues,
    capped at 1.0 and rounded to four decimal places.

    Args:
        scenario: Scenario providing ``ground_truth_issues``.
        history: Recorded actions to grade.

    Returns:
        The audit score. When the scenario has no ground-truth issues the
        result is 1.0 if no security action was recorded and 0.0 otherwise.
    """
    issues = scenario.ground_truth_issues
    security_actions = [
        record for record in history if record.category == Category.SECURITY
    ]

    # Nothing to find: a clean report is perfect, any security flag is a
    # false positive.
    if not issues:
        return 0.0 if security_actions else 1.0

    earned = 0.0
    # IDs of issues already credited; handed to find_best_match on every
    # call (presumably so one issue is not matched twice -- confirm against
    # grader_utils).
    credited_ids = set()

    for action in security_actions:
        match = find_best_match(action, issues, credited_ids)
        if not match:
            continue

        # Severity credit: full marks for an exact level, minus 0.3 per
        # level of distance, never below zero.
        level_gap = abs(
            severity_to_int(action.severity) - severity_to_int(match.severity)
        )
        severity_score = max(0.0, 1.0 - level_gap * 0.3)

        # Keyword credit: overlap between the comment body and the issue's
        # expected keywords.
        keyword_score = keyword_overlap(action.body, match.keywords)

        earned += 0.7 * severity_score + 0.3 * keyword_score
        credited_ids.add(match.id)

    # Normalize by the number of ground-truth issues. The empty case already
    # returned above, so the divisor is never zero here.
    return round(min(1.0, earned / len(issues)), 4)