File size: 1,271 Bytes
26aeea9
f0e5a58
26aeea9
f0e5a58
 
 
 
26aeea9
f0e5a58
26aeea9
 
 
 
 
f0e5a58
26aeea9
 
 
 
 
 
f0e5a58
 
 
26aeea9
 
f0e5a58
 
 
 
 
 
 
 
 
26aeea9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""
Evaluator for SQL Injection task (Easy, 4 phases).

Formula:
    weighted = sum(weight_i × phase_reward_i)
    penalty  = min(0.3, detection_score × 0.003)
    score    = clamp(weighted - penalty, 0.0, 1.0)

Phase weights: [0.15, 0.25, 0.25, 0.35]
"""

from typing import Tuple, Dict
from .base import BaseEvaluator

_WEIGHTS = [0.15, 0.25, 0.25, 0.35]


class SQLEvaluator(BaseEvaluator):

    @classmethod
    def evaluate(cls, task) -> Tuple[float, Dict]:
        weighted = sum(w * r for w, r in zip(_WEIGHTS, task.phase_rewards))
        penalty = min(0.3, task.detection_score * 0.003)
        reward = round(max(0.0, min(1.0, weighted - penalty)), 4)

        signals = {
            "exploit_success": task.exploit_triggered,
            "episode_done":    task.episode_done,
            "partial_score":   round(weighted, 4),
            "integrity_ok":    not any(r < 0 for r in task.step_rewards),
            "reward":          reward,
            "errors":          task.last_error,
            "alerts":          "FLAG EXTRACTED" if task.flag else "",
            "detection_score": task.detection_score,
            "hints":           f"Phase: {task.phase}" if not task.episode_done else "Episode complete.",
        }
        return reward, signals