Spaces:
Sleeping
Sleeping
File size: 1,271 Bytes
26aeea9 f0e5a58 26aeea9 f0e5a58 26aeea9 f0e5a58 26aeea9 f0e5a58 26aeea9 f0e5a58 26aeea9 f0e5a58 26aeea9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | """
Evaluator for SQL Injection task (Easy, 4 phases).
Formula:
weighted = sum(weight_i × phase_reward_i)
penalty = min(0.3, detection_score × 0.003)
score = clamp(weighted - penalty, 0.0, 1.0)
Phase weights: [0.15, 0.25, 0.25, 0.35]
"""
from typing import Tuple, Dict
from .base import BaseEvaluator
_WEIGHTS = [0.15, 0.25, 0.25, 0.35]
class SQLEvaluator(BaseEvaluator):
@classmethod
def evaluate(cls, task) -> Tuple[float, Dict]:
weighted = sum(w * r for w, r in zip(_WEIGHTS, task.phase_rewards))
penalty = min(0.3, task.detection_score * 0.003)
reward = round(max(0.0, min(1.0, weighted - penalty)), 4)
signals = {
"exploit_success": task.exploit_triggered,
"episode_done": task.episode_done,
"partial_score": round(weighted, 4),
"integrity_ok": not any(r < 0 for r in task.step_rewards),
"reward": reward,
"errors": task.last_error,
"alerts": "FLAG EXTRACTED" if task.flag else "",
"detection_score": task.detection_score,
"hints": f"Phase: {task.phase}" if not task.episode_done else "Episode complete.",
}
return reward, signals
|