Spaces:
Sleeping
Sleeping
| """ | |
| Evaluator for SQL Injection task (Easy, 4 phases). | |
| Formula: | |
| weighted = sum(weight_i × phase_reward_i) | |
| penalty = min(0.3, detection_score × 0.003) | |
| score = clamp(weighted - penalty, 0.0, 1.0) | |
| Phase weights: [0.15, 0.25, 0.25, 0.35] | |
| """ | |
| from typing import Tuple, Dict | |
| from .base import BaseEvaluator | |
| _WEIGHTS = [0.15, 0.25, 0.25, 0.35] | |
| class SQLEvaluator(BaseEvaluator): | |
| def evaluate(cls, task) -> Tuple[float, Dict]: | |
| weighted = sum(w * r for w, r in zip(_WEIGHTS, task.phase_rewards)) | |
| penalty = min(0.3, task.detection_score * 0.003) | |
| reward = round(max(0.0, min(1.0, weighted - penalty)), 4) | |
| signals = { | |
| "exploit_success": task.exploit_triggered, | |
| "episode_done": task.episode_done, | |
| "partial_score": round(weighted, 4), | |
| "integrity_ok": not any(r < 0 for r in task.step_rewards), | |
| "reward": reward, | |
| "errors": task.last_error, | |
| "alerts": "FLAG EXTRACTED" if task.flag else "", | |
| "detection_score": task.detection_score, | |
| "hints": f"Phase: {task.phase}" if not task.episode_done else "Episode complete.", | |
| } | |
| return reward, signals | |