| from typing import List, Optional, Tuple |
|
|
|
|
class ActionValidator:
    """
    Validates SRE actions to ensure they stay within safety boundaries.
    Prevents destructive operations like 100% shedding on critical nodes.

    Implements soft cooldown for scaling actions: instead of hard-rejecting
    a rapid re-scale, the action passes with a penalty signal. The environment
    can use this penalty to reduce the reward, teaching the agent to wait
    without blocking emergency scaling.
    """

    def __init__(self, critical_nodes: Optional[List[str]] = None, cooldown_ticks: int = 3):
        # Nodes that must never be load-shed; default mirrors the core tier.
        self.critical_nodes = critical_nodes or ["node-0", "node-1", "node-2"]
        # Minimum ticks between same-direction scalings before a penalty applies.
        self.cooldown_ticks = cooldown_ticks

        # target -> (tick of last scaling, action name); used for cooldown tracking.
        self._last_scale: dict[str, Tuple[int, str]] = {}
        self._current_tick: int = 0

    def set_tick(self, tick: int) -> None:
        """Update the current tick counter for cooldown tracking."""
        self._current_tick = tick

    def validate(self, action_type: str, target: str, parameter: float, valid_targets: Optional[List[str]] = None) -> Tuple[bool, str, float]:
        """
        Returns (is_valid, error_message, cooldown_penalty).

        cooldown_penalty is in [0, 1]:
            0.0 = no penalty (action is fine)
            >0  = soft penalty for rapid re-scaling (action still executes)
        Hard violations (critical shed, out-of-range) still reject with penalty=0.
        """
        # Accept both plain strings and Enum members (anything with a .value).
        if hasattr(action_type, "value"):
            action = str(action_type.value)
        else:
            action = str(action_type)

        cooldown_penalty = 0.0

        if action == "NO_OP":
            # BUG FIX: this parameter check previously sat *after* the early
            # return below and was unreachable dead code; enforce it here so
            # NO_OP with a non-zero parameter is rejected as intended.
            if parameter != 0.0:
                return False, "NO_OP requires parameter=0.0.", 0.0
            return True, "Success", 0.0

        if valid_targets is not None and target not in valid_targets:
            return False, f"Unknown target node: {target}", 0.0

        # Hard safety rule: never shed load on a critical node.
        if action == "SHED_LOAD" and target in self.critical_nodes:
            return False, f"Forbidden: Load shedding on critical node {target}.", 0.0

        if action in ["SCALE_UP", "SCALE_DOWN"]:
            if parameter < 0.0:
                return False, "Negative scaling parameters are not allowed.", 0.0
            if parameter > 10.0:
                return False, "Scaling parameter must be <= 10.0.", 0.0

            # Soft cooldown: penalize repeating the *same* scaling direction
            # on the same target within cooldown_ticks, but never block it.
            last_tick, last_action = self._last_scale.get(target, (0, ""))
            ticks_since = self._current_tick - last_tick
            if ticks_since < self.cooldown_ticks and last_action == action:
                # Linear decay: full penalty at 0 ticks elapsed, reaching 0
                # once cooldown_ticks have passed.
                cooldown_penalty = (self.cooldown_ticks - ticks_since) / self.cooldown_ticks

            # Record the scale regardless of penalty so back-to-back repeats
            # keep getting penalized.
            self._last_scale[target] = (self._current_tick, action)

        # Fractional actions must carry a ratio in [0, 1].
        if action in ["REROUTE_TRAFFIC", "SHED_LOAD"] and not (0.0 <= parameter <= 1.0):
            return False, f"{action} parameter must be in [0.0, 1.0].", 0.0

        return True, "Success", cooldown_penalty
|
|