Spaces:
Sleeping
Sleeping
| from typing import List, Optional, Tuple | |
class ActionValidator:
    """
    Validates SRE actions to ensure they stay within safety boundaries.

    Hard violations (unknown target, load shedding on a critical node,
    out-of-range parameter) reject the action. Rapid re-scaling of the same
    node in the same direction is a soft violation: the action still passes,
    but a cooldown penalty in [0, 1] is returned alongside it so the caller
    can react to it (for example by reducing a reward) without emergency
    scaling ever being blocked.
    """

    # Actions whose parameter must lie in [0.0, 10.0] and that are subject
    # to the soft cooldown.
    _SCALING_ACTIONS = ("SCALE_UP", "SCALE_DOWN")
    # Actions whose parameter is a fraction in [0.0, 1.0].
    _FRACTION_ACTIONS = ("REROUTE_TRAFFIC", "SHED_LOAD")

    def __init__(self, critical_nodes: Optional[List[str]] = None, cooldown_ticks: int = 3):
        """
        Args:
            critical_nodes: Nodes on which SHED_LOAD is forbidden. ``None`` or
                an empty list falls back to ``["node-0", "node-1", "node-2"]``.
            cooldown_ticks: Length of the soft cooldown window in ticks. A
                value <= 0 disables the cooldown penalty entirely.
        """
        self.critical_nodes = critical_nodes or ["node-0", "node-1", "node-2"]
        self.cooldown_ticks = cooldown_ticks
        # Track last scale action per node: {node_id: (tick, action_type)}
        self._last_scale: dict[str, Tuple[int, str]] = {}
        self._current_tick: int = 0

    def set_tick(self, tick: int) -> None:
        """Update the current tick counter for cooldown tracking."""
        self._current_tick = tick

    def _cooldown_penalty(self, target: str, action: str) -> float:
        """Return the soft penalty in [0, 1] for repeating `action` on `target`."""
        window = self.cooldown_ticks
        previous = self._last_scale.get(target)
        # No penalty when the cooldown is disabled, the node was never scaled,
        # or the previous scale went in the other direction.
        if window <= 0 or previous is None or previous[1] != action:
            return 0.0
        # The clock may move backwards (set_tick accepts any value); treat
        # that as zero elapsed ticks so the penalty never exceeds 1.0.
        elapsed = max(0, self._current_tick - previous[0])
        if elapsed >= window:
            return 0.0
        # Penalty decays linearly: full penalty at 0 ticks, 0 at cooldown_ticks.
        return (window - elapsed) / window

    def validate(self, action_type: str, target: str, parameter: float, valid_targets: Optional[List[str]] = None) -> Tuple[bool, str, float]:
        """
        Check one action and return (is_valid, error_message, cooldown_penalty).

        Args:
            action_type: Action name, or any object exposing the name through
                a ``value`` attribute (e.g. an enum member).
            target: Node the action applies to.
            parameter: Action magnitude. Must be in [0.0, 10.0] for
                SCALE_UP / SCALE_DOWN and in [0.0, 1.0] for
                REROUTE_TRAFFIC / SHED_LOAD. NaN is rejected for all of them.
            valid_targets: When given, `target` must be one of these nodes.

        Returns:
            A tuple ``(is_valid, message, cooldown_penalty)`` where
            cooldown_penalty is in [0, 1]:
                0.0 = no penalty (action is fine)
                >0  = soft penalty for rapid re-scaling (action still executes)
            Hard violations reject with penalty 0.0.

        Side effects:
            A SCALE_UP / SCALE_DOWN that passes validation is recorded as the
            latest scale action for `target` at the current tick.
        """
        action = str(getattr(action_type, "value", action_type))

        # NO_OP always succeeds — target and parameter don't matter.
        if action == "NO_OP":
            return True, "Success", 0.0

        if valid_targets is not None and target not in valid_targets:
            return False, f"Unknown target node: {target}", 0.0

        if action == "SHED_LOAD" and target in self.critical_nodes:
            return False, f"Forbidden: Load shedding on critical node {target}.", 0.0

        cooldown_penalty = 0.0
        if action in self._SCALING_ACTIONS:
            if parameter < 0.0:
                return False, "Negative scaling parameters are not allowed.", 0.0
            # Written as `not (<=)` rather than `>` so that NaN, which fails
            # every ordering comparison, is rejected instead of slipping through.
            if not (parameter <= 10.0):
                return False, "Scaling parameter must be <= 10.0.", 0.0

            # Soft cooldown: penalize but don't block rapid re-scaling.
            cooldown_penalty = self._cooldown_penalty(target, action)
            self._last_scale[target] = (self._current_tick, action)

        if action in self._FRACTION_ACTIONS and not (0.0 <= parameter <= 1.0):
            return False, f"{action} parameter must be in [0.0, 1.0].", 0.0

        return True, "Success", cooldown_penalty