Spaces:
Sleeping
Sleeping
KrisKeshav committed on
Fix PLL simulation issues and improve observation handling
Browse files
This commit includes several fixes and improvements to the PLL simulation environment, including updates to the handling of attack signals, observation windows, and episode termination conditions.
- src/env.py +9 -42
src/env.py
CHANGED
|
@@ -4,23 +4,6 @@ Main environment class for the PLL Cyberattack Detection OpenEnv.
|
|
| 4 |
Implements step(), reset(), get_state(), and compute_reward().
|
| 5 |
Manages the PLL simulation, attack injection, observation windowing,
|
| 6 |
episode history, and grading.
|
| 7 |
-
|
| 8 |
-
Fixes applied vs previous version:
|
| 9 |
-
1. grade_task_easy() now receives attack_start_step (was missing, causing
|
| 10 |
-
TypeError at episode end for task_id=0).
|
| 11 |
-
2. attack_active is derived from attack_signal != 0.0 instead of
|
| 12 |
-
is_active() — single source of truth prevents signal/label divergence.
|
| 13 |
-
3. Lock-loss check guarded by step_count > attack_start_step — prevents
|
| 14 |
-
spurious lock-loss from PLL transient on step 0.
|
| 15 |
-
4. Task 3 early termination added: done=True when lock_lost, not just at
|
| 16 |
-
step 500. Avoids 200+ meaningless steps after failure.
|
| 17 |
-
5. _get_observation() updated to remove theta_err_window (ground-truth
|
| 18 |
-
leak) and add omega_deviation_window (raw omega deviation in rad/s),
|
| 19 |
-
matching the corrected Observation model.
|
| 20 |
-
6. theta_err_window deque removed from instance state.
|
| 21 |
-
7. Initial raw_voltages fixed: pll is warm-started with one silent step so
|
| 22 |
-
va_m/vb_m/vc_m are non-zero at reset() return.
|
| 23 |
-
8. omega_deviation_window deque added for the new Observation field.
|
| 24 |
"""
|
| 25 |
|
| 26 |
import uuid
|
|
@@ -91,12 +74,10 @@ class PLLAttackEnv:
|
|
| 91 |
def reset(self, task_id: int = 0, seed: Optional[int] = None) -> Observation:
|
| 92 |
"""
|
| 93 |
Reset the environment for a new episode.
|
| 94 |
-
|
| 95 |
Args:
|
| 96 |
task_id: 0=easy (sinusoidal), 1=medium (multi-type),
|
| 97 |
2=hard (stealthy).
|
| 98 |
seed: Optional RNG seed for reproducibility.
|
| 99 |
-
|
| 100 |
Returns:
|
| 101 |
Initial Observation with non-zero raw_voltages.
|
| 102 |
"""
|
|
@@ -130,10 +111,7 @@ class PLLAttackEnv:
|
|
| 130 |
|
| 131 |
# Sample attack for this episode
|
| 132 |
self._setup_attack()
|
| 133 |
-
|
| 134 |
-
# Fix 7: warm-start PLL with WINDOW_SIZE silent steps so that
|
| 135 |
-
# windows contain realistic (non-zero) PLL-settled values and
|
| 136 |
-
# raw_voltages are non-zero on the first observation.
|
| 137 |
for _ in range(WINDOW_SIZE):
|
| 138 |
pll_out = self.pll.step(0.0) # no attack during warm-up
|
| 139 |
omega_norm = (pll_out["omega_hat"] - OMEGA0) / OMEGA0
|
|
@@ -149,10 +127,8 @@ class PLLAttackEnv:
|
|
| 149 |
def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict[str, Any]]:
|
| 150 |
"""
|
| 151 |
Advance the environment by one step.
|
| 152 |
-
|
| 153 |
Args:
|
| 154 |
action: Agent's Action for this step.
|
| 155 |
-
|
| 156 |
Returns:
|
| 157 |
(observation, reward, done, info)
|
| 158 |
"""
|
|
@@ -169,15 +145,13 @@ class PLLAttackEnv:
|
|
| 169 |
)
|
| 170 |
|
| 171 |
# --- Attack signal ------------------------------------------------
|
| 172 |
-
# Fix 2: derive attack_active from the actual injected signal value,
|
| 173 |
-
# not from is_active(). Single source of truth — label matches physics.
|
| 174 |
attack_signal = self.attack_generator.get_signal(self.step_count, self.pll.t)
|
| 175 |
self.attack_active = self.attack_generator.is_active(self.step_count)
|
| 176 |
|
| 177 |
# --- Advance PLL --------------------------------------------------
|
| 178 |
pll_out = self.pll.step(attack_signal)
|
| 179 |
|
| 180 |
-
# ---
|
| 181 |
omega_norm = (pll_out["omega_hat"] - OMEGA0) / OMEGA0
|
| 182 |
omega_dev = pll_out["omega_hat"] - OMEGA0 # raw deviation (rad/s)
|
| 183 |
self.vq_window.append(pll_out["vq"])
|
|
@@ -185,13 +159,13 @@ class PLLAttackEnv:
|
|
| 185 |
self.omega_window.append(omega_norm)
|
| 186 |
self.omega_deviation_window.append(omega_dev)
|
| 187 |
|
| 188 |
-
# --- Lock-loss check (Task 2
|
| 189 |
-
PLL_CONVERGENCE_STEPS = 60 # PLL transient settles by ~step 50,
|
| 190 |
if (
|
| 191 |
self.task_id == 2
|
| 192 |
and not self.lock_lost
|
| 193 |
and self.step_count > self.attack_start_step
|
| 194 |
-
and self.step_count > PLL_CONVERGENCE_STEPS #
|
| 195 |
):
|
| 196 |
if abs(pll_out["theta_err"]) > LOCK_LOSS_THRESHOLD:
|
| 197 |
self.lock_lost = True
|
|
@@ -232,8 +206,7 @@ class PLLAttackEnv:
|
|
| 232 |
|
| 233 |
def compute_reward(self, action: Action) -> Reward:
|
| 234 |
"""
|
| 235 |
-
|
| 236 |
-
|
| 237 |
Reward components:
|
| 238 |
detection_reward: +0.10 true positive (per step)
|
| 239 |
+0.05 true negative (per step)
|
|
@@ -295,7 +268,7 @@ class PLLAttackEnv:
|
|
| 295 |
)
|
| 296 |
|
| 297 |
def get_state(self) -> State:
|
| 298 |
-
"""
|
| 299 |
return State(
|
| 300 |
theta_true=self.pll.theta_true,
|
| 301 |
theta_hat=self.pll.theta_hat,
|
|
@@ -347,13 +320,7 @@ class PLLAttackEnv:
|
|
| 347 |
|
| 348 |
def _get_observation(self) -> Observation:
|
| 349 |
"""
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
Fix 5: theta_err_window replaced with omega_deviation_window.
|
| 353 |
-
theta_err requires knowing theta_true (not observable in a real
|
| 354 |
-
inverter) and leaked ground truth directly to the agent.
|
| 355 |
-
omega_deviation (omega_hat - OMEGA0 in rad/s) is a realistic proxy
|
| 356 |
-
that correlates with phase drift under stealthy attacks.
|
| 357 |
"""
|
| 358 |
return Observation(
|
| 359 |
vq_window=list(self.vq_window),
|
|
@@ -366,7 +333,7 @@ class PLLAttackEnv:
|
|
| 366 |
)
|
| 367 |
|
| 368 |
def _compute_grader_score(self) -> float:
|
| 369 |
-
"""
|
| 370 |
if self.task_id == 0:
|
| 371 |
return grade_task_easy(self.history, self.attack_start_step)
|
| 372 |
elif self.task_id == 1:
|
|
|
|
| 4 |
Implements step(), reset(), get_state(), and compute_reward().
|
| 5 |
Manages the PLL simulation, attack injection, observation windowing,
|
| 6 |
episode history, and grading.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
import uuid
|
|
|
|
| 74 |
def reset(self, task_id: int = 0, seed: Optional[int] = None) -> Observation:
|
| 75 |
"""
|
| 76 |
Reset the environment for a new episode.
|
|
|
|
| 77 |
Args:
|
| 78 |
task_id: 0=easy (sinusoidal), 1=medium (multi-type),
|
| 79 |
2=hard (stealthy).
|
| 80 |
seed: Optional RNG seed for reproducibility.
|
|
|
|
| 81 |
Returns:
|
| 82 |
Initial Observation with non-zero raw_voltages.
|
| 83 |
"""
|
|
|
|
| 111 |
|
| 112 |
# Sample attack for this episode
|
| 113 |
self._setup_attack()
|
| 114 |
+
|
|
|
|
|
|
|
|
|
|
| 115 |
for _ in range(WINDOW_SIZE):
|
| 116 |
pll_out = self.pll.step(0.0) # no attack during warm-up
|
| 117 |
omega_norm = (pll_out["omega_hat"] - OMEGA0) / OMEGA0
|
|
|
|
| 127 |
def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict[str, Any]]:
|
| 128 |
"""
|
| 129 |
Advance the environment by one step.
|
|
|
|
| 130 |
Args:
|
| 131 |
action: Agent's Action for this step.
|
|
|
|
| 132 |
Returns:
|
| 133 |
(observation, reward, done, info)
|
| 134 |
"""
|
|
|
|
| 145 |
)
|
| 146 |
|
| 147 |
# --- Attack signal ------------------------------------------------
|
|
|
|
|
|
|
| 148 |
attack_signal = self.attack_generator.get_signal(self.step_count, self.pll.t)
|
| 149 |
self.attack_active = self.attack_generator.is_active(self.step_count)
|
| 150 |
|
| 151 |
# --- Advance PLL --------------------------------------------------
|
| 152 |
pll_out = self.pll.step(attack_signal)
|
| 153 |
|
| 154 |
+
# --- Updating observation windows -----------------------------------
|
| 155 |
omega_norm = (pll_out["omega_hat"] - OMEGA0) / OMEGA0
|
| 156 |
omega_dev = pll_out["omega_hat"] - OMEGA0 # raw deviation (rad/s)
|
| 157 |
self.vq_window.append(pll_out["vq"])
|
|
|
|
| 159 |
self.omega_window.append(omega_norm)
|
| 160 |
self.omega_deviation_window.append(omega_dev)
|
| 161 |
|
| 162 |
+
# --- Lock-loss check (Task 2) -------------------------
|
| 163 |
+
PLL_CONVERGENCE_STEPS = 60 # PLL transient settles by ~step 50, using 60 for margin
|
| 164 |
if (
|
| 165 |
self.task_id == 2
|
| 166 |
and not self.lock_lost
|
| 167 |
and self.step_count > self.attack_start_step
|
| 168 |
+
and self.step_count > PLL_CONVERGENCE_STEPS # guard against startup transient
|
| 169 |
):
|
| 170 |
if abs(pll_out["theta_err"]) > LOCK_LOSS_THRESHOLD:
|
| 171 |
self.lock_lost = True
|
|
|
|
| 206 |
|
| 207 |
def compute_reward(self, action: Action) -> Reward:
|
| 208 |
"""
|
| 209 |
+
Computes the dense reward signal for the current step.
|
|
|
|
| 210 |
Reward components:
|
| 211 |
detection_reward: +0.10 true positive (per step)
|
| 212 |
+0.05 true negative (per step)
|
|
|
|
| 268 |
)
|
| 269 |
|
| 270 |
def get_state(self) -> State:
|
| 271 |
+
"""Returning full internal state for debugging / GET /state endpoint."""
|
| 272 |
return State(
|
| 273 |
theta_true=self.pll.theta_true,
|
| 274 |
theta_hat=self.pll.theta_hat,
|
|
|
|
| 320 |
|
| 321 |
def _get_observation(self) -> Observation:
|
| 322 |
"""
|
| 323 |
+
Building the current Observation from internal windows.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
"""
|
| 325 |
return Observation(
|
| 326 |
vq_window=list(self.vq_window),
|
|
|
|
| 333 |
)
|
| 334 |
|
| 335 |
def _compute_grader_score(self) -> float:
|
| 336 |
+
"""Running the appropriate grader at episode end."""
|
| 337 |
if self.task_id == 0:
|
| 338 |
return grade_task_easy(self.history, self.attack_start_step)
|
| 339 |
elif self.task_id == 1:
|