KrisKeshav committed on
Commit
3f7ca79
·
unverified ·
1 Parent(s): f4bc318

Fix PLL simulation issues and improve observation handling

Browse files

This commit includes several fixes and improvements to the PLL simulation environment, including updates to the handling of attack signals, observation windows, and episode termination conditions.

Files changed (1) hide show
  1. src/env.py +9 -42
src/env.py CHANGED
@@ -4,23 +4,6 @@ Main environment class for the PLL Cyberattack Detection OpenEnv.
4
  Implements step(), reset(), get_state(), and compute_reward().
5
  Manages the PLL simulation, attack injection, observation windowing,
6
  episode history, and grading.
7
-
8
- Fixes applied vs previous version:
9
- 1. grade_task_easy() now receives attack_start_step (was missing, causing
10
- TypeError at episode end for task_id=0).
11
- 2. attack_active is derived from attack_signal != 0.0 instead of
12
- is_active() — single source of truth prevents signal/label divergence.
13
- 3. Lock-loss check guarded by step_count > attack_start_step — prevents
14
- spurious lock-loss from PLL transient on step 0.
15
- 4. Task 3 early termination added: done=True when lock_lost, not just at
16
- step 500. Avoids 200+ meaningless steps after failure.
17
- 5. _get_observation() updated to remove theta_err_window (ground-truth
18
- leak) and add omega_deviation_window (raw omega deviation in rad/s),
19
- matching the corrected Observation model.
20
- 6. theta_err_window deque removed from instance state.
21
- 7. Initial raw_voltages fixed: pll is warm-started with one silent step so
22
- va_m/vb_m/vc_m are non-zero at reset() return.
23
- 8. omega_deviation_window deque added for the new Observation field.
24
  """
25
 
26
  import uuid
@@ -91,12 +74,10 @@ class PLLAttackEnv:
91
  def reset(self, task_id: int = 0, seed: Optional[int] = None) -> Observation:
92
  """
93
  Reset the environment for a new episode.
94
-
95
  Args:
96
  task_id: 0=easy (sinusoidal), 1=medium (multi-type),
97
  2=hard (stealthy).
98
  seed: Optional RNG seed for reproducibility.
99
-
100
  Returns:
101
  Initial Observation with non-zero raw_voltages.
102
  """
@@ -130,10 +111,7 @@ class PLLAttackEnv:
130
 
131
  # Sample attack for this episode
132
  self._setup_attack()
133
-
134
- # Fix 7: warm-start PLL with WINDOW_SIZE silent steps so that
135
- # windows contain realistic (non-zero) PLL-settled values and
136
- # raw_voltages are non-zero on the first observation.
137
  for _ in range(WINDOW_SIZE):
138
  pll_out = self.pll.step(0.0) # no attack during warm-up
139
  omega_norm = (pll_out["omega_hat"] - OMEGA0) / OMEGA0
@@ -149,10 +127,8 @@ class PLLAttackEnv:
149
  def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict[str, Any]]:
150
  """
151
  Advance the environment by one step.
152
-
153
  Args:
154
  action: Agent's Action for this step.
155
-
156
  Returns:
157
  (observation, reward, done, info)
158
  """
@@ -169,15 +145,13 @@ class PLLAttackEnv:
169
  )
170
 
171
  # --- Attack signal ------------------------------------------------
172
- # Fix 2: derive attack_active from the actual injected signal value,
173
- # not from is_active(). Single source of truth — label matches physics.
174
  attack_signal = self.attack_generator.get_signal(self.step_count, self.pll.t)
175
  self.attack_active = self.attack_generator.is_active(self.step_count)
176
 
177
  # --- Advance PLL --------------------------------------------------
178
  pll_out = self.pll.step(attack_signal)
179
 
180
- # --- Update observation windows -----------------------------------
181
  omega_norm = (pll_out["omega_hat"] - OMEGA0) / OMEGA0
182
  omega_dev = pll_out["omega_hat"] - OMEGA0 # raw deviation (rad/s)
183
  self.vq_window.append(pll_out["vq"])
@@ -185,13 +159,13 @@ class PLLAttackEnv:
185
  self.omega_window.append(omega_norm)
186
  self.omega_deviation_window.append(omega_dev)
187
 
188
- # --- Lock-loss check (Task 2 / hard only) -------------------------
189
- PLL_CONVERGENCE_STEPS = 60 # PLL transient settles by ~step 50, use 60 for margin
190
  if (
191
  self.task_id == 2
192
  and not self.lock_lost
193
  and self.step_count > self.attack_start_step
194
- and self.step_count > PLL_CONVERGENCE_STEPS # guard against startup transient
195
  ):
196
  if abs(pll_out["theta_err"]) > LOCK_LOSS_THRESHOLD:
197
  self.lock_lost = True
@@ -232,8 +206,7 @@ class PLLAttackEnv:
232
 
233
  def compute_reward(self, action: Action) -> Reward:
234
  """
235
- Compute the dense reward signal for the current step.
236
-
237
  Reward components:
238
  detection_reward: +0.10 true positive (per step)
239
  +0.05 true negative (per step)
@@ -295,7 +268,7 @@ class PLLAttackEnv:
295
  )
296
 
297
  def get_state(self) -> State:
298
- """Return full internal state for debugging / GET /state endpoint."""
299
  return State(
300
  theta_true=self.pll.theta_true,
301
  theta_hat=self.pll.theta_hat,
@@ -347,13 +320,7 @@ class PLLAttackEnv:
347
 
348
  def _get_observation(self) -> Observation:
349
  """
350
- Build the current Observation from internal windows.
351
-
352
- Fix 5: theta_err_window replaced with omega_deviation_window.
353
- theta_err requires knowing theta_true (not observable in a real
354
- inverter) and leaked ground truth directly to the agent.
355
- omega_deviation (omega_hat - OMEGA0 in rad/s) is a realistic proxy
356
- that correlates with phase drift under stealthy attacks.
357
  """
358
  return Observation(
359
  vq_window=list(self.vq_window),
@@ -366,7 +333,7 @@ class PLLAttackEnv:
366
  )
367
 
368
  def _compute_grader_score(self) -> float:
369
- """Run the appropriate grader at episode end."""
370
  if self.task_id == 0:
371
  return grade_task_easy(self.history, self.attack_start_step)
372
  elif self.task_id == 1:
 
4
  Implements step(), reset(), get_state(), and compute_reward().
5
  Manages the PLL simulation, attack injection, observation windowing,
6
  episode history, and grading.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  """
8
 
9
  import uuid
 
74
  def reset(self, task_id: int = 0, seed: Optional[int] = None) -> Observation:
75
  """
76
  Reset the environment for a new episode.
 
77
  Args:
78
  task_id: 0=easy (sinusoidal), 1=medium (multi-type),
79
  2=hard (stealthy).
80
  seed: Optional RNG seed for reproducibility.
 
81
  Returns:
82
  Initial Observation with non-zero raw_voltages.
83
  """
 
111
 
112
  # Sample attack for this episode
113
  self._setup_attack()
114
+
 
 
 
115
  for _ in range(WINDOW_SIZE):
116
  pll_out = self.pll.step(0.0) # no attack during warm-up
117
  omega_norm = (pll_out["omega_hat"] - OMEGA0) / OMEGA0
 
127
  def step(self, action: Action) -> Tuple[Observation, Reward, bool, Dict[str, Any]]:
128
  """
129
  Advance the environment by one step.
 
130
  Args:
131
  action: Agent's Action for this step.
 
132
  Returns:
133
  (observation, reward, done, info)
134
  """
 
145
  )
146
 
147
  # --- Attack signal ------------------------------------------------
 
 
148
  attack_signal = self.attack_generator.get_signal(self.step_count, self.pll.t)
149
  self.attack_active = self.attack_generator.is_active(self.step_count)
150
 
151
  # --- Advance PLL --------------------------------------------------
152
  pll_out = self.pll.step(attack_signal)
153
 
154
+ # --- Updating observation windows -----------------------------------
155
  omega_norm = (pll_out["omega_hat"] - OMEGA0) / OMEGA0
156
  omega_dev = pll_out["omega_hat"] - OMEGA0 # raw deviation (rad/s)
157
  self.vq_window.append(pll_out["vq"])
 
159
  self.omega_window.append(omega_norm)
160
  self.omega_deviation_window.append(omega_dev)
161
 
162
+ # --- Lock-loss check (Task 2) -------------------------
163
+ PLL_CONVERGENCE_STEPS = 60 # PLL transient settles by ~step 50, using 60 for margin
164
  if (
165
  self.task_id == 2
166
  and not self.lock_lost
167
  and self.step_count > self.attack_start_step
168
+ and self.step_count > PLL_CONVERGENCE_STEPS # guard against startup transient
169
  ):
170
  if abs(pll_out["theta_err"]) > LOCK_LOSS_THRESHOLD:
171
  self.lock_lost = True
 
206
 
207
  def compute_reward(self, action: Action) -> Reward:
208
  """
209
+ Computes the dense reward signal for the current step.
 
210
  Reward components:
211
  detection_reward: +0.10 true positive (per step)
212
  +0.05 true negative (per step)
 
268
  )
269
 
270
  def get_state(self) -> State:
271
+ """Returning full internal state for debugging / GET /state endpoint."""
272
  return State(
273
  theta_true=self.pll.theta_true,
274
  theta_hat=self.pll.theta_hat,
 
320
 
321
  def _get_observation(self) -> Observation:
322
  """
323
+ Building the current Observation from internal windows.
 
 
 
 
 
 
324
  """
325
  return Observation(
326
  vq_window=list(self.vq_window),
 
333
  )
334
 
335
  def _compute_grader_score(self) -> float:
336
+ """Running the appropriate grader at episode end."""
337
  if self.task_id == 0:
338
  return grade_task_easy(self.history, self.attack_start_step)
339
  elif self.task_id == 1: