File size: 25,534 Bytes
363abf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad92ece
 
 
 
 
 
 
 
 
 
363abf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad92ece
 
 
 
 
 
 
 
363abf3
 
 
 
 
ad92ece
 
 
 
 
 
 
 
 
363abf3
 
 
 
 
 
ad92ece
 
 
 
 
363abf3
 
 
 
 
 
 
ad92ece
 
 
 
 
363abf3
ad92ece
 
 
363abf3
ad92ece
 
 
363abf3
ad92ece
363abf3
ad92ece
363abf3
ad92ece
363abf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad92ece
 
 
 
 
 
 
 
363abf3
ad92ece
 
363abf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad92ece
 
 
 
 
 
 
 
 
 
 
 
 
363abf3
ad92ece
363abf3
ad92ece
363abf3
ad92ece
 
363abf3
ad92ece
 
363abf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
"""
Wildfire Containment Simulator — Main Environment.

Implements the OpenEnv API: step(), reset(), state().
Orchestrates grid, fire spread, weather, resources, and reward computation.
"""

from __future__ import annotations

import logging
from typing import Optional

import numpy as np
from pydantic import ValidationError

from .models import (
    Action, ActionType, Observation, StepResult, ClusterStats,
    FireState, FuelType, TierConfig, TIER_EASY, TIER_MEDIUM, TIER_HARD,
)
from .grid import Grid
from .fire_spread import FireSpreadEngine
from .weather import WeatherEngine
from .resources import ResourceManager
from .reward import RewardCalculator
from .briefing import generate_briefing, OperationalBriefing

logger = logging.getLogger(__name__)


class WildfireEnv:
    """
    Wildfire Containment Simulator environment.

    Simulates a grid-based wildfire where an AI agent dispatches
    firefighting resources to contain the fire before it reaches
    populated zones.

    API:
        reset(task_id, seed) -> Observation
        step(action) -> StepResult
        state() -> dict
    """

    TIER_MAP = {
        "easy": TIER_EASY,
        "medium": TIER_MEDIUM,
        "hard": TIER_HARD,
    }

    def __init__(self, config: Optional[TierConfig] = None):
        """Create an un-started environment; reset() builds the simulation.

        Args:
            config: Tier configuration to hold until reset() is called.
                TIER_EASY is used when the argument is omitted (or falsy).
        """
        self.config = config if config else TIER_EASY
        self.rng = np.random.default_rng(42)

        # Episode progress.
        self.current_step = 0
        self.done = False
        self.events_log: list[str] = []

        # Simulation components: placeholders until reset() constructs them.
        self.grid: Optional[Grid] = None
        self.fire_engine: Optional[FireSpreadEngine] = None
        self.weather: Optional[WeatherEngine] = None
        self.resources: Optional[ResourceManager] = None
        self.reward_calc: Optional[RewardCalculator] = None

        # Per-episode bookkeeping consumed by the reward computation.
        self._prev_action: Optional[Action] = None
        self._prev_state: Optional[dict] = None
        self._invalid_action_count: int = 0
        self._crew_casualty_occurred: bool = False
        self.active_briefing: Optional[OperationalBriefing] = None

        # Most recent observation handed to the agent (the agent's view,
        # not the ground truth).
        self._current_obs: Optional[Observation] = None

    def reset(self, task_id: str = "easy", seed: int = 42) -> Observation:
        """
        Initialize the environment for a new episode.

        Args:
            task_id: One of "easy", "medium", "hard". Any other value falls
                back to the easy tier; a warning is logged so a typo does not
                silently change the difficulty being evaluated.
            seed: Random seed for reproducibility.

        Returns:
            Initial observation, with the episode's operational briefing
            attached and the start-of-episode event in ``recent_events``.
        """
        if task_id not in self.TIER_MAP:
            logger.warning(
                "Unknown task_id %r; falling back to the 'easy' tier.", task_id
            )
        self.config = self.TIER_MAP.get(task_id, TIER_EASY)
        self.rng = np.random.default_rng(seed)
        self.current_step = 0
        self.done = False
        self.events_log = []
        self._prev_action = None
        self._invalid_action_count = 0
        self._crew_casualty_occurred = False
        self._prev_state = None

        # Initialize components. They share self.rng, so the construction
        # order is kept fixed to keep seeded episodes reproducible.
        self.grid = Grid(self.config, self.rng)
        self.fire_engine = FireSpreadEngine(self.grid, self.rng)
        self.weather = WeatherEngine(self.config, self.rng)
        self.resources = ResourceManager(self.config, self.grid)
        self.reward_calc = RewardCalculator(self.config)
        self.reward_calc.reset()
        self.resources.reset()
        self.weather.reset()

        # Ignite initial fire points
        self._ignite_initial_fires()

        # Generate operational briefing for this episode
        self.active_briefing = generate_briefing(self.config, self.rng, self.grid)

        # Record the start event before building the observation so that the
        # initial observation's recent_events already contains it.
        self.events_log.append("Episode started. Fire ignited.")

        # Build and return initial observation (with briefing attached)
        obs = self._build_observation()
        obs.briefing = self.active_briefing
        self._current_obs = obs
        return obs

    def step(self, action: Action) -> StepResult:
        """
        Execute one simulation step.

        Follows the 11-step tick sequence:
        1. Validate action
        2. Execute action
        3. Spread fire
        4. Update intensities (handled inside spread)
        5. Apply suppression
        6. Evolve weather
        7. Update moisture
        8. Propagate smoke
        9. Compute reward
        10. Check termination
        11. Build observation

        Between steps 8 and 9 the method also ticks tanker cooldowns, expires
        recon reveals, triggers the configured staggered ignition / crew loss,
        and advances ``current_step``.

        Args:
            action: The agent's chosen action.

        Returns:
            StepResult with observation, reward, done flag, and info dict.
            When the episode is already finished, a zero-reward result with
            ``info["error"]`` set is returned and nothing is simulated.

        Raises:
            RuntimeError: If called before reset() has built the environment.
        """
        # Fail with a clear message instead of an AttributeError on None when
        # the components have not been created yet.
        if self.grid is None:
            raise RuntimeError("Environment not initialized. Call reset() first.")

        if self.done:
            return StepResult(
                observation=self._build_observation(),
                reward=0.0,
                done=True,
                info={"error": "Episode already finished"},
            )

        step_events: list[str] = []

        # Snapshot state before this step's changes
        prev_state = self._snapshot_state()

        # ── Step 1: Validate action ──
        # Redundancy is judged against the previous action, so it must be
        # evaluated before _prev_action is overwritten below.
        action_was_redundant = self._is_redundant(action)
        valid, msg = self._validate_action(action)
        if not valid:
            self.reward_calc.record_invalid_action()
            self._invalid_action_count += 1
            self.resources.wasted_actions += 1
            step_events.append(f"Invalid action: {msg}")
            # Skip to reward/termination
        else:
            # ── Step 2: Execute action ──
            exec_events = self._execute_action(action)
            step_events.extend(exec_events)

        self._prev_action = action

        # ── Step 3-4: Spread fire + update intensities ──
        # NOTE(review): ws is captured before weather.step() below. If
        # WeatherEngine replaces its state object rather than mutating it,
        # steps 7-8 use the pre-evolution weather — confirm this is intended.
        ws = self.weather.state
        spread_events = self.fire_engine.spread_step(ws.wind_speed_kmh, ws.wind_direction_deg)
        step_events.extend(spread_events)

        # ── Step 5: Apply suppression ──
        supp_events = self.resources.apply_suppression()
        step_events.extend(supp_events)

        # ── Step 6: Evolve weather ──
        weather_events = self.weather.step(self.current_step)
        step_events.extend(weather_events)

        # ── Step 7: Update moisture ──
        self.grid.update_moisture(ws.rain_active, ws.humidity_pct)

        # ── Step 8: Propagate smoke ──
        self.grid.propagate_smoke(ws.wind_direction_deg, ws.wind_speed_kmh)

        # ── Tick tanker cooldowns ──
        self.resources.tick_tanker_cooldowns()

        # ── Expire recon reveals ──
        self.resources.expire_reveals(self.current_step)

        # ── Handle staggered ignition (hard tier) ──
        if (self.config.staggered_ignition_step is not None
                and self.current_step == self.config.staggered_ignition_step):
            self._ignite_staggered_fire()
            step_events.append("NEW IGNITION: Additional fire started!")

        # ── Handle crew loss (hard tier) ──
        if (self.config.enable_crew_loss
                and self.config.crew_loss_step == self.current_step
                and self.config.crew_loss_id):
            loss_events = self.resources.apply_crew_loss(self.config.crew_loss_id)
            step_events.extend(loss_events)

        # Track crew casualty
        if self.resources.crew_casualties:
            self._crew_casualty_occurred = True

        self.current_step += 1

        # Log a hold-message when fire is extinguished before min_active_steps so
        # agents (and the LLM) understand the episode must continue for monitoring.
        burning_now = (self.grid.count_by_state(FireState.BURNING)
                       + self.grid.count_by_state(FireState.EMBER))
        if burning_now == 0 and self.current_step < self.config.min_active_steps:
            step_events.append(
                f"All fires contained. Holding perimeter until step "
                f"{self.config.min_active_steps} (min_active_steps)."
            )

        # ── Step 9: Compute reward ──
        legacy_reward = self.reward_calc.compute_reward(self.grid, self.resources, self.current_step)

        current_state = self._snapshot_state()
        step_reward = self.reward_calc.compute_step_reward(
            prev_state, current_state, valid, action_was_redundant
        )

        # ── Step 10: Check termination ──
        self.done = self._check_termination()

        terminal_reward = 0.0
        if self.done:
            # Copy so the extra terminal-only keys do not leak into the
            # per-step snapshot.
            terminal_state = dict(current_state)
            terminal_state["crew_casualty_occurred"] = self._crew_casualty_occurred
            terminal_state["invalid_action_count"] = self._invalid_action_count
            if self.active_briefing:
                terminal_state["priority_zones"] = self.active_briefing.priority_populated_zones
                terminal_state["_grid_ref"] = self.grid
            terminal_reward = self.reward_calc.compute_terminal_reward(
                terminal_state, self.current_step, self.config.episode_length
            )

        reward = step_reward + terminal_reward

        # Fold this step's events into the rolling log BEFORE building the
        # observation; otherwise recent_events lags one step behind and the
        # final step's events never reach the agent.
        # Keep the last 20 events (an observation exposes only the newest 5).
        self.events_log = (self.events_log + step_events)[-20:]

        # ── Step 11: Build observation ──
        obs = self._build_observation()

        info = {
            "step": self.current_step,
            "events": step_events,
            "legacy_reward": round(legacy_reward, 4),
            "reward_breakdown": self.reward_calc.get_component_breakdown(
                self.grid, self.resources, self.current_step
            ),
        }

        result = StepResult(
            observation=obs,
            reward=round(reward, 4),
            done=self.done,
            info=info,
        )
        self._current_obs = result.observation
        return result

    def state(self) -> dict:
        """
        Expose the complete, un-occluded simulation state.

        Meant for grading and debugging only — the result includes information
        that is hidden from the agent.

        Returns:
            A dict with the tier name, step counter, done flag, a per-cell
            grid dump, weather and resource dumps, the reward breakdown and
            population / burn totals. Before reset() has been called, a dict
            holding a single "error" key is returned instead.
        """
        if self.grid is None:
            return {"error": "Environment not initialized. Call reset() first."}

        grid = self.grid

        def _cell_record(r: int, c: int) -> dict:
            # Merge the static and dynamic layers of one cell into a flat dict;
            # float fields are rounded to four decimals.
            fixed = grid.static_grid[r][c]
            live = grid.dynamic_grid[r][c]
            return {
                "row": r, "col": c,
                "fuel_type": fixed.fuel_type.value,
                "fuel_load": fixed.fuel_load,
                "elevation_m": fixed.elevation_m,
                "is_populated": fixed.is_populated,
                "population": fixed.population,
                "fire_state": live.fire_state.value,
                "fire_intensity": round(live.fire_intensity, 4),
                "moisture": round(live.moisture, 4),
                "time_burning": live.time_burning,
                "suppression_level": round(live.suppression_level, 4),
                "smoke_density": round(live.smoke_density, 4),
                "crew_present": live.crew_present,
            }

        # Row-major dump of every cell, with no fog or smoke occlusion applied.
        full_grid = [
            [_cell_record(r, c) for c in range(grid.cols)]
            for r in range(grid.rows)
        ]

        return {
            "tier": self.config.tier_name,
            "current_step": self.current_step,
            "done": self.done,
            "grid": full_grid,
            "weather": self.weather.get_true_state().model_dump(),
            "resources": self.resources.get_resource_state().model_dump(),
            "reward_breakdown": self.reward_calc.get_component_breakdown(
                grid, self.resources, self.current_step
            ),
            "total_population": grid.get_total_population(),
            "population_lost": grid.get_population_lost(),
            "cells_burned": grid.get_burned_count(),
            "total_burnable": grid.get_total_burnable(),
        }

    # ══════════════════════════════════════════════════
    # PRIVATE METHODS
    # ══════════════════════════════════════════════════

    def _snapshot_state(self) -> dict:
        """Return the small set of figures the reward delta is computed from.

        Returns:
            Dict with "containment_pct" (a 0-1 fraction, 1.0 when the fire has
            no perimeter), "pop_lost" and "total_pop".
        """
        perimeter_total, perimeter_contained = self.grid.get_fire_perimeter()
        if perimeter_total > 0:
            containment = perimeter_contained / perimeter_total
        else:
            # No perimeter at all is treated as fully contained.
            containment = 1.0

        snapshot = {"containment_pct": containment}
        snapshot["pop_lost"] = self.grid.get_population_lost()
        snapshot["total_pop"] = self.grid.get_total_population()
        return snapshot

    def _is_redundant(self, action: Action) -> bool:
        """Tell whether ``action`` merely repeats the previous action.

        Rules, applied in order:
          * No previous action, or a different action type: not redundant.
          * Either action carries a target_row (coordinate-targeted actions
            such as DROP_RETARDANT, DEPLOY_CREW, RECON_FLIGHT): redundant only
            when both target the same (row, col).
          * Otherwise, when the new action names a crew (directional actions
            such as MOVE_CREW, BUILD_FIREBREAK): redundant only when the same
            crew repeats the same direction — different crews or different
            directions are legitimate patrol behaviour and are not penalised.
          * Anything else: not redundant.
        """
        last = self._prev_action
        if last is None or action.action_type != last.action_type:
            return False

        # Coordinate-targeted branch: taken as soon as either side has a row.
        neither_has_target = action.target_row is None and last.target_row is None
        if not neither_has_target:
            same_row = action.target_row == last.target_row
            same_col = action.target_col == last.target_col
            return same_row and same_col

        # Crew/direction branch.
        if action.crew_id is None:
            return False
        same_crew = action.crew_id == last.crew_id
        same_heading = action.direction == last.direction
        return same_crew and same_heading

    def _ignite_initial_fires(self) -> None:
        """Start the episode's initial fires at tier-specific anchor points.

        Each anchor is passed through _find_ignition_candidate so the actual
        ignition cell keeps a minimum Manhattan distance from populated cells
        (4 / 6 / 7 for easy / medium / hard, 5 for any other tier name).

        Every fire starts at intensity 0.65. Per the original design note
        (not verifiable from this file), that is high enough for a single
        tanker drop to leave residual fire, so an episode cannot be solved in
        one or two steps.
        """
        n_rows, n_cols = self.config.grid_rows, self.config.grid_cols
        tier = self.config.tier_name
        clearance = {"easy": 4, "medium": 6, "hard": 7}.get(tier, 5)

        if tier == "easy":
            # Two fronts on the middle row, so crews have to split.
            anchors = [
                (n_rows // 2, n_cols // 3),
                (n_rows // 2, 2 * n_cols // 3),
            ]
        elif tier == "medium":
            # Three fronts: genuine multi-front management.
            anchors = [
                (n_rows // 4, n_cols // 3),
                (2 * n_rows // 3, 2 * n_cols // 3),
                (n_rows // 2, n_cols // 2),
            ]
        else:
            # Two fronts now; a further one is added later by the staggered
            # ignition handled in step().
            anchors = [
                (n_rows // 4, n_cols // 4),
                (n_rows // 2, 3 * n_cols // 4),
            ]

        for anchor_r, anchor_c in anchors:
            fire_r, fire_c = self._find_ignition_candidate(anchor_r, anchor_c, clearance)
            self.grid.ignite_cell(fire_r, fire_c, intensity=0.65)

    def _find_ignition_candidate(self, target_r: int, target_c: int, min_pop_dist: int) -> tuple[int, int]:
        """Return the nearest valid ignition cell to (target_r, target_c).

        A cell is valid when it lies inside the grid, its fuel type is neither
        WATER nor ROAD, and its Manhattan distance to every populated cell is
        at least ``min_pop_dist``.

        The search walks Manhattan-distance rings outward from the target and
        covers the whole grid: the largest distance between two in-grid cells
        is ``rows + cols - 2``, so that is the last ring examined. Within a
        ring, cells are visited in ascending row offset, then ascending column
        offset, and the first valid one wins.

        Args:
            target_r: Preferred ignition row.
            target_c: Preferred ignition column.
            min_pop_dist: Required minimum Manhattan distance to any
                populated cell.

        Returns:
            (row, col) of the chosen cell. Falls back to the original target
            when no cell in the grid satisfies the constraints.
        """
        rows, cols = self.config.grid_rows, self.config.grid_cols

        pop_cells = [
            (r, c)
            for r in range(rows)
            for c in range(cols)
            if self.grid.static_grid[r][c].is_populated
        ]

        def _clearance(r: int, c: int) -> int:
            # Distance to the closest populated cell; effectively unlimited
            # when the grid has no populated cells at all.
            if not pop_cells:
                return 9999
            return min(abs(r - pr) + abs(c - pc) for pr, pc in pop_cells)

        # range(max(rows, cols)) would stop short of the far corners; go up to
        # the true maximum Manhattan distance inside the grid.
        for radius in range(rows + cols - 1):
            for dr in range(-radius, radius + 1):
                # On a Manhattan ring each row offset has exactly two cells
                # (left and right), or a single one at the ring's tips.
                reach = radius - abs(dr)
                col_offsets = (-reach, reach) if reach else (0,)
                for dc in col_offsets:
                    r, c = target_r + dr, target_c + dc
                    if not self.grid._in_bounds(r, c):
                        continue
                    if self.grid.static_grid[r][c].fuel_type in (FuelType.WATER, FuelType.ROAD):
                        continue
                    if _clearance(r, c) >= min_pop_dist:
                        return r, c

        return target_r, target_c

    def _ignite_staggered_fire(self) -> None:
        """Start the late extra fire used by the hard tier.

        Scans, row by row, the 5x5 window whose top-left corner is
        (3 * rows // 4, cols // 3) and ignites the first in-bounds cell that is
        still UNBURNED at intensity 0.7. Does nothing when the window holds no
        such cell.
        """
        n_rows, n_cols = self.config.grid_rows, self.config.grid_cols
        base_r = 3 * n_rows // 4
        base_c = n_cols // 3

        window = (
            (base_r + row_off, base_c + col_off)
            for row_off in range(5)
            for col_off in range(5)
        )
        for r, c in window:
            if not self.grid._in_bounds(r, c):
                continue
            if self.grid.dynamic_grid[r][c].fire_state != FireState.UNBURNED:
                continue
            self.grid.ignite_cell(r, c, intensity=0.7)
            return

    def _validate_action(self, action: Action) -> tuple[bool, str]:
        """Semantically validate an action against the current grid.

        Only coordinate-targeted actions (DEPLOY_CREW, DROP_RETARDANT,
        RECON_FLIGHT) are checked here: their target must be present and lie
        inside the grid. Every other action type is accepted as-is; the
        resource calls made in _execute_action report their own success or
        failure.

        Args:
            action: The action to check (schema validation is assumed to have
                happened when the Action object was constructed).

        Returns:
            (is_valid, error_message); the message is empty when valid.
        """
        targeted_types = (
            ActionType.DEPLOY_CREW,
            ActionType.DROP_RETARDANT,
            ActionType.RECON_FLIGHT,
        )
        try:
            if action.action_type not in targeted_types:
                return True, ""

            row, col = action.target_row, action.target_col
            # Report missing coordinates explicitly rather than relying on
            # whatever the bounds check does when handed None.
            if row is None or col is None:
                return False, "Missing target coordinates (target_row/target_col)"

            if not self.grid._in_bounds(row, col):
                return False, f"Target ({row},{col}) out of bounds"

            return True, ""

        except Exception as e:
            # Deliberate best-effort: any unexpected failure while inspecting
            # the action is reported as an invalid action, not a crash.
            return False, str(e)

    def _execute_action(self, action: Action) -> list[str]:
        """Carry out an already-validated action.

        Dispatches on the action type to the matching ResourceManager call.
        Each call returns (ok, message); the message becomes the event and a
        failed call bumps ``resources.wasted_actions``. IDLE only records its
        reason. An unrecognised action type yields no events.

        Returns:
            List of event messages (zero or one entry).
        """
        kind = action.action_type
        crews = self.resources

        if kind == ActionType.IDLE:
            return [f"IDLE: {action.reason or 'No action taken'}"]

        if kind == ActionType.DEPLOY_CREW:
            ok, msg = crews.deploy_crew(action.crew_id, action.target_row, action.target_col)
        elif kind == ActionType.MOVE_CREW:
            ok, msg = crews.move_crew(action.crew_id, action.direction)
        elif kind == ActionType.DROP_RETARDANT:
            ok, msg = crews.drop_retardant(action.tanker_id, action.target_row, action.target_col)
        elif kind == ActionType.BUILD_FIREBREAK:
            ok, msg = crews.build_firebreak(action.crew_id, action.direction)
        elif kind == ActionType.RECON_FLIGHT:
            ok, msg = crews.recon_flight(action.target_row, action.target_col, self.current_step)
        else:
            return []

        # A resource call that reports failure counts as a wasted action.
        if not ok:
            crews.wasted_actions += 1
        return [msg]

    def _check_termination(self) -> bool:
        """Decide whether the episode should end.

        Called by step() after ``current_step`` has been incremented.

        Returns:
            True when any of the following holds:
              * the step budget (``config.episode_length``) is used up;
              * no cell is BURNING or EMBER, at least ``min_active_steps``
                have elapsed, and no staggered ignition is still pending;
              * the whole population has been lost.
        """
        # Time limit
        if self.current_step >= self.config.episode_length:
            return True

        # Fire fully contained (no burning cells)
        burning = self.grid.count_by_state(FireState.BURNING)
        ember = self.grid.count_by_state(FireState.EMBER)
        if burning == 0 and ember == 0:
            # Enforce minimum active steps — prevents trivial 1-2 step episodes
            # where a single tanker drop or natural burnout ends the episode
            # before the agent has taken any meaningful sequence of actions.
            if self.current_step < self.config.min_active_steps:
                return False
            # Don't terminate before staggered ignition fires (hard tier).
            # step() ignites when the PRE-increment step equals
            # staggered_ignition_step, while this check runs POST-increment,
            # so the ignition is still pending while current_step <= that
            # value. A strict "<" would end the episode one step too early.
            pending_step = self.config.staggered_ignition_step
            if pending_step is not None and self.current_step <= pending_step:
                return False
            return True

        # All populated zones burned (catastrophic failure)
        total_pop = self.grid.get_total_population()
        lost_pop = self.grid.get_population_lost()
        if total_pop > 0 and lost_pop >= total_pop:
            return True

        return False

    def _build_observation(self) -> Observation:
        """Assemble the observation handed to the agent.

        Combines the grid view (built with the tier's fog-of-war settings, the
        crew positions and the currently revealed cells), the weather
        observation, the resource state, aggregate statistics and the five
        most recent log events.
        """
        # Grid as the agent sees it; fog handling is delegated to the grid.
        grid_view = self.grid.build_observation(
            enable_fog=self.config.enable_fog_of_war,
            fog_radius=self.config.fog_visibility_radius,
            crew_positions=self.resources.get_crew_positions(),
            revealed_cells=self.resources.revealed_cells,
        )

        # Weather as reported to the agent, then the resource state.
        weather_view = self.weather.get_observation()
        resource_view = self.resources.get_resource_state()

        def _pct_remaining(total, lost) -> float:
            # Share of `total` not yet lost, in percent with one decimal;
            # an empty total counts as 100% remaining.
            if total > 0:
                return round(100.0 * (total - lost) / total, 1)
            return 100.0

        burnable_total = self.grid.get_total_burnable()
        burned = self.grid.get_burned_count()
        population_total = self.grid.get_total_population()
        population_lost = self.grid.get_population_lost()
        burning_now = self.grid.count_by_state(FireState.BURNING)

        stats = ClusterStats(
            cells_burned=burned,
            cells_burning=burning_now,
            cells_saved=burnable_total - burned - burning_now,
            population_threatened=self._count_threatened_population(),
            population_lost=population_lost,
            total_population=population_total,
            containment_pct=self._compute_containment_pct(),
            area_saved_pct=_pct_remaining(burnable_total, burned),
            civilians_saved_pct=_pct_remaining(population_total, population_lost),
            current_step=self.current_step,
            max_steps=self.config.episode_length,
            firebreaks_built=self.resources.total_firebreaks_built,
            retardant_drops=self.resources.total_retardant_drops,
        )

        return Observation(
            grid=grid_view,
            weather=weather_view,
            resources=resource_view,
            stats=stats,
            # Newest five log entries (an empty log yields an empty list).
            recent_events=self.events_log[-5:],
        )

    def _count_threatened_population(self) -> int:
        """Sum the population of populated cells close to burning cells.

        A populated cell counts when it lies in the 7x7 square (3 cells in
        every direction, clipped to the grid) around any cell returned by
        ``grid.get_burning_cells()`` and is itself neither BURNING nor
        BURNED_OUT. Each cell is counted at most once.
        """
        grid = self.grid
        total_threatened = 0
        already_counted = set()

        for fire_r, fire_c in grid.get_burning_cells():
            row_lo, row_hi = max(0, fire_r - 3), min(grid.rows, fire_r + 4)
            for r in range(row_lo, row_hi):
                col_lo, col_hi = max(0, fire_c - 3), min(grid.cols, fire_c + 4)
                for c in range(col_lo, col_hi):
                    if (r, c) in already_counted:
                        continue
                    cell = grid.static_grid[r][c]
                    if not cell.is_populated:
                        continue
                    status = grid.dynamic_grid[r][c].fire_state
                    if status in (FireState.BURNED_OUT, FireState.BURNING):
                        continue
                    total_threatened += cell.population
                    already_counted.add((r, c))
        return total_threatened

    def _compute_containment_pct(self) -> float:
        """Return fire containment as a percentage (0-100, one decimal).

        Uses the (total, contained) pair from ``grid.get_fire_perimeter()``;
        a fire with no perimeter is reported as 100.0.
        """
        perimeter_cells, contained_cells = self.grid.get_fire_perimeter()
        if perimeter_cells != 0:
            return round(100.0 * contained_cells / perimeter_cells, 1)
        return 100.0