Spaces:

nik-55
/

medchain-openenv-hackathon

Sleeping

File size: 38,609 Bytes

4afc4db

"""
Core simulation engine for the MedChain Env environment.

MedchainSimulation manages the full episode lifecycle:
  - Inventory tracked as FEFO lots per (location, product)
  - Purchase order pipeline with stochastic lead times
  - Event-driven inbox messages (crises, recalls, demand surges)
  - Daily demand generation and fulfillment
"""

from __future__ import annotations

import uuid
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set, Tuple

import numpy as np

from .tasks import SimEvent, TaskConfig


# ─── Simulation Dataclasses ───────────────────────────────────────────────────

@dataclass
class Lot:
    """A batch of one product held in inventory at a single location."""

    lot_id: str                 # Identifier; quarantine status is tracked by this id.
    qty: int                    # Units currently remaining in the lot.
    expiry_day: Optional[int]   # None = non-perishable. Expired when current_day >= expiry_day.
    cost_per_unit: float        # Unit cost; used to value the write-off when the lot expires.


@dataclass
class PurchaseOrder:
    """A purchase order from submission through delivery."""

    po_id: str
    supplier_id: str
    product_id: str
    destination_id: str         # Location whose inventory receives the lot on delivery.
    quantity: int
    priority: str               # "standard" or "expedited"
    day_submitted: int
    eta_day: int                # Delivered by end_shift once eta_day <= current day.
    unit_cost: float            # Per-unit price after supplier and expedite multipliers.
    total_cost: float           # unit_cost * quantity; counted against the budget while in transit.
    status: str                 # "pending_justification", "in_transit", "delivered"
    lot_id: str                 # Id given to the inventory Lot created when the order is delivered.


@dataclass
class PendingBudgetOverride:
    """An expedited purchase order parked until a justification is filed for its ticket."""

    ticket_id: str              # Key under which the override is stored in SimState.pending_overrides.
    po: PurchaseOrder           # The held order; it enters the pipeline when the ticket is justified.


@dataclass
class InboxMessage:
    """A single message shown to the agent through the read_inbox tool."""

    msg_id: str
    priority: str               # Free-form label rendered in the message header (e.g. "LOW").
    timestamp_str: str          # "Day {n} {HH:MM}"
    sender: str
    subject: str
    body: str
    read: bool                  # Set to True once the message has been returned by read_inbox.
    flagged: bool               # Selected by read_inbox(filter="flagged").
    event_id: str               # Originating event id ("system_welcome" for the handover note).


@dataclass
class JustificationRecord:
    """Audit-trail entry written each time a justification is filed for an expedited order."""

    ticket_id: str
    po_id: str
    reason: str                 # Free-text reason supplied by the agent.
    is_coherent: bool           # Verdict of grade_justification() against the active event types.


@dataclass
class SimState:
    """
    Complete mutable state of one episode.

    A fresh instance is built by MedchainSimulation.reset() and then mutated
    in place by the tool methods and by end_shift_tool().
    """

    # Episode meta
    task: str
    episode_id: str
    seed: int
    rng: np.random.Generator    # Seeded from `seed`; source of demand noise and lead-time jitter.

    # Time
    day: int                    # Current day; episodes start at day 1.
    max_days: int               # The episode is done once day > max_days.

    # Action budget
    actions_remaining: int      # Restored to actions_per_shift at every end_shift.
    actions_per_shift: int

    # Budget
    budget_used: float          # Cost of orders currently in transit (released on delivery).
    budget_limit: float

    # Inventory: (location_id, product_id) -> List[Lot]  (FEFO-sorted)
    inventory: Dict[Tuple[str, str], List[Lot]]

    # Orders
    pipeline_orders: List[PurchaseOrder]
    po_counter: int             # Shared sequence for PO ids, override tickets and disposal tickets.

    # Inbox
    inbox: List[InboxMessage]
    msg_counter: int

    # Budget override tickets
    pending_overrides: Dict[str, PendingBudgetOverride]

    # Quarantine
    quarantined_lots: Set[str]  # Lot ids excluded from fulfilment and transfers.

    # Demand / fulfillment tracking (one value per completed day)
    daily_demand: List[float]
    daily_fulfilled: List[float]
    daily_critical_demand: List[float]
    daily_critical_fulfilled: List[float]

    # Per-(location, product) daily tracking (for demand_history queries)
    daily_product_demand: Dict[Tuple[str, str], List[int]]
    daily_product_fulfilled: Dict[Tuple[str, str], List[int]]

    # Spend tracking
    total_spend: float          # Accumulated cost of delivered orders.
    total_wasted_value: float   # Accumulated value of expired stock.

    # Transfer tracking (task 2)
    transfer_count: int
    transfer_cost_paid: float

    # Capacity violations (task 2)
    capacity_violation_days: int

    # Active event effects: event_id -> last_day_active (inclusive)
    active_events: Dict[str, int]

    # Per-shift shaping reward helpers
    info_rewards_given_this_shift: Set[str]
    daily_stockout_count: int   # Reset to 0 after each shift's shaping reward is computed.
    daily_expired_lots: int     # Reset to 0 after each shift's shaping reward is computed.

    # Task 3 crisis tracking
    justification_log: List[JustificationRecord]
    mci_preemptive_order: bool
    recall_handled_by_day: Optional[int]


# ─── MedchainSimulation ───────────────────────────────────────────────────────

class MedchainSimulation:
    """
    Core simulation engine. Called by MedchainEnvironment's MCP tools.

    All public tool methods return a string (displayed to agent as ERP output).
    end_shift_tool() also stores _last_reward and _done for the environment.
    """

    def __init__(self, task_config: TaskConfig):
        self._task = task_config
        self._state: Optional[SimState] = None
        self._last_reward: float = 0.0
        self._done: bool = False

    # ── Called by environment.reset() ──────────────────────────────────────

    def reset(self, seed: int, episode_id: str) -> str:
        """
        Start a fresh episode and return the opening dashboard text.

        Clears the stored reward/done flags, builds a new SimState whose random
        generator is seeded with ``seed``, seeds the opening inventory and posts
        the Day 1 inbox messages.
        """
        task = self._task
        self._last_reward = 0.0
        self._done = False

        self._state = SimState(
            # Episode meta
            task=task.name,
            episode_id=episode_id,
            seed=seed,
            rng=np.random.default_rng(seed),
            # Time
            day=1,
            max_days=task.max_days,
            # Action budget
            actions_remaining=task.actions_per_shift,
            actions_per_shift=task.actions_per_shift,
            # Budget
            budget_used=0.0,
            budget_limit=task.budget_limit,
            # Inventory and orders
            inventory={},
            pipeline_orders=[],
            po_counter=1,
            # Inbox
            inbox=[],
            msg_counter=1,
            # Overrides and quarantine
            pending_overrides={},
            quarantined_lots=set(),
            # Demand / fulfilment history
            daily_demand=[],
            daily_fulfilled=[],
            daily_critical_demand=[],
            daily_critical_fulfilled=[],
            daily_product_demand={},
            daily_product_fulfilled={},
            # Spend, transfers, capacity
            total_spend=0.0,
            total_wasted_value=0.0,
            transfer_count=0,
            transfer_cost_paid=0.0,
            capacity_violation_days=0,
            # Events and per-shift counters
            active_events={},
            info_rewards_given_this_shift=set(),
            daily_stockout_count=0,
            daily_expired_lots=0,
            # Crisis tracking
            justification_log=[],
            mci_preemptive_order=False,
            recall_handled_by_day=None,
        )

        self._initialize_inventory()
        self._inject_day1_inbox()

        from .erp_formatter import format_dashboard
        return format_dashboard(self._state, task)

    def _initialize_inventory(self):
        """
        Give every (location, product) pair a single opening lot.

        The lot holds ``base_demand * initial_stock_days`` units (truncated to an
        int). Perishable products start with 70% of their shelf life counted
        from day 1; products without a shelf life never expire.
        """
        inventory = self._state.inventory
        for item in self._task.products:
            for site in item.locations:
                opening_qty = int(item.base_demand * self._task.initial_stock_days)
                if item.shelf_life_days is None:
                    first_expiry = None
                else:
                    first_expiry = 1 + int(item.shelf_life_days * 0.7)
                inventory[(site, item.product_id)] = [
                    Lot(
                        lot_id=f"INIT-{item.product_id}-{site}",
                        qty=opening_qty,
                        expiry_day=first_expiry,
                        cost_per_unit=item.unit_cost,
                    )
                ]

    def _inject_day1_inbox(self):
        """Post the shift-handover welcome note, then any events scheduled for Day 1."""
        task = self._task
        state = self._state

        handover_text = "\n".join(
            [
                f"Welcome to the {task.name} scenario.",
                f"You are managing medical supplies for {task.max_days} days.",
                f"Action budget: {task.actions_per_shift} actions per shift.",
                f"Budget ceiling: ${task.budget_limit:,.0f} outstanding orders.",
                "",
                "Use read_inbox to check messages, query_erp to check stock,",
                "submit_po to order supplies, and end_shift to advance the day.",
            ]
        )
        state.inbox.append(
            InboxMessage(
                msg_id=f"MSG-{state.msg_counter:04d}",
                priority="LOW",
                timestamp_str="Day 1 08:00",
                sender="System",
                subject="Shift Handover Notes",
                body=handover_text,
                read=False,
                flagged=False,
                event_id="system_welcome",
            )
        )
        state.msg_counter += 1

        self._inject_events_for_day(1)

    # ── Action Budget Helper ────────────────────────────────────────────────

    def _check_action_budget(self, tool_name: str) -> Optional[str]:
        """Returns error string if budget exhausted, None if OK. Does NOT decrement."""
        if tool_name == "end_shift":
            return None
        if self._state is None:
            return "ERROR: Environment not initialized. Call reset() first."
        if self._state.actions_remaining <= 0:
            return (
                "ERROR: Action budget exhausted for this shift.\n"
                f"Actions used: {self._state.actions_per_shift}/{self._state.actions_per_shift}\n"
                "Call end_shift() to advance to the next day and restore your action budget."
            )
        return None

    # ── MCP Tool Implementations ────────────────────────────────────────────

    def read_inbox(self, filter: str = "unread") -> str:
        err = self._check_action_budget("read_inbox")
        if err:
            return err
        self._state.actions_remaining -= 1

        messages = list(self._state.inbox)
        if filter == "unread":
            messages = [m for m in messages if not m.read]
        elif filter == "flagged":
            messages = [m for m in messages if m.flagged]
        # "all" → use full inbox

        for m in messages:
            m.read = True

        if not messages:
            return f"INBOX EMPTY\nFilter: {filter} | No messages matching filter."

        lines = []
        for m in messages:
            read_status = "READ" if m.read else "UNREAD"
            lines.append(
                f"\n[MSG {m.msg_id} | {read_status} | PRIORITY: {m.priority} | {m.timestamp_str}]"
            )
            lines.append(f"FROM: {m.sender}")
            lines.append(f"SUBJ: {m.subject}")
            lines.append("")
            lines.append(m.body)
            lines.append("")
        return "\n".join(lines)

    def query_erp(self, table: str, location: str = "all", sku: str = "all") -> str:
        err = self._check_action_budget("query_erp")
        if err:
            return err
        self._state.actions_remaining -= 1

        valid_tables = ["inventory", "expiry", "pipeline_orders", "demand_history"]
        if table not in valid_tables:
            return f"ERROR: Unknown table '{table}'. Valid tables: {valid_tables}"

        from .erp_formatter import (
            format_demand_history,
            format_expiry_table,
            format_inventory_table,
            format_pipeline_table,
        )
        if table == "inventory":
            return format_inventory_table(self._state, self._task, location, sku)
        elif table == "expiry":
            return format_expiry_table(self._state, self._task, location, sku)
        elif table == "pipeline_orders":
            return format_pipeline_table(self._state, location, sku)
        elif table == "demand_history":
            return format_demand_history(self._state, self._task, location, sku)
        return "ERROR: Unexpected table."

    def query_supplier(self, supplier_id: str) -> str:
        err = self._check_action_budget("query_supplier")
        if err:
            return err
        self._state.actions_remaining -= 1

        supplier = next((s for s in self._task.suppliers if s.supplier_id == supplier_id), None)
        if not supplier:
            available = [s.supplier_id for s in self._task.suppliers]
            return f"ERROR: Supplier '{supplier_id}' not found. Available: {available}"

        effective_lead_time = supplier.base_lead_time
        disruption_note = "No disruptions reported."
        for event_id, last_day in self._state.active_events.items():
            event = next((e for e in self._task.events if e.event_id == event_id), None)
            if (
                event
                and event.event_type == "supplier_disruption"
                and event.params.get("supplier_id") == supplier_id
            ):
                effective_lead_time = event.params["new_lead_time"]
                disruption_note = (
                    f"ACTIVE DISRUPTION: Lead time extended to {effective_lead_time} days. "
                    f"Reason: {event.params['reason']}"
                )

        from .erp_formatter import format_supplier_info
        return format_supplier_info(supplier, effective_lead_time, disruption_note)

    def query_forecast(self, product_id: str, location_id: str, horizon_days: int = 7) -> str:
        err = self._check_action_budget("query_forecast")
        if err:
            return err
        self._state.actions_remaining -= 1

        horizon_days = max(1, min(21, horizon_days))
        product = next((p for p in self._task.products if p.product_id == product_id), None)
        if not product:
            return f"ERROR: Product '{product_id}' not found."
        if location_id not in product.locations and location_id != "all":
            return f"ERROR: Product '{product_id}' is not stocked at '{location_id}'."

        from .erp_formatter import format_forecast
        return format_forecast(self._state, self._task, product, location_id, horizon_days)

    def submit_po(
        self,
        supplier_id: str,
        product_id: str,
        destination_id: str,
        quantity: int,
        priority: str = "standard",
    ) -> str:
        err = self._check_action_budget("submit_po")
        if err:
            return err

        if priority not in ("standard", "expedited"):
            return "ERROR: priority must be 'standard' or 'expedited'."
        if quantity <= 0:
            return "ERROR: quantity must be positive."

        supplier = next((s for s in self._task.suppliers if s.supplier_id == supplier_id), None)
        if not supplier:
            return f"ERROR: Supplier '{supplier_id}' not found."
        if product_id not in supplier.products:
            return f"ERROR: Supplier '{supplier_id}' does not supply '{product_id}'."

        valid_locs = [l.location_id for l in self._task.locations]
        if destination_id not in valid_locs:
            return f"ERROR: Destination '{destination_id}' not found. Valid: {valid_locs}"

        product = next((p for p in self._task.products if p.product_id == product_id), None)
        expedited_multiplier = 1.5 if priority == "expedited" else 1.0
        unit_cost  = product.unit_cost * supplier.cost_multiplier * expedited_multiplier
        total_cost = unit_cost * quantity

        if self._state.budget_used + total_cost > self._state.budget_limit:
            overage = (self._state.budget_used + total_cost) - self._state.budget_limit
            return (
                f"ERROR: BUDGET_EXCEEDED\n"
                f"Order cost: ${total_cost:,.2f} | "
                f"Current outstanding: ${self._state.budget_used:,.2f} | "
                f"Limit: ${self._state.budget_limit:,.2f}\n"
                f"Overage: ${overage:,.2f}\n"
                f"Reduce order quantity or wait for existing orders to be delivered."
            )

        # Effective lead time (check active disruptions)
        lead_time = supplier.base_lead_time
        for event_id, last_day in self._state.active_events.items():
            event = next((e for e in self._task.events if e.event_id == event_id), None)
            if (
                event
                and event.event_type == "supplier_disruption"
                and event.params.get("supplier_id") == supplier_id
            ):
                lead_time = event.params["new_lead_time"]

        if priority == "expedited":
            lead_time = max(1, lead_time - 2)

        # Stochastic jitter for task 3
        if supplier.lead_time_std > 0:
            jitter = int(round(self._state.rng.normal(0, supplier.lead_time_std)))
            lead_time = max(1, lead_time + jitter)

        eta_day = self._state.day + lead_time
        po_id  = f"POD-{self._state.po_counter:04d}"
        lot_id = f"LOT-{po_id}"
        self._state.po_counter += 1

        # Expedited: requires justification
        if priority == "expedited":
            ticket_id = f"BOT-{self._state.po_counter:04d}"
            self._state.po_counter += 1
            po = PurchaseOrder(
                po_id=po_id, supplier_id=supplier_id, product_id=product_id,
                destination_id=destination_id, quantity=quantity, priority=priority,
                day_submitted=self._state.day, eta_day=eta_day, unit_cost=unit_cost,
                total_cost=total_cost, status="pending_justification", lot_id=lot_id,
            )
            self._state.pending_overrides[ticket_id] = PendingBudgetOverride(
                ticket_id=ticket_id, po=po
            )
            self._state.actions_remaining -= 1
            return (
                f"ERROR: BUDGET_OVERRIDE_REQUIRED\n"
                f"Order {po_id} ({priority}, ${total_cost:,.2f} incl. 50% expedite premium) "
                f"requires justification.\n"
                f"Ticket ID: {ticket_id}\n"
                f"Use file_justification(ticket_id=\"{ticket_id}\", reason=\"...\") to proceed.\n"
                f"Justification will be audited by Finance. False justifications are flagged."
            )

        # Standard order: submit immediately
        self._state.actions_remaining -= 1
        po = PurchaseOrder(
            po_id=po_id, supplier_id=supplier_id, product_id=product_id,
            destination_id=destination_id, quantity=quantity, priority=priority,
            day_submitted=self._state.day, eta_day=eta_day, unit_cost=unit_cost,
            total_cost=total_cost, status="in_transit", lot_id=lot_id,
        )
        self._state.pipeline_orders.append(po)
        self._state.budget_used += total_cost

        return (
            f"OK — PO {po_id} submitted.\n"
            f"Product: {product_id} × {quantity} units\n"
            f"Supplier: {supplier_id} | Priority: {priority}\n"
            f"Destination: {destination_id} | ETA: Day {eta_day}\n"
            f"Cost: ${total_cost:,.2f} | "
            f"Budget remaining: ${self._state.budget_limit - self._state.budget_used:,.2f}"
        )

    def transfer(
        self,
        from_location_id: str,
        to_location_id: str,
        product_id: str,
        quantity: int,
    ) -> str:
        err = self._check_action_budget("transfer")
        if err:
            return err
        self._state.actions_remaining -= 1

        if quantity <= 0:
            return "ERROR: quantity must be positive."

        valid_locs = {l.location_id for l in self._task.locations}
        if from_location_id not in valid_locs:
            return f"ERROR: Location '{from_location_id}' not found."
        if to_location_id not in valid_locs:
            return f"ERROR: Location '{to_location_id}' not found."

        key_from = (from_location_id, product_id)
        lots = sorted(
            [
                l for l in self._state.inventory.get(key_from, [])
                if l.lot_id not in self._state.quarantined_lots
            ],
            key=lambda l: (l.expiry_day is None, l.expiry_day or 0),
        )
        available = sum(l.qty for l in lots)
        if available < quantity:
            return (
                f"ERROR: Insufficient stock at {from_location_id}. "
                f"Available: {available} units of {product_id}."
            )

        # Check destination capacity (task 2)
        dest_loc = next(
            (l for l in self._task.locations if l.location_id == to_location_id), None
        )
        if dest_loc and dest_loc.capacity is not None:
            current_at_dest = sum(
                sum(lot.qty for lot in lots2)
                for (loc, pid), lots2 in self._state.inventory.items()
                if loc == to_location_id
            )
            if current_at_dest + quantity > dest_loc.capacity:
                return (
                    f"ERROR: CAPACITY_EXCEEDED — {to_location_id} capacity {dest_loc.capacity}. "
                    f"Current: {current_at_dest}, Transfer: {quantity}."
                )

        # FEFO transfer
        remaining = quantity
        key_to = (to_location_id, product_id)
        if key_to not in self._state.inventory:
            self._state.inventory[key_to] = []

        for lot in lots:
            if remaining <= 0:
                break
            take = min(remaining, lot.qty)
            lot.qty -= take
            remaining -= take
            self._state.inventory[key_to].append(
                Lot(
                    lot_id=f"XFR-{lot.lot_id}",
                    qty=take,
                    expiry_day=lot.expiry_day,
                    cost_per_unit=lot.cost_per_unit,
                )
            )

        self._state.inventory[key_from] = [
            l for l in self._state.inventory[key_from] if l.qty > 0
        ]

        TRANSFER_FEE = 0.5
        fee = quantity * TRANSFER_FEE
        self._state.transfer_count += 1
        self._state.transfer_cost_paid += fee

        return (
            f"OK — Transfer complete.\n"
            f"{quantity} units of {product_id}: {from_location_id} → {to_location_id}\n"
            f"Transfer fee: ${fee:.2f}"
        )

    def quarantine_lot(self, location_id: str, sku: str, lot_id: str) -> str:
        err = self._check_action_budget("quarantine_lot")
        if err:
            return err
        self._state.actions_remaining -= 1

        valid_locs = {l.location_id for l in self._task.locations}
        if location_id not in valid_locs:
            return f"ERROR: Location '{location_id}' not found."

        key = (location_id, sku)
        lots = self._state.inventory.get(key, [])

        if lot_id == "all":
            target_lots = [l for l in lots]
        else:
            target_lots = [l for l in lots if l.lot_id == lot_id]
            if not target_lots:
                target_lots = [l for l in lots if lot_id in l.lot_id]

        if not target_lots:
            available_lots = [l.lot_id for l in lots]
            return (
                f"ERROR: Lot '{lot_id}' not found at {location_id} for SKU {sku}. "
                f"Available lots: {available_lots}"
            )

        quarantined_qty = 0
        disposal_ids = []
        for lot in target_lots:
            if lot.lot_id not in self._state.quarantined_lots:
                self._state.quarantined_lots.add(lot.lot_id)
                quarantined_qty += lot.qty
                disposal_ids.append(lot.lot_id)

        # Track recall completion for task 3
        if sku == "IV-SAL-500" and "RECALL-LOT" in lot_id:
            self._check_recall_completion()

        disposal_ticket = f"DIS-{self._state.po_counter:04d}"
        self._state.po_counter += 1

        return (
            f"OK — Quarantine complete.\n"
            f"SKU: {sku} | Location: {location_id}\n"
            f"Lots quarantined: {disposal_ids}\n"
            f"Units quarantined: {quarantined_qty}\n"
            f"Disposal ticket: {disposal_ticket} created."
        )

    def file_justification(self, ticket_id: str, reason: str) -> str:
        err = self._check_action_budget("file_justification")
        if err:
            return err
        self._state.actions_remaining -= 1

        if ticket_id not in self._state.pending_overrides:
            return (
                f"ERROR: Ticket '{ticket_id}' not found or already processed.\n"
                f"Active tickets: {list(self._state.pending_overrides.keys())}"
            )

        override = self._state.pending_overrides.pop(ticket_id)
        po = override.po

        active_event_types: Set[str] = set()
        for event_id in self._state.active_events:
            event = next((e for e in self._task.events if e.event_id == event_id), None)
            if event:
                active_event_types.add(event.event_type)

        from .grader import grade_justification
        is_coherent = grade_justification(reason, active_event_types)
        record = JustificationRecord(
            ticket_id=ticket_id, po_id=po.po_id, reason=reason, is_coherent=is_coherent
        )
        self._state.justification_log.append(record)

        po.status = "in_transit"
        self._state.pipeline_orders.append(po)
        self._state.budget_used += po.total_cost

        audit_note = ""
        if not is_coherent:
            audit_note = (
                "\n[AUDIT FLAG] Justification does not reference active crisis conditions. "
                "Flagged for Finance review. Penalty applied."
            )

        return (
            f"OK — Justification {'accepted' if is_coherent else 'FLAGGED'}. "
            f"PO {po.po_id} submitted.\n"
            f"Product: {po.product_id} × {po.quantity} units | Destination: {po.destination_id}\n"
            f"ETA: Day {po.eta_day} | Cost: ${po.total_cost:,.2f}"
            f"{audit_note}"
        )

    def end_shift_tool(self) -> str:
        """
        Advance simulation by one day. Stores _last_reward and _done.

        Runs, in order: deliveries, expiry write-offs, demand generation and
        FEFO fulfilment, capacity-violation check, the silent recall-lot
        injection (task "hospital_network_crisis", day 2), the day advance with
        action-budget reset and event refresh, the daily shaping reward, and the
        terminal check.

        Returns the end-of-shift report. While the episode continues, the report
        is followed by the next day's dashboard and ``_last_reward`` holds the
        shaping reward. Once ``day > max_days`` the report carries the final
        summary, ``_done`` is True and ``_last_reward`` holds the final score.
        Returns an error string if reset() has not been called.
        """
        state = self._state
        if state is None:
            return "ERROR: Environment not initialized."

        # `day` stays fixed at the shift being closed even after state.day is advanced below.
        day = state.day
        report_lines = [f"╔═══ END OF SHIFT — Day {day} {'═' * 40}╗"]

        # ── Step 1: Deliver arriving orders ──────────────────────────────
        delivered = []
        for po in list(state.pipeline_orders):
            if po.eta_day <= day:
                # NOTE(review): `product` is dereferenced without a None check; this
                # relies on every pipeline order referring to a product in the task.
                product = next(
                    (p for p in self._task.products if p.product_id == po.product_id), None
                )
                key = (po.destination_id, po.product_id)
                if key not in state.inventory:
                    state.inventory[key] = []
                # Shelf life counts from the delivery day; a falsy shelf life means no expiry.
                expiry_day = (day + product.shelf_life_days) if product.shelf_life_days else None
                lot = Lot(
                    lot_id=po.lot_id, qty=po.quantity,
                    expiry_day=expiry_day, cost_per_unit=po.unit_cost
                )
                state.inventory[key].append(lot)
                # Delivery releases the reserved budget and books the cost as spend.
                state.budget_used -= po.total_cost
                state.total_spend += po.total_cost
                po.status = "delivered"
                delivered.append(po)

        state.pipeline_orders = [po for po in state.pipeline_orders if po.status != "delivered"]
        if delivered:
            report_lines.append(f"  DELIVERIES: {len(delivered)} order(s) received.")

        # ── Step 2: Expire old lots ───────────────────────────────────────
        total_expired_units = 0
        total_expired_value = 0.0
        for key in list(state.inventory.keys()):
            fresh, expired = [], []
            for lot in state.inventory[key]:
                if lot.expiry_day is not None and lot.expiry_day <= day:
                    expired.append(lot)
                else:
                    fresh.append(lot)
            if expired:
                for lot in expired:
                    total_expired_units += lot.qty
                    total_expired_value += lot.qty * lot.cost_per_unit
                    state.total_wasted_value += lot.qty * lot.cost_per_unit
                state.daily_expired_lots += len(expired)
            state.inventory[key] = fresh

        if total_expired_units > 0:
            report_lines.append(
                f"  EXPIRED: {total_expired_units} units (${total_expired_value:,.2f} written off)"
            )

        # ── Step 3: Generate and fulfill demand ───────────────────────────
        day_demand = 0.0
        day_fulfilled = 0.0
        day_critical_demand = 0.0
        day_critical_fulfilled = 0.0

        # Each _generate_demand call draws from the shared rng, so the iteration
        # order over products and locations determines the demand values.
        for product in self._task.products:
            for loc_id in product.locations:
                demand    = self._generate_demand(product, loc_id, day)
                fulfilled = self._fefo_fulfill(product.product_id, loc_id, demand, day)

                day_demand    += demand
                day_fulfilled += fulfilled
                if product.criticality == "CRITICAL":
                    day_critical_demand    += demand
                    day_critical_fulfilled += fulfilled

                # Per-product daily tracking
                key = (loc_id, product.product_id)
                if key not in state.daily_product_demand:
                    state.daily_product_demand[key] = []
                    state.daily_product_fulfilled[key] = []
                state.daily_product_demand[key].append(demand)
                state.daily_product_fulfilled[key].append(fulfilled)

        state.daily_demand.append(day_demand)
        state.daily_fulfilled.append(day_fulfilled)
        state.daily_critical_demand.append(day_critical_demand)
        state.daily_critical_fulfilled.append(day_critical_fulfilled)

        # max(..., 1) guards against division by zero on a day with no demand.
        day_svc = day_fulfilled / max(day_demand, 1)
        report_lines.append(
            f"  DEMAND: {int(day_demand)} units | FULFILLED: {int(day_fulfilled)} ({100 * day_svc:.1f}%)"
        )

        # ── Step 4: Check capacity violations (task 2) ────────────────────
        if any(l.capacity is not None for l in self._task.locations):
            for location in self._task.locations:
                if location.capacity is None:
                    continue
                # Total units of every product held at this location.
                current = sum(
                    sum(lot.qty for lot in lots)
                    for (lid, pid), lots in state.inventory.items()
                    if lid == location.location_id
                )
                if current > location.capacity:
                    state.capacity_violation_days += 1

        # ── Step 5: Inject recall lot for task 3 (Day 2, silent) ─────────
        if self._task.name == "hospital_network_crisis" and day == 2:
            self._inject_recall_lot()

        # ── Step 6: Advance day, reset budget, inject next-day events ────
        state.day += 1
        state.actions_remaining = state.actions_per_shift
        self._update_active_events(state.day)
        self._inject_events_for_day(state.day)

        # ── Step 7: Daily shaping reward ──────────────────────────────────
        shaping = 0.0
        day_service = day_fulfilled / max(day_demand, 1)
        shaping += 0.10 * day_service

        # Holding penalty on non-quarantined stock.
        total_units = sum(
            lot.qty
            for lots in state.inventory.values()
            for lot in lots
            if lot.lot_id not in state.quarantined_lots
        )
        shaping -= 0.00005 * total_units
        # Expiry and stockout penalties are capped at 0.30 and 0.50 respectively.
        shaping -= min(0.30, state.daily_expired_lots * 0.10)
        shaping -= min(0.50, state.daily_stockout_count * 0.20)

        # Per-shift counters start fresh for the next day.
        state.info_rewards_given_this_shift = set()
        state.daily_stockout_count = 0
        state.daily_expired_lots   = 0

        # ── Step 8: Compute terminal score & check done ───────────────────
        # compute_reward runs every shift; its value is only used when the episode ends.
        from .grader import compute_reward
        final_score = compute_reward(state, self._task)
        # state.day was already advanced, so this is true after the last day's shift.
        done = state.day > state.max_days

        if done:
            report_lines.append(
                f"╠═══ EPISODE COMPLETE — Final Score: {final_score:.3f} {'═' * 30}╣"
            )
            total_d = sum(state.daily_demand)
            total_f = sum(state.daily_fulfilled)
            report_lines.append(
                f"  Service Level:  {total_f / max(total_d, 1) * 100:.1f}%"
            )
            report_lines.append(f"  Total Spend:    ${state.total_spend:,.2f}")
            report_lines.append(f"  Waste Value:    ${state.total_wasted_value:,.2f}")
            report_lines.append(f"╚{'═' * 68}╝")
            self._done = True
            self._last_reward = final_score
            return "\n".join(report_lines)

        self._done = False
        self._last_reward = shaping
        report_lines.append(
            f"╚═══ Day {day} committed. Day {state.day} begins. {'═' * 38}╝"
        )
        report_lines.append("")
        from .erp_formatter import format_dashboard
        report_lines.append(format_dashboard(state, self._task))
        return "\n".join(report_lines)

    # ── Private Helpers ────────────────────────────────────────────────────

    def _generate_demand(self, product, location_id: str, day: int) -> int:
        import math as _math
        state = self._state
        base  = product.base_demand

        if product.seasonal_amplitude > 0 and product.seasonal_period > 0:
            seasonal = product.seasonal_amplitude * _math.sin(
                2 * _math.pi * day / product.seasonal_period + product.seasonal_phase
            )
            base *= (1 + seasonal)

        for event_id, last_day in state.active_events.items():
            event = next((e for e in self._task.events if e.event_id == event_id), None)
            if event is None:
                continue
            if event.event_type == "mci":
                if (
                    product.criticality in ("CRITICAL", "HIGH")
                    and location_id in event.params.get("locations", [])
                ):
                    base *= event.params.get("demand_multiplier", 3.0)
            elif event.event_type == "demand_surge":
                if product.product_id in event.params.get("products", []):
                    base *= event.params.get("multiplier", 1.4)

        noise = state.rng.normal(0, product.demand_std)
        return max(0, int(round(base + noise)))

    def _fefo_fulfill(
        self, product_id: str, location_id: str, demand: int, day: int
    ) -> int:
        state = self._state
        key   = (location_id, product_id)
        lots  = state.inventory.get(key, [])

        lots_sorted = sorted(
            [l for l in lots if l.lot_id not in state.quarantined_lots and l.qty > 0],
            key=lambda l: (l.expiry_day is None, l.expiry_day or 0),
        )

        fulfilled = 0
        for lot in lots_sorted:
            if fulfilled >= demand:
                break
            take = min(demand - fulfilled, lot.qty)
            lot.qty   -= take
            fulfilled += take

        state.inventory[key] = [l for l in lots if l.qty > 0]

        if fulfilled < demand:
            state.daily_stockout_count += 1

        return fulfilled

    def _update_active_events(self, day: int):
        state = self._state
        state.active_events = {
            eid: last_day
            for eid, last_day in state.active_events.items()
            if last_day >= day
        }
        for event in self._task.events:
            if event.trigger_day == day and event.duration_days > 0:
                state.active_events[event.event_id] = day + event.duration_days - 1

    def _inject_events_for_day(self, day: int):
        """Post inbox messages (and immediate effects) for events around ``day``.

        For each task event:

        * if it triggers on ``day``, its main message is appended to
          ``state.inbox`` stamped ``06:00`` (flagged when its priority is
          ``"CRITICAL"``); a ``cold_chain_breach`` event then quarantines the
          affected lots and a ``budget_tighten`` event overwrites
          ``state.budget_limit`` with ``params["new_budget_limit"]``;
        * if it has a warning message and triggers on ``day + 1``, the warning
          is appended stamped ``18:00`` with event ID ``"<event_id>_warning"``.

        Message IDs are taken from ``state.msg_counter``, which is advanced
        once per message posted.
        """
        sim = self._state

        def deliver(note, stamp: str, flagged: bool, tag: str) -> None:
            # Append one unread message and consume one message number.
            sim.inbox.append(
                InboxMessage(
                    msg_id=f"MSG-{sim.msg_counter:04d}",
                    priority=note.priority,
                    timestamp_str=stamp,
                    sender=note.sender,
                    subject=note.subject,
                    body=note.body,
                    read=False,
                    flagged=flagged,
                    event_id=tag,
                )
            )
            sim.msg_counter += 1

        for ev in self._task.events:
            if ev.trigger_day == day:
                deliver(
                    ev.message,
                    f"Day {day} 06:00",
                    ev.message.priority == "CRITICAL",
                    ev.event_id,
                )

                # Side effects are applied only after the message is queued.
                if ev.event_type == "cold_chain_breach":
                    self._apply_cold_chain_breach(ev)

                if ev.event_type == "budget_tighten":
                    sim.budget_limit = ev.params["new_budget_limit"]

            # Advance warning goes out the evening before the trigger day.
            if ev.warning_message and ev.trigger_day - 1 == day:
                deliver(
                    ev.warning_message,
                    f"Day {day} 18:00",
                    False,
                    f"{ev.event_id}_warning",
                )

    def _apply_cold_chain_breach(self, event: SimEvent):
        state = self._state
        loc  = event.params["location_id"]
        prod = event.params["product_id"]
        key  = (loc, prod)
        for lot in state.inventory.get(key, []):
            state.quarantined_lots.add(lot.lot_id)

    def _inject_recall_lot(self):
        """Seed the recalled IV-saline lot into inventory.

        Looks up the first task event with ID ``"iv_saline_recall"`` and the
        ``"IV-SAL-500"`` product. If both exist, a non-perishable lot with ID
        ``"RECALL-LOT-IV2026-9821"`` and ``params["qty_per_location"]`` units
        is appended to the inventory of every location listed in
        ``params["locations_with_lot"]`` (creating the inventory entry when
        absent). Does nothing if either the event or the product is missing.
        """
        sim = self._state

        recall = next(
            (ev for ev in self._task.events if ev.event_id == "iv_saline_recall"),
            None,
        )
        if recall is None:
            return

        per_site_qty = recall.params["qty_per_location"]
        saline = next(
            (item for item in self._task.products if item.product_id == "IV-SAL-500"),
            None,
        )
        if saline is None:
            return

        # Each location gets its own Lot object sharing the same lot ID.
        for site in recall.params["locations_with_lot"]:
            shelf = sim.inventory.setdefault((site, "IV-SAL-500"), [])
            shelf.append(
                Lot(
                    lot_id="RECALL-LOT-IV2026-9821",
                    qty=per_site_qty,
                    expiry_day=None,
                    cost_per_unit=saline.unit_cost,
                )
            )

    def _check_recall_completion(self):
        state = self._state
        recall_lot_id = "RECALL-LOT-IV2026-9821"
        if recall_lot_id not in state.quarantined_lots:
            return
        if state.recall_handled_by_day is None:
            state.recall_handled_by_day = state.day

    # ── Accessors used by MedchainEnvironment ──────────────────────────────

    def get_last_reward(self) -> float:
        """Return the reward value most recently stored in ``self._last_reward``."""
        return self._last_reward

    def is_done(self) -> bool:
        """Return the episode-finished flag currently held in ``self._done``."""
        return self._done