# Spaces: aryanshh / NetZero-Nav (Sleeping)
#
# File size: 9,798 Bytes

import random
from typing import List, Tuple, Dict
from netzero_nav.models import (
    Observation, Inventory, Shipment, Order, 
    Action, ActionType, TransportMode, PartType
)

class NetZeroEnv:
    """Step-based supply-chain simulation tracking cash, carbon and orders.

    An episode is started with ``reset`` and advanced with ``step``. The
    attributes annotated below hold the state of the current episode.
    """

    # Transport Mode Specs: (Cost multiplier, Speed/ETA, Carbon unit)
    # The multiplier scales the base part cost, the ETA is the number of SKIP
    # steps until the shipment arrives, and the carbon value is added to the
    # footprint once per unit ordered.
    TRANSPORT_SPECS = {
        TransportMode.SEA:  (1.0, 10, 0.1),
        TransportMode.AIR:  (5.0, 2,  2.0),
        TransportMode.RAIL: (2.5, 5,  0.5),
        TransportMode.ROAD: (1.5, 4,  0.8)
    }

    # Number of SKIP actions processed so far; only SKIP advances it.
    step_count: int
    # Parts currently in stock (chips, sensors, batteries are read by
    # production).
    inventory: Inventory
    # Shipments that were ordered and have not arrived yet.
    active_shipments: List[Shipment]
    # Customer orders that still have an unfulfilled quantity.
    pending_orders: List[Order]
    # Carbon emitted so far, net of offsets and cancelled shipments.
    carbon_total: float
    # Money available for ordering parts and buying offsets.
    cash_balance: float
    # Carbon level above which the final score is penalized.
    carbon_quota: float
    # SEA orders are rejected while step_count is below this value (0 = no
    # blockade).
    sea_blocked_until: int
    # Set to True by step() once the termination condition is met.
    done: bool

    def __init__(self, task: str = "easy"):
        """Create an environment for ``task`` and immediately start an episode.

        Args:
            task: Scenario label. "easy" and "medium" select their own order
                sets; any other value falls through to the hard order set.
        """
        self.task = task
        # Populate every state attribute right away (uses reset's default seed).
        self.reset()

    def reset(self, seed: int = 42) -> Observation:
        """Restore the environment to the start of an episode.

        Args:
            seed: Seed applied to the module-level ``random`` generator, which
                is later used to draw shipment ids.

        Returns:
            The observation for step 0.
        """
        random.seed(seed)
        self.step_count = 0
        self.inventory = Inventory()
        self.active_shipments: List[Shipment] = []
        self.carbon_total = 0.0
        self.cash_balance = 10000.0
        # Baseline settings must be assigned BEFORE the orders are generated:
        # _generate_initial_orders() overrides sea_blocked_until (medium/hard)
        # and carbon_quota (hard). Assigning the defaults afterwards wiped
        # those overrides, so the sea blockade never took effect.
        self.carbon_quota = 1000.0 if self.task == "hard" else 2000.0
        self.sea_blocked_until = 0
        self.pending_orders: List[Order] = self._generate_initial_orders()
        self.done = False
        return self._get_obs()

    def _generate_initial_orders(self) -> List[Order]:
        """Build the starting order book for the configured task.

        Besides returning fresh ``Order`` objects, this method also mutates
        the environment: the medium scenario sets ``sea_blocked_until`` to 15,
        and the fallback (hard) scenario sets ``sea_blocked_until`` to 20 and
        ``carbon_quota`` to 800.0.

        Returns:
            Two new orders, one for "EcoPhone" and one for "GreenTab".
        """
        if self.task == "easy":
            rows = [
                ("ORD_001", "EcoPhone", 5, 20, 500.0),
                ("ORD_002", "GreenTab", 3, 30, 800.0),
            ]
        elif self.task == "medium":
            self.sea_blocked_until = 15
            rows = [
                ("ORD_001", "EcoPhone", 8, 15, 800.0),
                ("ORD_002", "GreenTab", 5, 25, 1200.0),
            ]
        else:
            # Any task other than "easy"/"medium" is treated as hard.
            self.sea_blocked_until = 20
            self.carbon_quota = 800.0
            rows = [
                ("ORD_001", "EcoPhone", 10, 12, 1000.0),
                ("ORD_002", "GreenTab", 10, 20, 2000.0),
            ]

        # Column names line up positionally with the tuples above.
        columns = ("id", "product", "quantity", "due_date", "reward")
        return [Order(**dict(zip(columns, row))) for row in rows]

    def _get_obs(self, news: str = None) -> Observation:
        """Package the current episode state into an ``Observation``.

        Args:
            news: Optional message to attach to the observation.

        Returns:
            An ``Observation`` built from the current attribute values. The
            inventory and the shipment/order lists are handed over as-is;
            whether ``Observation`` copies them is not visible here.
        """
        state = {
            "step": self.step_count,
            "inventory": self.inventory,
            "active_shipments": self.active_shipments,
            "pending_orders": self.pending_orders,
            "carbon_total": self.carbon_total,
            "carbon_quota": self.carbon_quota,
            "cash_balance": self.cash_balance,
            "news": news,
        }
        return Observation(**state)

    def step(self, action: Action) -> Tuple[Observation, float, bool, dict]:
        """Apply one action and report the resulting transition.

        Only ``ActionType.SKIP`` advances the clock. It increments the step
        counter, lifts the sea blockade once its end step is reached, moves
        every active shipment one step closer (delivering those whose ETA
        reaches zero) and charges 50 points for each pending order that is
        past its due date. ORDER_PARTS, PRODUCE, OFFSET and CANCEL are passed
        to their handlers without advancing time; any other action type
        changes nothing.

        Args:
            action: The action chosen by the agent.

        Returns:
            ``(observation, reward, done, info)``. ``info`` may carry
            ``"news"``, ``"arrivals"``, ``"error"`` and, once the episode has
            terminated, ``"final_score"``.
        """
        reward = 0.0
        news = None
        info = {}

        # 1. Day advancement (SKIP only)
        if action.action_type == ActionType.SKIP:
            self.step_count += 1
            # ">=" mirrors the order check (sea orders are refused only while
            # step_count < sea_blocked_until), so the announcement arrives on
            # the first step at which sea freight is usable again.
            if self.sea_blocked_until > 0 and self.step_count >= self.sea_blocked_until:
                # Surface the message in the observation as well; previously
                # the observation's news field was always None.
                news = "Suez route is clear again."
                info["news"] = news
                self.sea_blocked_until = 0

            # Advance shipments; those that reach ETA 0 are delivered now.
            arrivals = []
            next_shipments = []
            for s in self.active_shipments:
                s.eta -= 1
                if s.eta <= 0:
                    arrivals.append(f"{s.quantity}x {s.part.value}")
                    self._receive_shipment(s)
                else:
                    next_shipments.append(s)
            self.active_shipments = next_shipments
            if arrivals:
                info["arrivals"] = arrivals

            # Late penalty: charged once per overdue order on every SKIP.
            for order in self.pending_orders:
                if self.step_count > order.due_date:
                    reward -= 50.0

        # 2. Instant actions (no time advancement)
        if action.action_type == ActionType.ORDER_PARTS:
            reward += self._handle_order_parts(action, info)
        elif action.action_type == ActionType.PRODUCE:
            reward += self._handle_production(action, info)
        elif action.action_type == ActionType.OFFSET:
            reward += self._handle_offset(action, info)
        elif action.action_type == ActionType.CANCEL:
            reward += self._handle_cancel(action, info)

        # 3. Termination: step limit reached or every order fulfilled.
        if self.step_count >= 50 or not self.pending_orders:
            self.done = True
            info["final_score"] = self._calculate_final_score()

        return self._get_obs(news=news), reward, self.done, info

    def _handle_order_parts(self, action: Action, info: dict) -> float:
        """Buy parts and put them in transit with the requested transport mode.

        The purchase costs ``10.0 * quantity * cost multiplier`` and its carbon
        (``carbon per unit * quantity``) is charged immediately. An order that
        matches an active shipment's part, mode and full ETA is merged into
        that shipment instead of creating a new one.

        Args:
            action: Must provide ``part_type``, ``mode`` and a positive
                ``quantity``.
            info: Step info dict; ``"error"`` is set when the order is refused.

        Returns:
            2.0 on success, -5.0 for missing or invalid parameters, -15.0 when
            sea freight is blocked, -10.0 when cash is insufficient.
        """
        if not action.part_type or not action.mode or not action.quantity:
            return -5.0
        # A negative quantity would yield a negative cost and negative carbon,
        # i.e. free cash and a free carbon reduction.
        if action.quantity < 0:
            info["error"] = "Order quantity must be positive"
            return -5.0

        base_cost = 10.0 * action.quantity
        mult, eta, carbon = self.TRANSPORT_SPECS[action.mode]

        # Check Disruption
        if action.mode == TransportMode.SEA and self.step_count < self.sea_blocked_until:
            info["error"] = "Suez Blocked: Sea routes unavailable"
            return -15.0

        total_cost = base_cost * mult

        if self.cash_balance < total_cost:
            info["error"] = "Insufficient funds"
            return -10.0

        carbon_impact = carbon * action.quantity
        self.cash_balance -= total_cost
        self.carbon_total += carbon_impact

        # Merge into a shipment placed during the same step (its ETA has not
        # been decremented yet) for the same part and mode.
        for ship in self.active_shipments:
            if ship.part == action.part_type and ship.mode == action.mode and ship.eta == eta:
                ship.quantity += action.quantity
                ship.cost += total_cost
                ship.carbon_impact += carbon_impact
                return 2.0

        # Redraw on collision so CANCEL can address every shipment
        # unambiguously. Terminates as long as fewer than 9000 ids are in use.
        taken_ids = {ship.id for ship in self.active_shipments}
        shipment_id = f"SHP_{random.randint(1000, 9999)}"
        while shipment_id in taken_ids:
            shipment_id = f"SHP_{random.randint(1000, 9999)}"

        self.active_shipments.append(
            Shipment(
                id=shipment_id,
                part=action.part_type,
                quantity=action.quantity,
                mode=action.mode,
                eta=eta,
                carbon_impact=carbon_impact,
                cost=total_cost,
            )
        )
        return 2.0

    def _handle_cancel(self, action: Action, info: dict) -> float:
        """Cancel an in-transit shipment and undo its cost and carbon.

        The first active shipment whose id equals ``action.shipment_id`` is
        refunded in full, its carbon impact is subtracted from the footprint
        (never below zero) and it is removed from the active list. A missing
        or unknown id is ignored.

        Args:
            action: Carries the ``shipment_id`` to cancel.
            info: Step info dict; not modified here.

        Returns:
            Always 0.0.
        """
        wanted_id = action.shipment_id
        if not wanted_id:
            return 0.0

        position = next(
            (idx for idx, candidate in enumerate(self.active_shipments) if candidate.id == wanted_id),
            None,
        )
        if position is not None:
            cancelled = self.active_shipments[position]
            # Refund
            self.cash_balance += cancelled.cost
            self.carbon_total = max(0.0, self.carbon_total - cancelled.carbon_impact)
            del self.active_shipments[position]
        return 0.0

    def _receive_shipment(self, ship: Shipment):
        """Add an arrived shipment's quantity to the matching inventory field.

        The inventory attribute is looked up by ``ship.part.value``.
        """
        field_name = ship.part.value
        on_hand = getattr(self.inventory, field_name)
        setattr(self.inventory, field_name, on_hand + ship.quantity)

    def _handle_production(self, action: Action, info: dict) -> float:
        """Consume parts to build a product and apply it to pending orders.

        Every unit needs one chip; "EcoPhone" additionally needs one sensor
        and "GreenTab" one battery per unit. Produced units are applied to
        pending orders for the same product in list order. An order whose
        quantity reaches zero is removed and its reward is paid out as both
        cash and step reward.

        Args:
            action: Must provide ``product``; ``quantity`` defaults to 1 when
                missing or zero and must not be negative.
            info: Step info dict; ``"error"`` is set when the run is refused.

        Returns:
            ``10.0 * quantity`` plus the reward of every order completed, -5.0
            for a missing product or negative quantity, -10.0 when parts are
            missing.
        """
        if not action.product:
            return -5.0
        qty = action.quantity if action.quantity else 1
        # A negative quantity would pass the stock check below and, by
        # subtracting a negative requirement, add parts to the inventory.
        if qty < 0:
            info["error"] = "Production quantity must be positive"
            return -5.0

        # Determine part requirements depending on product. To keep simulation clean, both use chips.
        req_chips = qty
        req_sensors = qty if action.product == "EcoPhone" else 0
        req_batteries = qty if action.product == "GreenTab" else 0

        stock = self.inventory
        if (
            stock.chips < req_chips
            or stock.sensors < req_sensors
            or stock.batteries < req_batteries
        ):
            info["error"] = "Missing parts for run"
            return -10.0

        stock.chips -= req_chips
        stock.sensors -= req_sensors
        stock.batteries -= req_batteries

        total_reward = 10.0 * qty
        remaining_produce = qty

        # Collect completed orders first; removing while iterating the same
        # list would skip entries.
        orders_to_remove = []
        for o in self.pending_orders:
            if o.product == action.product and remaining_produce > 0:
                fulfilled = min(o.quantity, remaining_produce)
                o.quantity -= fulfilled
                remaining_produce -= fulfilled
                if o.quantity <= 0:
                    orders_to_remove.append(o)
                    total_reward += o.reward
                    self.cash_balance += o.reward

        for o in orders_to_remove:
            self.pending_orders.remove(o)

        return total_reward

    def _handle_offset(self, action: Action, info: dict) -> float:
        """Buy carbon offsets at 2.0 cash per unit of carbon removed.

        Args:
            action: Must provide a positive ``offset_amount``.
            info: Step info dict; ``"error"`` is set when the purchase is
                refused.

        Returns:
            5.0 on success; -5.0 when the amount is missing or not positive,
            or when there is no footprint to offset; -10.0 when cash is
            insufficient.
        """
        if not action.offset_amount:
            return -5.0
        # A negative amount would produce a negative cost, crediting cash and
        # raising the footprint while still earning the success reward.
        if action.offset_amount < 0:
            info["error"] = "Offset amount must be positive"
            return -5.0
        if self.carbon_total <= 0:
            info["error"] = "No carbon footprint to offset"
            return -5.0

        cost = action.offset_amount * 2.0
        if self.cash_balance < cost:
            info["error"] = "Insufficient funds for offset"
            return -10.0

        self.cash_balance -= cost
        # The footprint is clamped at zero; the full cost is still charged
        # when the amount exceeds the remaining footprint.
        self.carbon_total = max(0.0, self.carbon_total - action.offset_amount)
        return 5.0

    def _handle_reroute(self, action: Action, info: dict) -> float:
        """Placeholder for rerouting; ignores its arguments.

        Returns:
            Always 0.0.
        """
        no_reward = 0.0
        return no_reward

    def _calculate_final_score(self) -> float:
        """Score the episode from order fulfillment and carbon overage.

        The base score is the fraction of the originally ordered quantity
        that has been fulfilled. If the footprint exceeds the quota, half of
        the relative overage is subtracted. The result is clamped to
        ``[0.01, 0.99]``.

        Returns:
            The final score as a float.
        """
        # _generate_initial_orders() is reused to learn the originally ordered
        # quantity, but it also reassigns sea_blocked_until and carbon_quota
        # for some tasks. Restore both so that scoring neither alters the
        # episode state nor swaps the quota the penalty is measured against.
        blockade_before = self.sea_blocked_until
        quota_before = self.carbon_quota
        try:
            total_orders = sum(o.quantity for o in self._generate_initial_orders())
        finally:
            self.sea_blocked_until = blockade_before
            self.carbon_quota = quota_before
        remaining = sum(o.quantity for o in self.pending_orders)

        fulfilled_ratio = (total_orders - remaining) / total_orders if total_orders > 0 else 1.0
        score = fulfilled_ratio

        # Penalize if carbon quota exceeded
        if self.carbon_total > self.carbon_quota:
            overage = self.carbon_total - self.carbon_quota
            penalty = (overage / self.carbon_quota) * 0.5
            score = max(0.0, score - penalty)

        return float(max(0.01, min(0.99, score)))