from typing import * import random import math import math from copy import deepcopy from app.models import ActionEnum, EventEnum, Observation, Reward class CrisisSimEnv: def __init__(self, config: Dict[str, Any]): self.config = config self.max_months = config.get("max_months", 12) self.task_difficulty = config.get("difficulty", "medium") self.reset() def reset(self) -> Observation: self.month = 0 # Initial states based on difficulty self.income = 3000.0 self.expenses = 2000.0 self.savings = 5000.0 self.debt = 1000.0 self.inflation = 0.02 self.currency_value = 1.0 # Base 1.0 self.food_price_index = 100.0 self.fuel_price = 3.0 # per unit self.current_event = EventEnum.none self.bankrupt = False if self.task_difficulty == "easy": self.savings = 8000.0 self.debt = 500.0 elif self.task_difficulty == "medium": self.inflation = 0.05 elif self.task_difficulty == "hard": self.inflation = 0.08 self.savings = 2000.0 self.debt = 3000.0 # Metrics for reward calculation self.initial_savings = self.savings self.initial_debt = self.debt self.smart_decisions = 0 self.bad_decisions = 0 self.consecutive_negative_months = 0 return self.state() def state(self) -> Observation: return Observation( income=self.income, expenses=self.expenses, savings=self.savings, debt=self.debt, inflation=self.inflation, currency_value=self.currency_value, food_price_index=self.food_price_index, fuel_price=self.fuel_price, current_event=self.current_event.value, month=self.month, bankrupt=self.bankrupt ) def _apply_action(self, action: ActionEnum): # Default action behavior if action == ActionEnum.cut_expenses: self.expenses = max(1000.0, self.expenses - 300.0) self.smart_decisions += 1 elif action == ActionEnum.stock_essentials: self.savings -= 500.0 self.expenses += 100.0 # higher maintenance # Provides buffer against food price index self.smart_decisions += 1 elif action == ActionEnum.invest_gold: self.savings -= 1000.0 self.smart_decisions += 1 elif action == ActionEnum.hold_cash: pass # No direct change, safe but vulnerable to inflation elif action == ActionEnum.convert_currency: self.savings -= 50.0 # fee self.smart_decisions += 1 elif action == ActionEnum.take_loan: self.savings += 2000.0 self.debt += 2200.0 # interest self.bad_decisions += 1 elif action == ActionEnum.pay_debt: amount = min(self.savings, self.debt) self.savings -= amount self.debt -= amount if amount > 0: self.smart_decisions += 1 elif action == ActionEnum.reduce_luxury: self.expenses -= 500.0 self.smart_decisions += 1 elif action == ActionEnum.build_emergency_fund: self.savings += 500.0 self.expenses += 500.0 # moving from income stream effectively, abstract logic: increase expenses, keep savings higher self.smart_decisions += 1 def _trigger_event(self): # Event probabilities based on difficulty events = [EventEnum.none] weights = [1.0] if self.task_difficulty == "easy": events.extend([EventEnum.job_loss, EventEnum.currency_crash]) weights.extend([0.05, 0.05]) elif self.task_difficulty == "medium": events.extend([EventEnum.oil_supply_shock, EventEnum.food_shortage]) weights = [0.4, 0.3, 0.3] elif self.task_difficulty == "hard": events.extend([EventEnum.war_outbreak, EventEnum.job_loss, EventEnum.currency_crash, EventEnum.import_ban]) weights = [0.1, 0.3, 0.2, 0.2, 0.2] self.current_event = random.choices(events, weights=weights, k=1)[0] def _apply_event(self): # Soften severity for easy task severity_mult = 0.5 if self.task_difficulty == "easy" else 1.0 if self.current_event == EventEnum.war_outbreak: self.fuel_price *= (1.0 + 0.15 * severity_mult) self.inflation += (0.02 * severity_mult) elif self.current_event == EventEnum.oil_supply_shock: self.fuel_price *= (1.0 + 0.15 * severity_mult) elif self.current_event == EventEnum.currency_crash: self.currency_value *= (1.0 - 0.15 * severity_mult) self.inflation += (0.02 * severity_mult) elif self.current_event == EventEnum.food_shortage: self.food_price_index *= (1.0 + 0.20 * severity_mult) elif self.current_event == EventEnum.job_loss: self.income *= (1.0 - 0.70 * severity_mult) # 30% retention normally, 65% on easy elif self.current_event == EventEnum.import_ban: self.food_price_index *= (1.0 + 0.10 * severity_mult) self.inflation += (0.01 * severity_mult) def _update_economy(self): # Cause-effect propagation # Fuel price increases transport costs, making food more expensive if self.fuel_price > 3.5: self.food_price_index += (self.fuel_price - 3.5) * 5.0 # Food price -> inflation if self.food_price_index > 120.0: self.inflation += 0.01 * (self.food_price_index / 100.0) # Apply inflation to expenses self.expenses *= (1.0 + self.inflation) # Debts accrue interest (e.g., 5% per month minimum) self.debt *= 1.05 # Update savings self.savings = self.savings + self.income - self.expenses def _check_bankruptcy(self): # Determine strictness if self.task_difficulty == "hard": consec_limit = 3 grace_buffer = 10000.0 elif self.task_difficulty == "medium": consec_limit = 4 grace_buffer = 15000.0 else: # easy consec_limit = 6 grace_buffer = 20000.0 # Register bad months vs recovery if self.savings < -grace_buffer: self.consecutive_negative_months += 1 else: self.consecutive_negative_months = max(0, self.consecutive_negative_months - 1) if self.consecutive_negative_months >= consec_limit: self.bankrupt = True def _compute_reward(self) -> float: survival_score = 1.0 if not self.bankrupt else 0.0 # Soft scaling: asymptotic curves instead of hard caps if self.savings > 0: savings_ratio = math.tanh(self.savings / max(1.0, self.initial_savings)) else: savings_ratio = -math.tanh(abs(self.savings) / 5000.0) if self.initial_debt > 0: debt_ratio = 1.0 - math.tanh(self.debt / max(1.0, self.initial_debt)) else: debt_ratio = math.exp(-self.debt / 2000.0) # State change dynamics (monthly deltas) prev_savings = getattr(self, "previous_savings", self.savings) prev_debt = getattr(self, "previous_debt", self.debt) prev_inflation = getattr(self, "previous_inflation", self.inflation) savings_delta = (self.savings - prev_savings) / 1000.0 debt_delta = (prev_debt - self.debt) / 1000.0 inflation_delta = (prev_inflation - self.inflation) * 20.0 # Small dynamic variation based on monthly momentum (+/- 0.05) momentum_bonus = math.tanh(savings_delta + debt_delta + inflation_delta) * 0.05 smart_bonus_ratio = math.tanh(self.smart_decisions / 5.0) bad_penalty_ratio = math.tanh(self.bad_decisions / 5.0) # Gradual penalty over steps instead of instant max punishment bad_state_penalty = self.consecutive_negative_months * 0.05 bankruptcy_penalty = 0.1 if self.bankrupt else 0.0 reward = ( survival_score * 0.30 + savings_ratio * 0.15 + debt_ratio * 0.15 + smart_bonus_ratio * 0.15 - bad_penalty_ratio * 0.15 - bad_state_penalty - bankruptcy_penalty + momentum_bonus ) # Add soft-scaled survival bonus per step reward += math.tanh(self.month / 12.0) * 0.10 # Normalize reward Incrementally to [0,1] with a floor to prevent instant 0.0 drops min_reward_floor = 0.15 # Soft clamp near the top to prevent flatlining at 1.00 or 0.90 if reward > 0.90: reward = 0.90 + 0.10 * math.tanh((reward - 0.90) * 5.0) normalized_reward = max(min_reward_floor, min(1.0, reward)) return normalized_reward def step(self, action: ActionEnum) -> Tuple[Observation, float, bool, Dict[str, Any]]: self.month += 1 # 1. Apply agent action self._apply_action(action) # 2. Random events self._trigger_event() # 3. Apply event impact self._apply_event() # 4. Update inflation and prices, savings self._update_economy() # 5. Check bankruptcy condition self._check_bankruptcy() # 6. Compute reward reward = self._compute_reward() # Store state variables to compare deltas next month self.previous_savings = self.savings self.previous_debt = self.debt self.previous_inflation = self.inflation # Check termination done = self.month >= self.max_months or self.bankrupt return self.state(), reward, done, {"bankrupt": self.bankrupt, "month": self.month}