| from typing import * |
| import random |
| import math |
| import math |
| from copy import deepcopy |
| from app.models import ActionEnum, EventEnum, Observation, Reward |
|
|
class CrisisSimEnv:
    """Month-by-month household-finance simulation with random economic shocks.

    Every :meth:`step` applies one agent action, draws a random event,
    applies the event, updates prices and cash flow, checks for bankruptcy
    and returns a reward clamped to ``[0.15, 1.0]``.

    Config keys read from ``config``:
        max_months: episode length in months (default ``12``).
        difficulty: ``"easy"``, ``"medium"`` or ``"hard"`` (default
            ``"medium"``). Any other value keeps the base starting state,
            never draws an event other than ``EventEnum.none`` and uses the
            same bankruptcy limits as ``"easy"``.
    """

    # Lowest monthly expense level that expense-cutting actions can reach.
    MIN_EXPENSES = 1000.0

    def __init__(self, config: Dict[str, Any]):
        """Store ``config`` and put the environment in its initial state."""
        self.config = config
        self.max_months = config.get("max_months", 12)
        self.task_difficulty = config.get("difficulty", "medium")
        self.reset()

    def reset(self) -> Observation:
        """Start a new episode and return its first observation."""
        self.month = 0

        # Drop the previous-month snapshot written by step(). Without this the
        # first reward of a new episode would measure momentum against the
        # final state of the episode that just ended.
        self.previous_savings = None
        self.previous_debt = None
        self.previous_inflation = None

        # Base starting state, used as-is unless the difficulty overrides it.
        self.income = 3000.0
        self.expenses = 2000.0
        self.savings = 5000.0
        self.debt = 1000.0
        self.inflation = 0.02
        self.currency_value = 1.0
        self.food_price_index = 100.0
        self.fuel_price = 3.0
        self.current_event = EventEnum.none
        self.bankrupt = False

        if self.task_difficulty == "easy":
            self.savings = 8000.0
            self.debt = 500.0
        elif self.task_difficulty == "medium":
            self.inflation = 0.05
        elif self.task_difficulty == "hard":
            self.inflation = 0.08
            self.savings = 2000.0
            self.debt = 3000.0

        # Reference values and counters consumed by the reward function.
        self.initial_savings = self.savings
        self.initial_debt = self.debt
        self.smart_decisions = 0
        self.bad_decisions = 0
        self.consecutive_negative_months = 0

        return self.state()

    def state(self) -> Observation:
        """Return the current simulation state as an ``Observation``."""
        return Observation(
            income=self.income,
            expenses=self.expenses,
            savings=self.savings,
            debt=self.debt,
            inflation=self.inflation,
            currency_value=self.currency_value,
            food_price_index=self.food_price_index,
            fuel_price=self.fuel_price,
            current_event=self.current_event.value,
            month=self.month,
            bankrupt=self.bankrupt,
        )

    def _reduce_expenses(self, amount: float) -> None:
        """Lower monthly expenses by ``amount``, never below ``MIN_EXPENSES``."""
        self.expenses = max(self.MIN_EXPENSES, self.expenses - amount)

    def _pay_debt(self) -> None:
        """Move as much savings as possible into debt repayment.

        The payment is clamped at zero: with negative savings the raw
        ``min(savings, debt)`` would be negative and would *increase* both
        savings and debt, i.e. act as an untracked loan.
        """
        amount = max(0.0, min(self.savings, self.debt))
        self.savings -= amount
        self.debt -= amount
        if amount > 0:
            self.smart_decisions += 1

    def _apply_action(self, action: ActionEnum) -> None:
        """Apply the immediate effect of ``action`` to the household state.

        Every action except ``hold_cash`` and ``take_loan`` counts as a smart
        decision (``pay_debt`` only when something was actually repaid);
        ``take_loan`` counts as a bad decision. An action that matches no
        branch leaves the state untouched.
        """
        if action == ActionEnum.cut_expenses:
            self._reduce_expenses(300.0)
            self.smart_decisions += 1
        elif action == ActionEnum.stock_essentials:
            self.savings -= 500.0
            self.expenses += 100.0
            self.smart_decisions += 1
        elif action == ActionEnum.invest_gold:
            # Only the cash outflow is modelled; no gold holding is tracked.
            self.savings -= 1000.0
            self.smart_decisions += 1
        elif action == ActionEnum.hold_cash:
            pass
        elif action == ActionEnum.convert_currency:
            self.savings -= 50.0
            self.smart_decisions += 1
        elif action == ActionEnum.take_loan:
            # 2000 in cash is received against 2200 of new debt.
            self.savings += 2000.0
            self.debt += 2200.0
            self.bad_decisions += 1
        elif action == ActionEnum.pay_debt:
            self._pay_debt()
        elif action == ActionEnum.reduce_luxury:
            # Shares the floor used by cut_expenses so that repeated use
            # cannot push expenses below zero and turn them into income.
            self._reduce_expenses(500.0)
            self.smart_decisions += 1
        elif action == ActionEnum.build_emergency_fund:
            self.savings += 500.0
            self.expenses += 500.0
            self.smart_decisions += 1

    def _trigger_event(self) -> None:
        """Draw this month's event from the difficulty-specific distribution.

        Weights are relative (``random.choices`` does not require them to sum
        to one); the first entry is always ``EventEnum.none``.
        """
        events = [EventEnum.none]
        weights = [1.0]

        if self.task_difficulty == "easy":
            events += [EventEnum.job_loss, EventEnum.currency_crash]
            weights = [1.0, 0.05, 0.05]
        elif self.task_difficulty == "medium":
            events += [EventEnum.oil_supply_shock, EventEnum.food_shortage]
            weights = [0.4, 0.3, 0.3]
        elif self.task_difficulty == "hard":
            events += [
                EventEnum.war_outbreak,
                EventEnum.job_loss,
                EventEnum.currency_crash,
                EventEnum.import_ban,
            ]
            weights = [0.1, 0.3, 0.2, 0.2, 0.2]

        self.current_event = random.choices(events, weights=weights, k=1)[0]

    def _apply_event(self) -> None:
        """Apply the shock of ``current_event``; effects are halved on easy."""
        severity_mult = 0.5 if self.task_difficulty == "easy" else 1.0
        event = self.current_event

        if event == EventEnum.war_outbreak:
            self.fuel_price *= 1.0 + 0.15 * severity_mult
            self.inflation += 0.02 * severity_mult
        elif event == EventEnum.oil_supply_shock:
            self.fuel_price *= 1.0 + 0.15 * severity_mult
        elif event == EventEnum.currency_crash:
            self.currency_value *= 1.0 - 0.15 * severity_mult
            self.inflation += 0.02 * severity_mult
        elif event == EventEnum.food_shortage:
            self.food_price_index *= 1.0 + 0.20 * severity_mult
        elif event == EventEnum.job_loss:
            self.income *= 1.0 - 0.70 * severity_mult
        elif event == EventEnum.import_ban:
            self.food_price_index *= 1.0 + 0.10 * severity_mult
            self.inflation += 0.01 * severity_mult

    def _update_economy(self) -> None:
        """Propagate prices into inflation, then settle the month's cash flow.

        The statement order matters: fuel feeds food prices, food prices feed
        inflation, and the updated inflation is what scales expenses before
        savings are settled.
        """
        # Fuel above 3.5 pushes the food price index up.
        if self.fuel_price > 3.5:
            self.food_price_index += (self.fuel_price - 3.5) * 5.0

        # A food index above 120 adds to inflation.
        if self.food_price_index > 120.0:
            self.inflation += 0.01 * (self.food_price_index / 100.0)

        self.expenses *= 1.0 + self.inflation

        # Debt grows by 5% every step.
        self.debt *= 1.05

        self.savings = self.savings + self.income - self.expenses

    def _check_bankruptcy(self) -> None:
        """Update the deep-deficit counter and flag bankruptcy at the limit.

        The counter goes up by one in a month where savings are below
        ``-grace_buffer`` and otherwise decays by one (it is not reset to
        zero). ``bankrupt`` is only ever set here; ``reset`` clears it.
        """
        if self.task_difficulty == "hard":
            consec_limit, grace_buffer = 3, 10000.0
        elif self.task_difficulty == "medium":
            consec_limit, grace_buffer = 4, 15000.0
        else:
            consec_limit, grace_buffer = 6, 20000.0

        if self.savings < -grace_buffer:
            self.consecutive_negative_months += 1
        else:
            self.consecutive_negative_months = max(
                0, self.consecutive_negative_months - 1
            )

        if self.consecutive_negative_months >= consec_limit:
            self.bankrupt = True

    def _previous_or_current(self, name: str, current: float) -> float:
        """Return the snapshot stored under ``name``, or ``current`` if none.

        A missing or ``None`` snapshot means no step has completed in this
        episode yet, so the corresponding momentum delta becomes zero.
        """
        value = getattr(self, name, None)
        return current if value is None else value

    def _compute_reward(self) -> float:
        """Score the current state; the result is clamped to ``[0.15, 1.0]``.

        Weighted terms: survival (0.30), savings relative to the starting
        savings (0.15), debt relative to the starting debt (0.15), smart
        decisions (+0.15) and bad decisions (-0.15), both squashed with
        ``tanh``. On top of that come a momentum bonus of at most +/-0.05, a
        month-count bonus of up to 0.10, a penalty of 0.05 per counted
        deep-deficit month and a flat 0.10 bankruptcy penalty.
        """
        survival_score = 0.0 if self.bankrupt else 1.0

        if self.savings > 0:
            savings_ratio = math.tanh(self.savings / max(1.0, self.initial_savings))
        else:
            savings_ratio = -math.tanh(abs(self.savings) / 5000.0)

        if self.initial_debt > 0:
            debt_ratio = 1.0 - math.tanh(self.debt / max(1.0, self.initial_debt))
        else:
            debt_ratio = math.exp(-self.debt / 2000.0)

        # Month-over-month change; zero on the first step of an episode.
        prev_savings = self._previous_or_current("previous_savings", self.savings)
        prev_debt = self._previous_or_current("previous_debt", self.debt)
        prev_inflation = self._previous_or_current(
            "previous_inflation", self.inflation
        )

        savings_delta = (self.savings - prev_savings) / 1000.0
        debt_delta = (prev_debt - self.debt) / 1000.0
        inflation_delta = (prev_inflation - self.inflation) * 20.0
        momentum_bonus = (
            math.tanh(savings_delta + debt_delta + inflation_delta) * 0.05
        )

        smart_bonus_ratio = math.tanh(self.smart_decisions / 5.0)
        bad_penalty_ratio = math.tanh(self.bad_decisions / 5.0)

        bad_state_penalty = self.consecutive_negative_months * 0.05
        bankruptcy_penalty = 0.1 if self.bankrupt else 0.0

        reward = (
            survival_score * 0.30
            + savings_ratio * 0.15
            + debt_ratio * 0.15
            + smart_bonus_ratio * 0.15
            - bad_penalty_ratio * 0.15
            - bad_state_penalty
            - bankruptcy_penalty
            + momentum_bonus
        )

        # Bonus that grows with the number of months played.
        reward += math.tanh(self.month / 12.0) * 0.10

        min_reward_floor = 0.15

        # Soft-compress anything above 0.90 so it approaches 1.0 smoothly.
        if reward > 0.90:
            reward = 0.90 + 0.10 * math.tanh((reward - 0.90) * 5.0)

        return max(min_reward_floor, min(1.0, reward))

    def step(self, action: ActionEnum) -> Tuple[Observation, float, bool, Dict[str, Any]]:
        """Advance the simulation by one month.

        Order of operations: apply ``action``, draw and apply an event,
        update the economy, check bankruptcy, compute the reward, then store
        the snapshot used for the next step's momentum bonus.

        Returns:
            ``(observation, reward, done, info)`` where ``done`` is true once
            ``max_months`` is reached or the household is bankrupt, and
            ``info`` carries the ``"bankrupt"`` and ``"month"`` values.
        """
        self.month += 1

        self._apply_action(action)
        self._trigger_event()
        self._apply_event()
        self._update_economy()
        self._check_bankruptcy()

        reward = self._compute_reward()

        # Snapshot taken after the reward so the next step compares against it.
        self.previous_savings = self.savings
        self.previous_debt = self.debt
        self.previous_inflation = self.inflation

        done = self.month >= self.max_months or self.bankrupt

        return self.state(), reward, done, {"bankrupt": self.bankrupt, "month": self.month}
|
|