Spaces:
Sleeping
Sleeping
| """ | |
| environment.py β BEACON reinforcement learning environment. | |
| BEACON (Budget Environment for Agent Control and Optimization of Needs) is a | |
| dual-scale budget management environment with two operating modes: | |
| - "household": personal finance simulation (income in Indian Rupees) | |
| - "corporate": organisational finance simulation | |
| New systems in this version | |
| ββββββββββββββββββββββββββββ | |
| 1. Credit score (CIBIL-style, 300β900) that gates shock probability. | |
| 2. Debt balance with per-period compound interest; auto-funded missed bills. | |
| 3. Compound interest on savings balance. | |
| 4. Per-category inflation that raises minimum requirements each period. | |
| 5. Seasonal income variation (bonus / slow periods). | |
| 6. Expanded shock catalogue (12 per mode) with secondary income / cost effects. | |
| 7. Episode history recorded after every step. | |
| 8. Expanded reward formula (bills, savings, credit, debt, efficiency, penalties). | |
| 9. Four preset load_scenario() class-method shortcuts. | |
| 10. state() extended with all new fields. | |
| """ | |
| import random | |
| from copy import deepcopy | |
| from models import Observation, Action, Reward | |
| # --------------------------------------------------------------------------- | |
| # Module-level configuration constants | |
| # --------------------------------------------------------------------------- | |
| MODES = ("household", "corporate") | |
| # Spending categories available in each mode | |
| CATEGORIES: dict[str, list[str]] = { | |
| "household": [ | |
| "rent", "food", "utilities", "transport", | |
| "education", "medical", "discretionary", | |
| ], | |
| "corporate": [ | |
| "payroll", "operations", "marketing", "logistics", | |
| "capex", "reserves", "miscellaneous", | |
| ], | |
| } | |
| # Income sampling range (inclusive) per mode β household values in Indian Rupees | |
| INCOME_RANGE: dict[str, tuple[float, float]] = { | |
| "household": (30_000.0, 100_000.0), | |
| "corporate": (1_000_000.0, 50_000_000.0), | |
| } | |
| # Expanded shock catalogues β 12 per mode | |
| SHOCKS: dict[str, list[str]] = { | |
| "household": [ | |
| "medical_emergency", | |
| "appliance_repair", | |
| "school_fee_spike", | |
| "utility_surge", | |
| "job_loss_partial", | |
| "flood_damage", | |
| "vehicle_breakdown", | |
| "wedding_expense", | |
| "festival_overspend", | |
| "rent_hike", | |
| "medical_followup", | |
| "education_fee_hike", | |
| ], | |
| "corporate": [ | |
| "vendor_default", | |
| "regulatory_fine", | |
| "equipment_failure", | |
| "key_employee_exit", | |
| "cyber_attack", | |
| "supply_chain_disruption", | |
| "currency_fluctuation", | |
| "client_payment_delay", | |
| "raw_material_spike", | |
| "legal_dispute", | |
| "tax_reassessment", | |
| "market_downturn", | |
| ], | |
| } | |
| # Each shock's direct cost is sampled in [10%, 25%] of total_income | |
| SHOCK_COST_RANGE: tuple[float, float] = (0.10, 0.25) | |
| # Secondary effects: shock name β ("income" | "cost_category", multiplier_delta) | |
| # E.g. "income" with -0.15 means total_income *= (1 - 0.15) | |
| # E.g. "cost_category" with a dict means that category minimum fraction += delta | |
| SHOCK_SECONDARY_EFFECTS: dict[str, dict] = { | |
| # Household | |
| "medical_emergency": {"kind": "income", "delta": -0.15}, | |
| "job_loss_partial": {"kind": "income", "delta": -0.30}, | |
| # Corporate | |
| "vendor_default": {"kind": "cost_cat", "category": "operations", "delta": 0.20}, | |
| "key_employee_exit": {"kind": "cost_cat", "category": "payroll", "delta": 0.15}, | |
| } | |
| # Per-category monthly / quarterly inflation multipliers | |
| INFLATION_RATES: dict[str, dict[str, float]] = { | |
| "household": { | |
| "rent": 1.0, | |
| "food": 1.008, | |
| "utilities": 1.005, | |
| "transport": 1.004, | |
| "education": 1.006, | |
| "medical": 1.010, | |
| "discretionary": 1.003, | |
| }, | |
| "corporate": { | |
| "payroll": 1.015, | |
| "operations": 1.008, | |
| "marketing": 1.005, | |
| "logistics": 1.010, | |
| "capex": 1.003, | |
| "reserves": 1.0, | |
| "miscellaneous": 1.005, | |
| }, | |
| } | |
| # Per-period savings interest rates | |
| SAVINGS_RATE: dict[str, float] = { | |
| "household": 0.005, # 0.5% per month | |
| "corporate": 0.008, # 0.8% per quarter | |
| } | |
| # Per-period debt interest rates | |
| DEBT_INTEREST_RATE: dict[str, float] = { | |
| "household": 0.015, # 18% p.a. β 1.5% per month | |
| "corporate": 0.010, # 12% p.a. β 1% per quarter | |
| } | |
| # Seasonal income multipliers: {period: multiplier} | |
| SEASONAL_INCOME: dict[str, dict[int, float]] = { | |
| "household": {3: 1.30, 6: 0.85}, | |
| "corporate": {2: 1.40, 4: 0.75}, | |
| } | |
| # Credit score β shock probability | |
| CREDIT_SHOCK_PROB: list[tuple[float, float]] = [ | |
| # (min_score_exclusive, probability) β evaluated top-down | |
| (700.0, 0.20), # score > 700 | |
| (500.0, 0.35), # 500 < score β€ 700 | |
| (0.0, 0.50), # score β€ 500 | |
| ] | |
| # --------------------------------------------------------------------------- | |
| # Environment class | |
| # --------------------------------------------------------------------------- | |
| class BEACONEnvironment: | |
| """ | |
| BEACON: Budget Environment for Agent Control and Optimization of Needs. | |
| An OpenEnv-compatible, dual-scale budget management RL environment. | |
| The agent manages a budget over `total_periods` steps, allocating funds | |
| across spending categories, growing savings, and weathering random | |
| financial shocks. | |
| Episode flow:: | |
| obs = env.reset() | |
| while True: | |
| action = agent.act(obs) | |
| obs, reward, done, info = env.step(action) | |
| if done: | |
| break | |
| New in this version: credit scoring, debt management, compound interest, | |
| per-category inflation, seasonal income, expanded shocks with secondary | |
| effects, episode history, expanded reward formula, and preset scenarios. | |
| """ | |
| # ------------------------------------------------------------------ | |
| # Minimum category allocations as a *fraction* of total_income. | |
| # These are mutated each period by the inflation engine, so we store | |
| # live copies on the instance (see reset()). | |
| # Categories with 0.0 are non-essential (no penalty for zero spend). | |
| # ------------------------------------------------------------------ | |
| _BASE_MIN_REQUIREMENTS: dict[str, dict[str, float]] = { | |
| "household": { | |
| "rent": 0.25, | |
| "food": 0.20, | |
| "utilities": 0.08, | |
| "transport": 0.05, | |
| "education": 0.10, | |
| "medical": 0.05, | |
| "discretionary": 0.00, # non-essential | |
| }, | |
| "corporate": { | |
| "payroll": 0.35, | |
| "operations": 0.20, | |
| "marketing": 0.05, | |
| "logistics": 0.08, | |
| "capex": 0.05, | |
| "reserves": 0.10, | |
| "miscellaneous": 0.00, # non-essential | |
| }, | |
| } | |
| def __init__( | |
| self, | |
| mode: str = "household", | |
| total_periods: int = 6, | |
| seed: int = 42, | |
| scenario: str | None = None, | |
| ) -> None: | |
| """ | |
| Initialise the BEACON environment. | |
| Args: | |
| mode: Simulation mode β "household" or "corporate". | |
| total_periods: Number of budget periods in one episode. | |
| seed: Random seed for full reproducibility. | |
| scenario: Optional preset scenario name. If provided the | |
| environment is configured as if load_scenario() | |
| had been called (after standard init). | |
| Raises: | |
| ValueError: If an unrecognised mode is supplied. | |
| """ | |
| if mode not in MODES: | |
| raise ValueError( | |
| f"Invalid mode '{mode}'. Choose one of {MODES}." | |
| ) | |
| self.mode = mode | |
| self.total_periods = total_periods | |
| self.seed = seed | |
| # Isolated RNG β does not pollute global random state | |
| self._rng = random.Random(seed) | |
| # ----- Core state fields (all properly initialised in reset()) ----- | |
| self._period: int = 1 | |
| self._total_income: float = 0.0 | |
| self._base_income: float = 0.0 # pre-seasonal income | |
| self._savings_balance: float = 0.0 | |
| self._savings_goal: float = 0.0 | |
| self._category_budgets: dict[str, float] = {} | |
| self._category_spent: dict[str, float] = {} | |
| self._active_shocks: list[str] = [] | |
| self._shock_costs: dict[str, float] = {} | |
| # Live inflated minimum requirements (fractions of total_income) | |
| self._min_requirements: dict[str, float] = {} | |
| # ----- New systems ------------------------------------------------ | |
| self._credit_score: float = 750.0 | |
| self._debt_balance: float = 0.0 | |
| self._debt_interest_rate: float = DEBT_INTEREST_RATE[mode] | |
| self._savings_rate: float = SAVINGS_RATE[mode] | |
| self._history: list[dict] = [] | |
| self._current_scenario: str | None = scenario | |
| # Scenario-specific overrides applied before first reset | |
| self._scenario_forced_shocks: dict[int, str] = {} # period β shock | |
| self._scenario_income_override: float | None = None | |
| self._scenario_income_by_period: dict[int, float] = {} | |
| self._scenario_starting_debt: float = 0.0 | |
| # Apply preset scenario configuration if requested | |
| if scenario is not None: | |
| self._configure_scenario(scenario) | |
| # Start the first episode immediately | |
| self.reset() | |
| # ------------------------------------------------------------------ | |
| # Preset scenario factory | |
| # ------------------------------------------------------------------ | |
| def load_scenario(cls, scenario_name: str) -> "BEACONEnvironment": | |
| """ | |
| Return a pre-configured BEACONEnvironment for the named scenario. | |
| Available scenarios | |
| ------------------- | |
| ``"fresh_graduate"`` | |
| Household mode, 6 periods. Starts with βΉ1,50,000 student-loan | |
| debt and a modest βΉ35,000 monthly income. Savings goal βΉ50,000. | |
| ``"family_crisis"`` | |
| Household mode, 6 periods. Triggers a medical_emergency shock at | |
| period 2. Income βΉ55,000. | |
| ``"startup_survival"`` | |
| Corporate mode, 4 quarters. Income βΉ8,00,000 but reduced by 40% | |
| in periods 1 and 3 to simulate irregular cash flow. | |
| ``"growth_phase"`` | |
| Corporate mode, 4 quarters. Income βΉ50,00,000, savings goal | |
| βΉ80,00,000. | |
| Args: | |
| scenario_name: One of the scenario keys listed above. | |
| Returns: | |
| A fully configured and reset BEACONEnvironment instance. | |
| Raises: | |
| ValueError: If an unknown scenario name is provided. | |
| """ | |
| valid = {"fresh_graduate", "family_crisis", "startup_survival", "growth_phase"} | |
| if scenario_name not in valid: | |
| raise ValueError( | |
| f"Unknown scenario '{scenario_name}'. Choose from {valid}." | |
| ) | |
| return cls(scenario=scenario_name) | |
| def _configure_scenario(self, name: str) -> None: | |
| """Map scenario name onto instance overrides (called before reset).""" | |
| if name == "fresh_graduate": | |
| self.mode = "household" | |
| self.total_periods = 6 | |
| self.seed = 101 | |
| self._scenario_starting_debt = 150_000.0 | |
| self._scenario_income_override = 35_000.0 | |
| self._savings_goal_override = 50_000.0 | |
| elif name == "family_crisis": | |
| self.mode = "household" | |
| self.total_periods = 6 | |
| self.seed = 202 | |
| self._scenario_income_override = 55_000.0 | |
| self._scenario_forced_shocks = {2: "medical_emergency"} | |
| self._savings_goal_override = None | |
| elif name == "startup_survival": | |
| self.mode = "corporate" | |
| self.total_periods = 4 | |
| self.seed = 303 | |
| self._scenario_income_override = 800_000.0 | |
| # Periods 1 and 3 income reduced by 40% | |
| self._scenario_income_by_period = {1: 0.60, 3: 0.60} | |
| self._savings_goal_override = None | |
| elif name == "growth_phase": | |
| self.mode = "corporate" | |
| self.total_periods = 4 | |
| self.seed = 404 | |
| self._scenario_income_override = 5_000_000.0 | |
| self._savings_goal_override = 8_000_000.0 | |
| # Re-derive per-mode constants after possibly changing self.mode | |
| self._debt_interest_rate = DEBT_INTEREST_RATE[self.mode] | |
| self._savings_rate = SAVINGS_RATE[self.mode] | |
| # ------------------------------------------------------------------ | |
| # Core API | |
| # ------------------------------------------------------------------ | |
| def reset(self) -> Observation: | |
| """ | |
| Reset the environment and begin a new episode. | |
| Re-seeds the internal RNG so that consecutive reset() calls always | |
| produce the same starting state (deterministic reproducibility). | |
| Randomly activates zero or one shock at episode start (probability | |
| gated by credit score). | |
| Returns: | |
| The initial Observation for the new episode. | |
| """ | |
| # Fresh RNG from the same seed β identical episode starts every call | |
| self._rng = random.Random(self.seed) | |
| # --- Sample (or override) income --------------------------------- | |
| if getattr(self, "_scenario_income_override", None) is not None: | |
| self._base_income = float(self._scenario_income_override) | |
| else: | |
| lo, hi = INCOME_RANGE[self.mode] | |
| self._base_income = self._rng.uniform(lo, hi) | |
| self._total_income = self._base_income | |
| # --- Savings goal ------------------------------------------------ | |
| if getattr(self, "_savings_goal_override", None) is not None: | |
| self._savings_goal = float(self._savings_goal_override) | |
| else: | |
| self._savings_goal = 0.20 * self._base_income * self.total_periods | |
| # --- Zero-initialise all category tracking ----------------------- | |
| categories = CATEGORIES[self.mode] | |
| self._category_budgets = {cat: 0.0 for cat in categories} | |
| self._category_spent = {cat: 0.0 for cat in categories} | |
| # --- Live minimum requirements (deep copy so inflation is fresh) - | |
| self._min_requirements = deepcopy( | |
| self._BASE_MIN_REQUIREMENTS[self.mode] | |
| ) | |
| # --- Reset savings, period, and new systems ---------------------- | |
| self._savings_balance = 0.0 | |
| self._period = 1 | |
| self._credit_score = 750.0 | |
| self._history = [] | |
| # Starting debt from scenario (or zero) | |
| self._debt_balance = getattr(self, "_scenario_starting_debt", 0.0) | |
| # --- Clear shock state, then optionally seed one starting shock -- | |
| self._active_shocks = [] | |
| self._shock_costs = {} | |
| shock_prob = self._credit_score_to_shock_prob() | |
| if self._rng.random() < shock_prob: | |
| self._activate_random_shock() | |
| return self._make_observation() | |
| def step(self, action: Action) -> tuple[Observation, Reward, bool, dict]: | |
| """ | |
| Execute one budget period using the agent's action. | |
| Steps performed (in order): | |
| 1. Apply seasonal income variation for the current period. | |
| 2. Apply compound interest to savings balance. | |
| 3. Apply compound interest to debt balance. | |
| 4. Apply scenario-forced shocks for this period (if any). | |
| 5. Apply category allocations β update budgets and spent amounts. | |
| 6. Process debt payment from action. | |
| 7. Add savings contribution. | |
| 8. Apply per-category inflation to minimum requirements. | |
| 9. Identify missed essential bills β add to debt, update credit. | |
| 10. Update credit score based on period outcomes. | |
| 11. Compute multi-component reward. | |
| 12. Record period to episode history. | |
| 13. Advance period counter. | |
| 14. Randomly activate a new shock (probability gated by credit score). | |
| 15. Determine episode termination. | |
| Args: | |
| action: The Action submitted by the agent for this period. | |
| Returns: | |
| observation: New environment state after the step. | |
| reward: Structured Reward for this period. | |
| done: True when the episode has ended. | |
| info: Auxiliary diagnostic data (plain dict). | |
| """ | |
| # ---- 1. Seasonal income adjustment ------------------------------ | |
| season = SEASONAL_INCOME.get(self.mode, {}) | |
| seasonal_factor = season.get(self._period, 1.0) | |
| # Also apply scenario per-period income multipliers | |
| period_override = getattr(self, "_scenario_income_by_period", {}) | |
| scenario_factor = period_override.get(self._period, 1.0) | |
| self._total_income = self._base_income * seasonal_factor * scenario_factor | |
| # ---- 2. Compound interest on savings ---------------------------- | |
| if self._savings_balance > 0: | |
| self._savings_balance *= (1.0 + self._savings_rate) | |
| # ---- 3. Compound interest on debt ------------------------------- | |
| if self._debt_balance > 0: | |
| self._debt_balance *= (1.0 + self._debt_interest_rate) | |
| # ---- 4. Forced shocks (scenario) -------------------------------- | |
| forced_shocks = getattr(self, "_scenario_forced_shocks", {}) | |
| if self._period in forced_shocks: | |
| shock_name = forced_shocks[self._period] | |
| if shock_name not in self._active_shocks: | |
| self._active_shocks.append(shock_name) | |
| cost_fraction = self._rng.uniform(*SHOCK_COST_RANGE) | |
| self._shock_costs[shock_name] = cost_fraction * self._total_income | |
| self._apply_shock_secondary_effect(shock_name) | |
| # ---- 5. Apply category allocations ------------------------------ | |
| for cat, amount in action.allocations.items(): | |
| if cat in self._category_budgets: | |
| self._category_budgets[cat] = amount | |
| self._category_spent[cat] = amount | |
| # ---- 6. Process debt payment ------------------------------------ | |
| debt_payment = max(0.0, getattr(action, "debt_payment", 0.0)) | |
| if debt_payment > 0 and self._debt_balance > 0: | |
| self._debt_balance = max(0.0, self._debt_balance - debt_payment) | |
| # ---- 7. Savings contribution ------------------------------------ | |
| self._savings_balance += action.savings_contribution | |
| # ---- 8. Total spending (allocations + savings + debt payment) --- | |
| total_spent = ( | |
| sum(action.allocations.values()) | |
| + action.savings_contribution | |
| + debt_payment | |
| ) | |
| overspent = total_spent > self._total_income | |
| # ---- 9. Inflation β inflate minimum requirement fractions ------- | |
| inflation = INFLATION_RATES[self.mode] | |
| for cat in self._min_requirements: | |
| self._min_requirements[cat] *= inflation.get(cat, 1.0) | |
| # ---- 10. Identify missed essential bills β add to debt ---------- | |
| minimums = self._min_requirements | |
| essential_cats = { | |
| cat: frac for cat, frac in minimums.items() if frac > 0.0 | |
| } | |
| missed_bills: list[str] = [] | |
| all_essential_paid = True | |
| for cat, min_frac in essential_cats.items(): | |
| min_required = min_frac * self._total_income | |
| allocated = action.allocations.get(cat, 0.0) | |
| if allocated < 0.80 * min_required: | |
| # Missed β shortfall is added to debt | |
| shortfall = min_required - allocated | |
| self._debt_balance += shortfall | |
| missed_bills.append(cat) | |
| all_essential_paid = False | |
| # ---- 11. Update credit score ------------------------------------ | |
| self._update_credit_score( | |
| all_essential_paid=all_essential_paid, | |
| overspent=overspent, | |
| savings_contributed=action.savings_contribution > 0, | |
| ) | |
| # ---- 12. Compute reward ----------------------------------------- | |
| reward = self._calculate_reward(action, total_spent, missed_bills) | |
| # ---- 13. Record period to history -------------------------------- | |
| self._history.append({ | |
| "period": self._period, | |
| "reward": reward.total, | |
| "credit_score": self._credit_score, | |
| "debt_balance": self._debt_balance, | |
| "savings_balance": self._savings_balance, | |
| "shocks": list(self._active_shocks), | |
| "overspent": overspent, | |
| }) | |
| # ---- 14. Advance time period ------------------------------------ | |
| self._period += 1 | |
| # ---- 15. Randomly activate a new shock (credit-gated prob) ------ | |
| shock_prob = self._credit_score_to_shock_prob() | |
| if self._rng.random() < shock_prob: | |
| shock = self._activate_random_shock() | |
| if shock: | |
| self._apply_shock_secondary_effect(shock) | |
| # ---- 16. Episode is done when no periods remain ------------------ | |
| done = self.periods_remaining == 0 | |
| # ---- 17. Diagnostic info dict ------------------------------------ | |
| info: dict = { | |
| "period_completed": self._period - 1, | |
| "total_spent": total_spent, | |
| "total_income": self._total_income, | |
| "overspent": overspent, | |
| "missed_bills": missed_bills, | |
| "active_shocks": list(self._active_shocks), | |
| "shock_costs": dict(self._shock_costs), | |
| "savings_balance": self._savings_balance, | |
| "savings_goal": self._savings_goal, | |
| "periods_remaining": self.periods_remaining, | |
| "credit_score": self._credit_score, | |
| "debt_balance": self._debt_balance, | |
| } | |
| return self._make_observation(), reward, done, info | |
| def state(self) -> dict: | |
| """ | |
| Return the complete current environment state as a plain dictionary. | |
| Useful for logging, checkpointing, or external serialisation without | |
| constructing Pydantic models. | |
| Returns: | |
| A flat dict containing all internal state fields, including the | |
| new credit score, debt, savings rate, inflation rates, and | |
| episode history. | |
| """ | |
| return { | |
| "mode": self.mode, | |
| "period": self._period, | |
| "total_periods": self.total_periods, | |
| "periods_remaining": self.periods_remaining, | |
| "total_income": self._total_income, | |
| "base_income": self._base_income, | |
| "savings_balance": self._savings_balance, | |
| "savings_goal": self._savings_goal, | |
| "savings_rate": self._savings_rate, | |
| "category_budgets": dict(self._category_budgets), | |
| "category_spent": dict(self._category_spent), | |
| "active_shocks": list(self._active_shocks), | |
| "shock_costs": dict(self._shock_costs), | |
| "seed": self.seed, | |
| # --- New fields --- | |
| "credit_score": self._credit_score, | |
| "debt_balance": self._debt_balance, | |
| "debt_interest_rate": self._debt_interest_rate, | |
| "inflation_rates": dict(INFLATION_RATES[self.mode]), | |
| "min_requirements": dict(self._min_requirements), | |
| "episode_history": list(self._history), | |
| "current_scenario": self._current_scenario, | |
| } | |
| # ------------------------------------------------------------------ | |
| # Properties | |
| # ------------------------------------------------------------------ | |
| def periods_remaining(self) -> int: | |
| """Number of budget periods still remaining in the current episode.""" | |
| return max(0, self.total_periods - self._period + 1) | |
| # ------------------------------------------------------------------ | |
| # Private helpers | |
| # ------------------------------------------------------------------ | |
| def _make_observation(self) -> Observation: | |
| """Build and return an Observation from the current internal state.""" | |
| return Observation( | |
| mode=self.mode, | |
| period=self._period, | |
| total_income=self._total_income, | |
| category_budgets=dict(self._category_budgets), | |
| category_spent=dict(self._category_spent), | |
| savings_balance=self._savings_balance, | |
| savings_goal=self._savings_goal, | |
| active_shocks=list(self._active_shocks), | |
| periods_remaining=self.periods_remaining, | |
| # New fields | |
| credit_score=self._credit_score, | |
| debt_balance=self._debt_balance, | |
| debt_interest_rate=self._debt_interest_rate, | |
| episode_history=list(self._history), | |
| ) | |
| # ------------------------------------------------------------------ | |
| # Credit score helpers | |
| # ------------------------------------------------------------------ | |
| def _credit_score_to_shock_prob(self) -> float: | |
| """Return per-period shock probability based on current credit score.""" | |
| score = self._credit_score | |
| for min_score, prob in CREDIT_SHOCK_PROB: | |
| if score > min_score: | |
| return prob | |
| return 0.50 # fallback (score β€ 0 edge case) | |
| def _update_credit_score( | |
| self, | |
| *, | |
| all_essential_paid: bool, | |
| overspent: bool, | |
| savings_contributed: bool, | |
| ) -> None: | |
| """Adjust credit score for the period's financial behaviour.""" | |
| delta = 0.0 | |
| if all_essential_paid: | |
| delta += 15.0 | |
| else: | |
| delta -= 50.0 | |
| if overspent: | |
| delta -= 25.0 | |
| if savings_contributed: | |
| delta += 10.0 | |
| self._credit_score = max(300.0, min(900.0, self._credit_score + delta)) | |
| # ------------------------------------------------------------------ | |
| # Shock helpers | |
| # ------------------------------------------------------------------ | |
| def _activate_random_shock(self) -> str | None: | |
| """ | |
| Select and activate one random shock from the mode's shock pool. | |
| Prefers shocks not currently active. If all shocks are already | |
| active, one is reselected and its cost refreshed. | |
| Cost is sampled uniformly in [10%, 25%] of total_income. | |
| Returns: | |
| The name of the newly activated shock, or None if pool is empty. | |
| """ | |
| available = SHOCKS[self.mode] | |
| if not available: | |
| return None | |
| # Prefer inactive shocks for variety | |
| inactive = [s for s in available if s not in self._active_shocks] | |
| shock = self._rng.choice(inactive if inactive else available) | |
| cost_fraction = self._rng.uniform(*SHOCK_COST_RANGE) | |
| shock_cost = cost_fraction * self._total_income | |
| if shock not in self._active_shocks: | |
| self._active_shocks.append(shock) | |
| # Always refresh the cost (covers re-roll of existing shocks) | |
| self._shock_costs[shock] = shock_cost | |
| return shock | |
| def _apply_shock_secondary_effect(self, shock: str) -> None: | |
| """ | |
| Apply secondary economic effects for shocks that have them. | |
| Secondary effects: | |
| income β self._total_income reduced by |delta| fraction. | |
| cost_cat β the named category's minimum requirement fraction | |
| is increased by delta (higher mandatory spend). | |
| """ | |
| effect = SHOCK_SECONDARY_EFFECTS.get(shock) | |
| if effect is None: | |
| return | |
| if effect["kind"] == "income": | |
| # e.g. delta = -0.15 β income *= 0.85 | |
| self._total_income *= (1.0 + effect["delta"]) | |
| # Also update base_income so seasonal scaling reference stays valid | |
| self._base_income = self._total_income | |
| elif effect["kind"] == "cost_cat": | |
| cat = effect["category"] | |
| delta = effect["delta"] | |
| if cat in self._min_requirements: | |
| self._min_requirements[cat] = min( | |
| 1.0, self._min_requirements[cat] + delta | |
| ) | |
| # ------------------------------------------------------------------ | |
| # Reward computation | |
| # ------------------------------------------------------------------ | |
| def _calculate_reward( | |
| self, | |
| action: Action, | |
| total_spent: float, | |
| missed_bills: list[str], | |
| ) -> Reward: | |
| """ | |
| Compute the structured Reward for the current period. | |
| Component breakdown | |
| ------------------- | |
| bills_paid_score β [0.0, 0.35] | |
| Fraction of essential categories with β₯ 80% of inflated | |
| minimum allocation, scaled by 0.35. | |
| savings_progress_score β [0.0, 0.25] | |
| (savings_balance / savings_goal) Γ 0.25, capped at 0.25. | |
| credit_health_score β [0.0, 0.15] | |
| (credit_score β 300) / 600 Γ 0.15. | |
| debt_management_score β [0.0, 0.15] | |
| Tiered by debt relative to income: | |
| 0 debt β 0.15 | < 20% income β 0.10 | | |
| < 50% income β 0.05 | else β 0.0 | |
| efficiency_score β {0.0, 0.10} | |
| 0.10 if total_spent β€ total_income, else 0.0. | |
| penalties β (ββ, 0.0] | |
| β0.30 per missed essential bill | |
| β0.20 if debt_balance > 50% of income | |
| β0.10 if overspent | |
| β0.15 if credit_score < 500 | |
| total = sum of all, clipped to [β1.0, 1.0]. | |
| Args: | |
| action: Agent's action for this period. | |
| total_spent: Total funds deployed. | |
| missed_bills: List of essential categories underfunded. | |
| Returns: | |
| A fully populated Reward model. | |
| """ | |
| minimums = self._min_requirements | |
| essential_cats = { | |
| cat: frac for cat, frac in minimums.items() if frac > 0.0 | |
| } | |
| total_essential = len(essential_cats) | |
| # --- bills_paid_score --- (max 0.35) ------------------------------ | |
| categories_covered = 0 | |
| for cat, min_frac in essential_cats.items(): | |
| min_required = min_frac * self._total_income | |
| allocated = action.allocations.get(cat, 0.0) | |
| if allocated >= 0.80 * min_required: | |
| categories_covered += 1 | |
| bills_paid_score = ( | |
| (categories_covered / total_essential) * 0.35 | |
| if total_essential > 0 | |
| else 0.35 | |
| ) | |
| # --- savings_progress_score --- (max 0.25) ------------------------- | |
| if self._savings_goal > 0: | |
| raw = (self._savings_balance / self._savings_goal) * 0.25 | |
| savings_progress_score = min(raw, 0.25) | |
| else: | |
| savings_progress_score = 0.0 | |
| # --- credit_health_score --- (max 0.15) --------------------------- | |
| credit_health_score = ((self._credit_score - 300.0) / 600.0) * 0.15 | |
| # --- debt_management_score --- (max 0.15) ------------------------- | |
| if self._debt_balance == 0.0: | |
| debt_management_score = 0.15 | |
| elif self._debt_balance < 0.20 * self._total_income: | |
| debt_management_score = 0.10 | |
| elif self._debt_balance < 0.50 * self._total_income: | |
| debt_management_score = 0.05 | |
| else: | |
| debt_management_score = 0.0 | |
| # --- efficiency_score --- (0.10 or 0.0) --------------------------- | |
| overspent = total_spent > self._total_income | |
| efficiency_score = 0.10 if not overspent else 0.0 | |
| # --- penalties --- (negative) ------------------------------------- | |
| penalties = 0.0 | |
| # β0.30 per essential category underfunded | |
| penalties -= 0.30 * len(missed_bills) | |
| # β0.20 if outstanding debt exceeds 50% of income | |
| if self._debt_balance > 0.50 * self._total_income: | |
| penalties -= 0.20 | |
| # β0.10 for overspending income this period | |
| if overspent: | |
| penalties -= 0.10 | |
| # β0.15 if credit score is dangerously low | |
| if self._credit_score < 500.0: | |
| penalties -= 0.15 | |
| # --- total β clipped to [β1.0, 1.0] ------------------------------ | |
| total = ( | |
| bills_paid_score | |
| + savings_progress_score | |
| + credit_health_score | |
| + debt_management_score | |
| + efficiency_score | |
| + penalties | |
| ) | |
| total = max(-1.0, min(1.0, total)) | |
| return Reward( | |
| total=total, | |
| bills_paid_score=bills_paid_score, | |
| savings_progress_score=savings_progress_score, | |
| efficiency_score=efficiency_score, | |
| credit_health_score=credit_health_score, | |
| debt_management_score=debt_management_score, | |
| shock_resilience_bonus=0.0, # retained for backward compat | |
| penalties=penalties, | |
| ) | |