""" lifestack_gym_env.py — Gymnasium-compatible wrapper for LifeStack Exposes the LifeStack environment as a standard gym.Env with: - observation_space: Box(0, 100, shape=(26,)) — 23 sub-metrics + 3 resources - action_space: Discrete(7) — 7 action types mapped to template actions - Standard reset() / step() / render() API """ '''we are not using this as of now, this was been used in old model :)''' import gymnasium as gym import numpy as np from gymnasium import spaces import random, copy from core.life_state import LifeMetrics, ResourceBudget, DependencyGraph from core.metric_schema import normalize_metric_path from core.reward import compute_reward, compute_task_reward from agent.conflict_generator import generate_conflict, ConflictEvent from intake.simperson import SimPerson # Map discrete action IDs to action types ACTION_TYPE_MAP = { 0: "negotiate", 1: "communicate", 2: "delegate", 3: "spend", 4: "reschedule", 5: "rest", 6: "execute", } class LifeStackGymEnv(gym.Env): """ LifeStack as a Gymnasium environment. Observation: 26-dim vector (23 life sub-metrics + 3 resource values) Action: Discrete(7) — one of 7 action types Reward: float in [-1, 1] """ metadata = {"render_modes": ["human", "ansi"]} def __init__(self, task=None, difficulty: int = None, render_mode: str = None, max_steps: int = 30): super().__init__() self.observation_space = spaces.Box( low=0.0, high=100.0, shape=(26,), dtype=np.float32 ) self.action_space = spaces.Discrete(7) self.render_mode = render_mode self.task = task self.difficulty = difficulty self.max_steps = max_steps from core.lifestack_env import LifeStackEnv self.env = LifeStackEnv() self._metric_keys = list(LifeMetrics().flatten().keys()) def _obs_vector(self) -> np.ndarray: flat = self.env.state.current_metrics.flatten() metric_vals = [flat[k] for k in self._metric_keys] budget = self.env.state.budget resource_vals = [ budget.time_hours, budget.money_dollars, budget.energy_units, ] return np.array(metric_vals + resource_vals, dtype=np.float32) def reset(self, seed=None, options=None): super().reset(seed=seed) conflict = None if self.task is None: from agent.conflict_generator import generate_conflict conflict = generate_conflict(self.difficulty) obs_obj = self.env.reset(task=self.task, conflict=conflict) return self._obs_vector(), obs_obj.metadata def step(self, action: int): from core.lifestack_env import LifeStackAction action_type = ACTION_TYPE_MAP[action] # Build logical action from template metric_changes, resource_cost = self._action_to_changes(action_type) # In this wrapper, we pick a reasonable target if needed target = "" current_task = self.env.state.current_task if action_type == "execute" and current_task: for r in current_task.viable_routes: if r.id not in self.env.state.closed_route_ids: target = r.id break ls_action = LifeStackAction( action_type=action_type, target=target, reasoning=f"Agent chose {action_type} for discrete action {action}.", metric_changes=metric_changes, resource_cost=resource_cost, actions_taken=1 ) obs_obj = self.env.step(ls_action) terminated = obs_obj.done # Truncated only if not naturally terminated truncated = (not terminated) and (self.env.state.step_count >= (self.task.horizon if self.task else self.max_steps)) return self._obs_vector(), obs_obj.reward, terminated, truncated, {"breakdown": obs_obj.metadata.get("breakdown", {})} def _action_to_changes(self, action_type: str): """Maps an action type string to (metric_changes, resource_cost).""" templates = { "negotiate": ( {"career.workload": -15.0, "mental_wellbeing.stress_level": -5.0}, {"time": 1.5, "energy": 20.0}, ), "communicate": ( {"relationships.romantic": 10.0, "mental_wellbeing.stress_level": -5.0}, {"time": 0.5, "energy": 10.0}, ), "delegate": ( {"career.workload": -10.0, "relationships.professional_network": -5.0}, {"time": 1.0, "energy": 15.0}, ), "spend": ( {"finances.liquidity": -20.0, "mental_wellbeing.stress_level": -10.0}, {"time": 1.0, "energy": 15.0}, ), "reschedule": ( {"career.workload": -10.0, "time.free_hours_per_week": 5.0}, {"time": 2.0, "energy": 15.0}, ), "rest": ( {"mental_wellbeing.stress_level": -12.0, "physical_health.energy": 10.0}, {"time": 1.0}, ), "execute": ( {}, # executes a route target {"time": 1.0, "energy": 10.0}, ), } return templates.get(action_type, ({}, {})) def render(self): if self.render_mode == "human": # Delegate to the internal env's render self.env.render() # ── Quick smoke test ── if __name__ == "__main__": env = LifeStackGymEnv(difficulty=3, render_mode="human") obs, info = env.reset() print(f"Conflict: {info['conflict_title']} | Person: {info['person']}") print(f"Obs shape: {obs.shape}, dtype: {obs.dtype}") env.render() total = 0.0 done = False while not done: act = env.action_space.sample() obs, rew, term, trunc, info = env.step(act) total += rew done = term or trunc print(f" Action {act} → reward {rew:.3f}") env.render() print(f"\nTotal reward: {total:.3f}")