| """ |
| run_episode.py — LifeStack Full Episode Runner |
| |
| Orchestrates a complete episode: |
| 1. Generate a Task (with correct horizon from task.horizon) and a ConflictEvent |
| 2. Initialize environment, agent, person, and memory |
| 3. Loop up to task.horizon steps: agent decides → action applied → reward computed → memory updated |
| 4. Print a rich episode summary at the end |
| """ |
|
|
| import sys, os; sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| import random |
| from core.life_state import LifeMetrics, ResourceBudget |
| from core.lifestack_env import LifeStackEnv, LifeStackAction |
| from agent.agent import LifeStackAgent |
| from intake.simperson import SimPerson |
| from agent.conflict_generator import generate_conflict, escalate_conflict, adaptive_escalate, TaskGenerator |
| from core.action_space import apply_action, validate_action |
| from agent.memory import LifeStackMemory |
| from core.reward import compute_reward |
| import copy |
|
|
# Module-level TaskGenerator instance, created once at import time and reused
# by every run_episode() call to generate the episode's task.
_TASK_GENERATOR = TaskGenerator()
|
|
|
|
# Flattened metric values below / above these bounds are reported as
# "critical" / "thriving" in the episode summary.
_CRITICAL_BELOW = 20
_THRIVING_ABOVE = 70


def _scale_metric_changes(metric_changes, target_domain, uptake_score):
    """Return metric deltas keyed by dotted path and scaled by ``uptake_score``.

    Keys without a ``.`` are prefixed with ``target_domain``. Deltas that
    cannot be converted to ``float`` (wrong type or unparsable string) are
    skipped rather than aborting the step.
    """
    scaled = {}
    for path, delta in metric_changes.items():
        if "." not in path:
            path = f"{target_domain}.{path}"
        try:
            scaled[path] = float(delta) * uptake_score
        except (TypeError, ValueError):
            # float(None) raises TypeError, float("abc") raises ValueError.
            continue
    return scaled


def _split_escalation(payload):
    """Split an ``ESCALATION:`` payload of the form ``"<reason> -> <title>"``.

    Returns ``(reason, new_title)``; ``new_title`` is ``None`` when the
    payload carries no ``" -> "`` separator.
    """
    parts = payload.split(" -> ")
    new_title = parts[1] if len(parts) > 1 else None
    return parts[0], new_title


def _summarize_metric_deltas(initial_flat, final_flat):
    """Describe metrics that moved by at least 1.0 as ``"name:+d.d, ..."``.

    Only the last dotted component of each key is shown. Returns
    ``"no_change"`` when nothing moved by 1.0 or more.
    """
    diffs = []
    for key, end_value in final_flat.items():
        delta = end_value - initial_flat.get(key, 0.0)
        if abs(delta) >= 1.0:
            diffs.append(f"{key.split('.')[-1]}:{delta:+.1f}")
    return ", ".join(diffs) if diffs else "no_change"


def run_episode(
    difficulty: int = None,
    verbose: bool = True,
    memory: "LifeStackMemory" = None,
    agent: "LifeStackAgent" = None,
    agent_history: list = None,
    model_path: str = None,
) -> dict:
    """
    Runs one full LifeStack episode.

    Args:
        difficulty: Difficulty passed to the task and conflict generators.
                    When falsy, the task uses a random difficulty in 1-3 and
                    the "flight_crisis" domain; values above 3 select the
                    "code_merge_crisis" domain.
        verbose: When True, print the episode banner, every step and the
                 final summary. Has no effect on the returned summary.
        memory: Optional shared LifeStackMemory instance (avoids re-loading the
                sentence-transformer model on every episode).
        agent:  Optional shared LifeStackAgent instance (avoids re-creating the
                Groq client on every episode).
        agent_history: Optional list of (conflict_id, reward) tuples from prior
                       episodes. Used by adaptive_escalate to decide difficulty.
        model_path: Local model path handed to LifeStackAgent when ``agent``
                    is not supplied.

    Returns:
        summary dict with keys: person, initial_conflict_id, total_reward,
        steps, conflicts_seen, critical_metrics, thriving_count, step_log,
        memory_stats
    """
    if agent is None:
        agent = LifeStackAgent(local_model_path=model_path)
    if memory is None:
        memory = LifeStackMemory()
    if agent_history is None:
        agent_history = []

    # The pool is rebuilt per episode so each run starts from fresh SimPerson objects.
    person_pool = [
        SimPerson(name="Alex (Executive)", openness=0.4, conscientiousness=0.9, extraversion=0.7, agreeableness=0.25, neuroticism=0.8),
        SimPerson(name="Chloe (Creative)", openness=0.9, conscientiousness=0.2, extraversion=0.5, agreeableness=0.70, neuroticism=0.15),
        SimPerson(name="Sam (Introvert)", openness=0.5, conscientiousness=0.6, extraversion=0.1, agreeableness=0.65, neuroticism=0.9),
        SimPerson(name="Maya (Family)", openness=0.5, conscientiousness=0.7, extraversion=0.5, agreeableness=0.95, neuroticism=0.3),
        SimPerson(name="Leo (Student)", openness=0.85, conscientiousness=0.8, extraversion=0.4, agreeableness=0.4, neuroticism=0.55),
    ]
    person = random.choice(person_pool)

    # Task generation: the domain is chosen from the requested difficulty.
    domain = "flight_crisis" if (difficulty or 2) <= 3 else "code_merge_crisis"
    task = _TASK_GENERATOR.generate(domain=domain, difficulty=difficulty or random.randint(1, 3))

    conflict = generate_conflict(difficulty)
    initial_conflict_id = conflict.id

    env = LifeStackEnv(task=task)
    obs = env.reset(task=task, conflict=conflict, budget=conflict.resource_budget,
                    person=person, agent_history=agent_history)
    done = obs.done

    total_reward = 0.0
    step_log = []
    conflicts_seen = [conflict.title]
    route_taken = []
    initial_metrics_flat = env.state.current_metrics.flatten()

    banner = "═" * 60

    if verbose:
        print("\n" + banner)
        print(f" LIFESTACK EPISODE — {conflict.title}")
        print(f" Person : {person.name}")
        print(f" Hint : {person.get_personality_hint()}")
        print(f" Story : {conflict.story}")
        print(banner)
        env.render()

    while not done:
        step = obs.step

        # Retrieve similar past trajectories as few-shot context for the agent.
        few_shot = memory.build_few_shot_prompt(conflict.title, env.state.current_metrics.flatten())

        action = agent.get_action(env.state.current_metrics, env.state.budget, conflict, person, few_shot_context=few_shot)
        primary = action.primary

        # An unaffordable action is replaced in place by a free "rest" effect.
        is_valid, invalid_reason = validate_action(action, env.state.budget)
        if not is_valid:
            if verbose:
                print(f"\n ⚠️ Step {step+1}: Action unaffordable ({invalid_reason}). Forcing rest.")
            primary.metric_changes = {"mental_wellbeing.stress_level": -3.0}
            primary.resource_cost = {}

        # The person's uptake score scales how much of the action's effect lands.
        current_stress = env.state.current_metrics.mental_wellbeing.stress_level
        uptake_score = person.respond_to_action(
            primary.action_type,
            primary.resource_cost,
            current_stress,
        )
        scaled_changes = _scale_metric_changes(
            primary.metric_changes, primary.target_domain, uptake_score
        )

        env_action = LifeStackAction.from_agent_action(action)
        env_action.metric_changes = scaled_changes
        obs = env.step(env_action)
        step_reward = obs.reward or 0.0
        done = obs.done
        total_reward += step_reward

        agent.store_decision(action, step_reward)
        route_taken.append(f"{primary.action_type}({primary.target_domain})")

        penalties = obs.metadata.get("breakdown", {}).get("penalties_fired", [])
        step_log.append({
            "step": step + 1,
            "action": primary.action_type,
            "domain": primary.target_domain,
            "description": primary.description,
            "reward": round(step_reward, 3),
            "penalties": penalties,
        })

        if verbose:
            print(f"\n{'─'*60}")
            print(f" STEP {step+1} — {primary.action_type.upper()} on {primary.target_domain}")
            print(f' "{primary.description}"')
            if action.communication:
                print(f" 💬 [{action.communication.recipient}] ({action.communication.tone}): {action.communication.content}")
            print(f" Reward: {step_reward:.3f} | Penalties: {penalties or 'none'}")

        # Environment events. Escalations are recorded in conflicts_seen
        # whether or not verbose is set, so the returned summary does not
        # depend on the printing flag; only the printing itself is gated.
        for msg in obs.metadata.get("info", []):
            if msg.startswith("DRIFT:"):
                if verbose:
                    print(f"\n[DRIFT] {msg[6:]}")
            elif msg.startswith("ESCALATION:"):
                escalation_reason, new_title = _split_escalation(msg[11:])
                if new_title is not None:
                    conflicts_seen.append(new_title)
                if verbose:
                    print(f"\n🔥 ADAPTIVE ESCALATION: {escalation_reason}")
                    if new_title is not None:
                        print(f" New conflict: {new_title}")

        if verbose:
            env.render()

    final_flat = env.state.current_metrics.flatten()
    critical = [k for k, v in final_flat.items() if v < _CRITICAL_BELOW]
    improved = [k for k, v in final_flat.items() if v > _THRIVING_ABOVE]
    metrics_diff_str = _summarize_metric_deltas(initial_metrics_flat, final_flat)

    # Persist the whole trajectory so later episodes can use it as few-shot context.
    memory.store_trajectory(
        conflict_title=conflict.title,
        route_taken=" -> ".join(route_taken),
        total_reward=total_reward,
        metrics_diff_str=metrics_diff_str,
        reasoning=f"Resolved with {env.state.step_count} steps. End critical: {len(critical)}",
    )
    mem_stats = memory.get_stats()

    if verbose:
        print("\n" + banner)
        print(" EPISODE COMPLETE — FINAL SUMMARY")
        print(banner)
        print(f" Person : {person.name}")
        print(f" Conflicts Seen : {' → '.join(conflicts_seen)}")
        print(f" Steps Taken : {env.state.step_count}")
        print(f" Total Reward : {total_reward:.4f}")
        print(f" Critical (<20) : {critical or 'None'}")
        print(f" Thriving (>70) : {len(improved)} metrics")
        print("\n Step-by-Step Log:")
        for s in step_log:
            flag = " ⚠️ " if s["penalties"] else " ✅ "
            print(f" {flag} Step {s['step']}: [{s['action']}] on {s['domain']} → {s['reward']:.3f}")
        print(f"\n Memory Bank : {mem_stats['total_memories']} decisions stored (avg reward: {mem_stats['average_reward']})")
        print(banner)

    return {
        "person": person.name,
        "initial_conflict_id": initial_conflict_id,
        "total_reward": round(total_reward, 4),
        "steps": env.state.step_count,
        "conflicts_seen": conflicts_seen,
        "critical_metrics": critical,
        "thriving_count": len(improved),
        "step_log": step_log,
        "memory_stats": mem_stats,
    }
|
|
|
|
def main() -> None:
    """CLI entry point: run three verbose episodes sharing one agent and memory.

    ``--model`` is forwarded to LifeStackAgent as ``local_model_path``.
    ``--difficulty`` fixes the difficulty of all three episodes; when it is
    omitted (or 0) the episodes run at difficulties 2, 3 and 5.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=None, help="Path to trained GRPO model (default: auto-detect ./lifestack_model or LIFESTACK_MODEL_PATH)")
    parser.add_argument("--difficulty", type=int, default=None, help="Fixed difficulty 1-5 (default: varies)")
    args = parser.parse_args()

    # Build the agent and memory once and reuse them across all episodes.
    shared_agent = LifeStackAgent(local_model_path=args.model)
    shared_memory = LifeStackMemory(silent=True)

    difficulties = [args.difficulty] * 3 if args.difficulty else [2, 3, 5]
    banner = "═" * 60
    for d in difficulties:
        print(f"\n{banner}")
        print(f" STARTING EPISODE AT DIFFICULTY {d}")
        print(banner)
        summary = run_episode(difficulty=d, verbose=True, agent=shared_agent, memory=shared_memory)
        print(f"\n → Total Reward: {summary['total_reward']}")


if __name__ == "__main__":
    main()
|
|