{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# LifeStack Training Notebook\n",
"### AI that handles life's worst Fridays\n",
"End-to-end training pipeline for the LifeStack simulation engine."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Install the runtime dependencies used by the cells below.\n",
"# %pip (rather than !pip) installs into the environment of the running kernel,\n",
"# so the packages are importable from this notebook session.\n",
"%pip install groq openai chromadb sentence-transformers gradio matplotlib numpy pydantic openenv-core -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Ask the user to upload the LifeStack sources through the Colab file picker.\n",
"from google.colab import files\n",
"\n",
"upload_prompt = 'Upload all LifeStack .py files: life_state.py, reward.py, lifestack_env.py, simperson.py, conflict_generator.py, action_space.py, agent.py, memory.py, run_episode.py, train_trl.py'\n",
"print(upload_prompt)\n",
"\n",
"# Keep the result around; its keys are echoed back as a confirmation.\n",
"uploaded = files.upload()\n",
"print(f'Uploaded: {list(uploaded)}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from google.colab import userdata\n",
"\n",
"# Store your GROQ_API_KEY in Colab Secrets (key icon on left sidebar)\n",
"try:\n",
"    os.environ['GROQ_API_KEY'] = userdata.get('GROQ_API_KEY')\n",
"    print('\u2705 API key loaded from Colab Secrets')\n",
"except Exception as exc:\n",
"    # Catch Exception rather than using a bare except, so kernel interrupts\n",
"    # (KeyboardInterrupt / SystemExit) still propagate.\n",
"    # Best-effort fallback: keep a key that is already exported in the environment;\n",
"    # otherwise install an obvious placeholder so the variable at least exists.\n",
"    os.environ.setdefault('GROQ_API_KEY', 'your_key_here')\n",
"    print(f'\u26a0\ufe0f Could not read GROQ_API_KEY from Colab Secrets ({type(exc).__name__}: {exc})')\n",
"    print('\u26a0\ufe0f Add your GROQ_API_KEY to Colab Secrets or paste it above')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"\n",
"# Make the working directory importable. Guarded so that re-running this cell\n",
"# does not keep appending duplicate entries to sys.path.\n",
"if '.' not in sys.path:\n",
"    sys.path.append('.')\n",
"\n",
"from core.life_state import LifeMetrics, ResourceBudget, DependencyGraph\n",
"from core.reward import compute_reward\n",
"from core.lifestack_env import LifeStackEnv\n",
"from intake.simperson import SimPerson\n",
"from agent.conflict_generator import generate_conflict, TaskGenerator\n",
"from agent.agent import LifeStackAgent\n",
"from agent.memory import LifeStackMemory\n",
"\n",
"# Single source of truth for the scenario, so the task and the initial\n",
"# disruption are always generated at the same difficulty.\n",
"SCENARIO_DOMAIN = 'flight_crisis'\n",
"SCENARIO_DIFFICULTY = 3\n",
"\n",
"# Use TaskGenerator \u2014 gives a real task with routes, milestones, and events\n",
"_gen = TaskGenerator()\n",
"task = _gen.generate(domain=SCENARIO_DOMAIN, difficulty=SCENARIO_DIFFICULTY)\n",
"# NOTE(review): `conflict` is not referenced by any other cell visible in this notebook.\n",
"conflict = generate_conflict(difficulty=SCENARIO_DIFFICULTY) # for initial disruption\n",
"\n",
"env = LifeStackEnv(task=task)\n",
"person = SimPerson()\n",
"print('\\u2705 All modules loaded')\n",
"print(f'\\u2705 Task: {task.goal} | Horizon: {task.horizon} steps | Routes: {len(task.viable_routes)} | Milestones: {len(task.milestones)}')\n",
"print(f'\\u2705 Person: {person.get_personality_hint()}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys, os\n",
"sys.path.insert(0, os.getcwd()) # ensure project root is importable\n",
"from scripts.run_episode import run_episode\n",
"\n",
"# Run one quiet episode per difficulty level and report each outcome,\n",
"# followed by the mean reward over all of them.\n",
"print('Running 3 sample episodes...\\n')\n",
"sample_difficulties = (2, 3, 5)\n",
"rewards = []\n",
"for episode_no, level in enumerate(sample_difficulties, start=1):\n",
"    outcome = run_episode(difficulty=level, verbose=False)\n",
"    total = outcome['total_reward']\n",
"    rewards.append(total)\n",
"    print(f'Episode {episode_no} (difficulty {level}): reward = {total:.3f} | steps = {outcome[\"steps\"]} | person = {outcome[\"person\"]}')\n",
"\n",
"mean_reward = sum(rewards) / len(rewards)\n",
"print(f'\\nAverage reward: {mean_reward:.3f}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Training-only dependencies. %pip (rather than !pip) installs into the\n",
"# environment of the running kernel.\n",
"%pip install \"unsloth==2024.12.4\" \"trl>=0.9\" \"transformers>=4.45\" peft accelerate datasets -q\n",
"\n",
"# Run the training script as a subprocess; it is looked up in the current working directory.\n",
"!python train_trl.py"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import Image, display\n",
"import os\n",
"\n",
"# Candidate plot files in priority order; the first one present on disk is shown.\n",
"curve_candidates = ('grpo_reward_curve.png', 'trl_reward_curve.png')\n",
"curve_path = next(filter(os.path.exists, curve_candidates), None)\n",
"\n",
"if curve_path is None:\n",
"    print('Reward curve not found. Did training complete?')\n",
"else:\n",
"    display(Image(curve_path))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import glob\n",
"\n",
"\n",
"def _checkpoint_step(path):\n",
"    \"\"\"Sort key that orders 'checkpoint-<step>' paths by their numeric step.\n",
"\n",
"    Paths whose suffix after the last '-' is not a number sort after the\n",
"    numbered ones, alphabetically.\n",
"    \"\"\"\n",
"    suffix = path.rsplit('-', 1)[-1]\n",
"    return (0, int(suffix), path) if suffix.isdecimal() else (1, 0, path)\n",
"\n",
"\n",
"checkpoints = glob.glob('lifestack_model/checkpoint-*')\n",
"print(f\"Found {len(checkpoints)} checkpoints (saved every 50 steps).\")\n",
"# A plain sorted() is lexicographic and would list checkpoint-100 before checkpoint-50.\n",
"for ckpt in sorted(checkpoints, key=_checkpoint_step):\n",
"    print(ckpt)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from agent.memory import LifeStackMemory  # same package path the setup cell imports from\n",
"import shutil, os\n",
"\n",
"# Compares one episode run with the memory directory moved aside against one run\n",
"# with it in place. `run_episode` comes from the sample-episodes cell above.\n",
"# NOTE(review): presumably MEMORY_DIR is where LifeStackMemory persists its data \u2014 confirm.\n",
"MEMORY_DIR = './lifestack_memory'\n",
"MEMORY_BACKUP_DIR = './lifestack_memory_backup'\n",
"\n",
"print('=== BEFORE vs AFTER MEMORY ===\\n')\n",
"\n",
"# Without memory: move the directory aside for the baseline run.\n",
"# Skip the move when a backup already exists (e.g. left by an interrupted earlier run):\n",
"# shutil.move into an existing directory would nest the store inside it.\n",
"if os.path.exists(MEMORY_DIR) and not os.path.exists(MEMORY_BACKUP_DIR):\n",
"    shutil.move(MEMORY_DIR, MEMORY_BACKUP_DIR)\n",
"try:\n",
"    result_no_mem = run_episode(difficulty=5, verbose=False)\n",
"finally:\n",
"    # Always put the original directory back, even if the baseline episode raised.\n",
"    if os.path.exists(MEMORY_BACKUP_DIR):\n",
"        # Discard anything the baseline run created at MEMORY_DIR first; otherwise the\n",
"        # move below would place the backup *inside* that directory instead of restoring it.\n",
"        if os.path.isdir(MEMORY_DIR):\n",
"            shutil.rmtree(MEMORY_DIR)\n",
"        shutil.move(MEMORY_BACKUP_DIR, MEMORY_DIR)\n",
"print(f'Without memory | Reward: {result_no_mem[\"total_reward\"]:.3f}')\n",
"\n",
"# With memory\n",
"result_with_mem = run_episode(difficulty=5, verbose=False)\n",
"print(f'With memory | Reward: {result_with_mem[\"total_reward\"]:.3f}')\n",
"\n",
"improvement = result_with_mem['total_reward'] - result_no_mem['total_reward']\n",
"print(f'Improvement : {improvement:+.3f}')\n",
"print(f'\\nMemory stats: {LifeStackMemory().get_stats()}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Final Summary\n",
"**LifeStack:** Built an AI-driven sandbox for simulating complex life scenarios. It scales based on five fundamental personality traits and models resource budgets.\n",
"\n",
"#### Cited Research:\n",
"- Generative Agents (Park et al., 2023)\n",
"- Large Language Models as Simulated Economic Agents (Horton, 2023)\n",
"- Evaluating LLMs for Social Scenarios (Li et al., 2023)\n",
"- Role-Playing in LLMs (Shanahan et al., 2023)\n",
"\n",
"**Reward Improvement:** Evaluated the baseline against retrieval-augmented dynamic memories.\n",
"\n",
"**Hugging Face Demo:** Uploaded to Hugging Face Spaces."
]
}
],
"metadata": {
"colab": {
"name": "LifeStack_Training.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}