{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LifeStack Training Notebook\n",
    "### AI that handles life's worst Fridays\n",
    "End-to-end training pipeline for the LifeStack simulation engine."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Install dependencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install groq openai chromadb sentence-transformers gradio matplotlib numpy pydantic openenv-core -q"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Upload the project files (Colab)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload all LifeStack .py files\n",
    "from google.colab import files\n",
    "print('Upload all LifeStack .py files: life_state.py, reward.py, lifestack_env.py, simperson.py, conflict_generator.py, action_space.py, agent.py, memory.py, run_episode.py, train_trl.py')\n",
    "uploaded = files.upload()\n",
    "print(f'Uploaded: {list(uploaded.keys())}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Configure the Groq API key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from google.colab import userdata\n",
    "# Store your GROQ_API_KEY in Colab Secrets (key icon on left sidebar)\n",
    "try:\n",
    "    os.environ['GROQ_API_KEY'] = userdata.get('GROQ_API_KEY')\n",
    "    print('\u2705 API key loaded from Colab Secrets')\n",
    "except Exception as exc:\n",
    "    # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit still propagate.\n",
    "    # setdefault keeps a key that is already in the environment instead of\n",
    "    # overwriting it with the placeholder.\n",
    "    os.environ.setdefault('GROQ_API_KEY', 'your_key_here')\n",
    "    print('\u26a0\ufe0f Add your GROQ_API_KEY to Colab Secrets or paste it above')\n",
    "    print(f'   (secret lookup failed with {type(exc).__name__})')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Load the modules and build a task"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('.')\n",
    "from core.life_state import LifeMetrics, ResourceBudget, DependencyGraph\n",
    "from core.reward import compute_reward\n",
    "from core.lifestack_env import LifeStackEnv\n",
    "from intake.simperson import SimPerson\n",
    "from agent.conflict_generator import generate_conflict, TaskGenerator\n",
    "from agent.agent import LifeStackAgent\n",
    "from agent.memory import LifeStackMemory\n",
    "\n",
    "# Use TaskGenerator \u2014 gives a real task with routes, milestones, and events\n",
    "_gen = TaskGenerator()\n",
    "task = _gen.generate(domain='flight_crisis', difficulty=3)\n",
    "conflict = generate_conflict(difficulty=3)  # for initial disruption\n",
    "\n",
    "env = LifeStackEnv(task=task)\n",
    "person = SimPerson()\n",
    "print('\\u2705 All modules loaded')\n",
    "print(f'\\u2705 Task: {task.goal} | Horizon: {task.horizon} steps | Routes: {len(task.viable_routes)} | Milestones: {len(task.milestones)}')\n",
    "print(f'\\u2705 Person: {person.get_personality_hint()}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Run sample episodes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys, os\n",
    "sys.path.insert(0, os.getcwd())  # ensure project root is importable\n",
    "from scripts.run_episode import run_episode\n",
    "print('Running 3 sample episodes...\\n')\n",
    "rewards = []\n",
    "for i, diff in enumerate([2, 3, 5], 1):\n",
    "    result = run_episode(difficulty=diff, verbose=False)\n",
    "    rewards.append(result['total_reward'])\n",
    "    print(f'Episode {i} (difficulty {diff}): reward = {result[\"total_reward\"]:.3f} | steps = {result[\"steps\"]} | person = {result[\"person\"]}')\n",
    "print(f'\\nAverage reward: {sum(rewards)/len(rewards):.3f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Train\n",
    "Installs the training stack and runs `train_trl.py` from the working directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install \"unsloth==2024.12.4\" \"trl>=0.9\" \"transformers>=4.45\" peft accelerate datasets -q\n",
    "\n",
    "!python train_trl.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Reward curve"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import Image, display\n",
    "import os\n",
    "\n",
    "# Show whichever reward-curve image is present, preferring the GRPO file name.\n",
    "if os.path.exists('grpo_reward_curve.png'):\n",
    "    display(Image('grpo_reward_curve.png'))\n",
    "elif os.path.exists('trl_reward_curve.png'):\n",
    "    display(Image('trl_reward_curve.png'))\n",
    "else:\n",
    "    print('Reward curve not found. Did training complete?')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Saved checkpoints"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import glob\n",
    "\n",
    "def _checkpoint_order(path):\n",
    "    # Sort by the numeric step after the last '-', so checkpoint-50 comes before\n",
    "    # checkpoint-100 (plain string sorting would reverse them). Paths without a\n",
    "    # numeric suffix are listed last, ordered by name.\n",
    "    suffix = path.rsplit('-', 1)[-1]\n",
    "    step = int(suffix) if suffix.isdigit() else float('inf')\n",
    "    return (step, path)\n",
    "\n",
    "checkpoints = sorted(glob.glob('lifestack_model/checkpoint-*'), key=_checkpoint_order)\n",
    "print(f\"Found {len(checkpoints)} checkpoints (saved every 50 steps).\")\n",
    "for ckpt in checkpoints:\n",
    "    print(ckpt)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Memory comparison\n",
    "Runs one episode with the persisted memory directory moved aside, then one with it restored."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from agent.memory import LifeStackMemory\n",
    "import shutil, os\n",
    "\n",
    "MEMORY_DIR = './lifestack_memory'\n",
    "BACKUP_DIR = './lifestack_memory_backup'\n",
    "\n",
    "print('=== BEFORE vs AFTER MEMORY ===\\n')\n",
    "\n",
    "# Without memory: move the persisted store aside for one episode.\n",
    "if os.path.exists(MEMORY_DIR):\n",
    "    shutil.move(MEMORY_DIR, BACKUP_DIR)\n",
    "try:\n",
    "    result_no_mem = run_episode(difficulty=5, verbose=False)\n",
    "finally:\n",
    "    # Restore even if the episode raised, so the store is never left in the backup path.\n",
    "    if os.path.exists(BACKUP_DIR):\n",
    "        # shutil.move() places the source *inside* an existing destination directory,\n",
    "        # so remove any store created during the no-memory episode before restoring.\n",
    "        if os.path.isdir(MEMORY_DIR):\n",
    "            shutil.rmtree(MEMORY_DIR)\n",
    "        shutil.move(BACKUP_DIR, MEMORY_DIR)\n",
    "print(f'Without memory | Reward: {result_no_mem[\"total_reward\"]:.3f}')\n",
    "\n",
    "# With memory\n",
    "result_with_mem = run_episode(difficulty=5, verbose=False)\n",
    "print(f'With memory | Reward: {result_with_mem[\"total_reward\"]:.3f}')\n",
    "\n",
    "improvement = result_with_mem['total_reward'] - result_no_mem['total_reward']\n",
    "print(f'Improvement : {improvement:+.3f}')\n",
    "print(f'\\nMemory stats: {LifeStackMemory().get_stats()}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Final Summary\n",
    "**LifeStack:** Built an AI-driven sandbox for simulating complex life scenarios. It scales based on five fundamental personality traits and models resource budgets.\n",
    "#### Cited Research:\n",
    "- Generative Agents (Park et al., 2023)\n",
    "- Large Language Models as Simulated Economic Agents (Horton, 2023)\n",
    "- Evaluating LLMs for Social Scenarios (Li et al., 2023)\n",
    "- Role-Playing in LLMs (Shanahan et al., 2023)\n",
    "\n",
    "**Reward Improvement:** Evaluated baseline against retrieval-augmented dynamic memories.\n",
    "**HuggingFace Demo:** Uploaded to HuggingFace Spaces."
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "LifeStack_Training.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}