{
  "cells": [
    {
      "cell_type": "markdown",
      "id": "intro",
      "metadata": {},
      "source": [
        "# AutoDataLab++ Chief of Staff Training\n",
        "\n",
        "Minimal Colab-friendly scaffold for GRPO/PPO over the Chief of Staff discrete action space."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "install-deps",
      "metadata": {},
      "outputs": [],
      "source": [
        "# Install the training dependencies; %pip targets the environment of the running kernel.\n",
        "%pip install -q trl transformers accelerate pandas matplotlib"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "config",
      "metadata": {},
      "outputs": [],
      "source": [
        "import json\n",
        "import random\n",
        "from pathlib import Path\n",
        "\n",
        "# Project root. NOTE(review): the /content prefix presumably targets a Colab checkout;\n",
        "# adjust when running elsewhere.\n",
        "ROOT = Path('/content/autodatalab-plus')\n",
        "\n",
        "# Discrete action space: consult one of four experts, summarize, or submit.\n",
        "ACTION_SPACE = [\n",
        "    {'action_type': 'consult', 'expert_id': 'analyst'},\n",
        "    {'action_type': 'consult', 'expert_id': 'finance'},\n",
        "    {'action_type': 'consult', 'expert_id': 'hr'},\n",
        "    {'action_type': 'consult', 'expert_id': 'strategy'},\n",
        "    {'action_type': 'summarize'},\n",
        "    {'action_type': 'submit'},\n",
        "]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "training-scaffold",
      "metadata": {},
      "outputs": [],
      "source": [
        "from trl import PPOTrainer  # swap to GRPOTrainer if available\n",
        "\n",
        "print('Use the environment in ceo_brief_env/environment.py to roll out episodes and map actions to token IDs.')\n",
        "print('Checkpoint 0 = random / base model, checkpoint final = post-training.')\n",
        "# Save reward curve to training/reward_curves/reward_curve.png after evaluation."
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.10"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}