Spaces:
Sleeping
Sleeping
File size: 3,829 Bytes
27cbc22 d53a65c 27cbc22 d53a65c 27cbc22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | {
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# CommitmentOS Training Notebook\n",
"\n",
"This notebook reproduces GRPO training for CommitmentOS using TRL + LoRA."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5bc9c2fe",
"metadata": {},
"outputs": [],
"source": [
"# Install the training stack. %pip installs into the environment of the running kernel.\n",
"# TODO: pin package versions for reproducible runs (only openenv-core has a lower bound).\n",
"%pip install -q --upgrade pip\n",
"%pip install -q \"openenv-core>=0.2.0\" trl transformers peft datasets torch accelerate bitsandbytes matplotlib pandas pydantic"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "clone-and-test",
"metadata": {},
"outputs": [],
"source": [
"# Fetch the project, switch into the checkout, and run tests/test_environment.py before training.\n",
"!git clone https://github.com/Jayant2304/commitment_os.git\n",
"%cd commitment_os\n",
"!python -m pytest tests/test_environment.py -q"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "run-grpo-training",
"metadata": {},
"outputs": [],
"source": [
"# Launch the GRPO training script. Each trailing backslash continues the shell command on the next line.\n",
"!python training/train_grpo.py \\\n",
" --model Qwen/Qwen2.5-1.5B-Instruct \\\n",
" --epochs 2 \\\n",
" --lr 5e-6 \\\n",
" --batch_size 1 \\\n",
" --group_size 2 \\\n",
" --lora_rank 16 \\\n",
" --lora_alpha 32 \\\n",
" --output_dir ./training_output"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "plot-training-curves",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from pathlib import Path\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Metrics log read from the training output directory (relative to the repo checkout).\n",
"# NOTE(review): assumes the file holds a JSON list of dicts - confirm against train_grpo.py.\n",
"metrics_path = Path('training_output/training_metrics.json')\n",
"logs = json.loads(metrics_path.read_text())\n",
"\n",
"\n",
"def metric_series(records, key):\n",
"    \"\"\"Return parallel (steps, values) lists for records that contain both 'step' and `key`.\"\"\"\n",
"    points = [(float(r['step']), float(r[key])) for r in records if 'step' in r and key in r]\n",
"    return [s for s, _ in points], [v for _, v in points]\n",
"\n",
"\n",
"def plot_metric(x, y, label, filename):\n",
"    \"\"\"Plot one metric against the training step, save it as a PNG, and display it.\"\"\"\n",
"    fig, ax = plt.subplots(figsize=(8, 5))\n",
"    ax.plot(x, y, marker='o')\n",
"    ax.set_title(f'CommitmentOS GRPO {label} vs Step')\n",
"    ax.set_xlabel('Step')\n",
"    ax.set_ylabel(label)\n",
"    ax.grid(alpha=0.3)\n",
"    fig.tight_layout()\n",
"    fig.savefig(filename, dpi=200)\n",
"    plt.show()\n",
"\n",
"\n",
"# Loss and reward are filtered independently because a record may carry only one of them.\n",
"steps, loss = metric_series(logs, 'loss')\n",
"r_steps, rewards = metric_series(logs, 'reward')\n",
"\n",
"plot_metric(steps, loss, 'Loss', 'loss_curve.png')\n",
"plot_metric(r_steps, rewards, 'Reward', 'reward_curve.png')"
]
},
{
"cell_type": "markdown",
"id": "e788b455",
"metadata": {},
"source": [
"### Optional: zip `training_output` for download\n",
"\n",
"Run this after training completes. On Colab, download the zip from the **Files** sidebar or with `files.download`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b3c760a",
"metadata": {},
"outputs": [],
"source": [
"archive_path = \"/content/training_output_only.zip\"\n",
"\n",
"# Print the size of training_output, then zip it from inside the repo checkout.\n",
"!cd /content/commitment_os && du -sh training_output && zip -r {archive_path} training_output\n",
"\n",
"# Colab-only helper that sends the archive to the browser as a download.\n",
"from google.colab import files\n",
"\n",
"files.download(archive_path)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|