Spaces:

Jayant2304
/

commitment-os

Sleeping

App Files Files Community

jayantaggarwal-sketch committed 28 days ago

Commit

27cbc22

1 Parent(s): af8810b

Sync latest GitHub commit and notebook

Browse files

Files changed (1) hide show

training/CommitmentOS_Training.ipynb +95 -0

training/CommitmentOS_Training.ipynb ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# CommitmentOS Training Notebook\\n",
+    "\\n",
+    "This notebook reproduces GRPO training for CommitmentOS using TRL + LoRA."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %pip (rather than !pip) installs into the environment of the running kernel.\\n",
+    "%pip install -q --upgrade pip\\n",
+    "# pytest is included because the notebook's smoke-test cell runs `python -m pytest`.\\n",
+    "%pip install -q openenv trl transformers peft datasets torch accelerate bitsandbytes matplotlib pandas pytest"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\\n",
+    "\\n",
+    "# Clone and enter the repo only when not already inside it, so re-running this\\n",
+    "# cell does not create (and cd into) a nested commitment_os/commitment_os copy.\\n",
+    "if Path.cwd().name != 'commitment_os':\\n",
+    "    if not Path('commitment_os').exists():\\n",
+    "        !git clone https://github.com/Jayant2304/commitment_os.git\\n",
+    "    %cd commitment_os\\n",
+    "\\n",
+    "# Smoke-test the environment before starting a training run.\\n",
+    "!python -m pytest tests/test_environment.py -q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!python training/train_grpo.py \\\\\\n",
+    "  --model Qwen/Qwen2.5-1.5B-Instruct \\\\\\n",
+    "  --epochs 2 \\\\\\n",
+    "  --lr 5e-6 \\\\\\n",
+    "  --batch_size 1 \\\\\\n",
+    "  --group_size 2 \\\\\\n",
+    "  --lora_rank 16 \\\\\\n",
+    "  --lora_alpha 32 \\\\\\n",
+    "  --output_dir ./training_output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\\n",
+    "from pathlib import Path\\n",
+    "\\n",
+    "import matplotlib.pyplot as plt\\n",
+    "\\n",
+    "\\n",
+    "def metric_series(logs, key):\\n",
+    "    '''Return (steps, values) as float lists for log entries holding both 'step' and `key`.'''\\n",
+    "    rows = [x for x in logs if 'step' in x and key in x]\\n",
+    "    return [float(x['step']) for x in rows], [float(x[key]) for x in rows]\\n",
+    "\\n",
+    "\\n",
+    "def plot_metric(steps, values, label, out_file):\\n",
+    "    '''Plot `values` against `steps`, save the figure to `out_file` and display it.'''\\n",
+    "    fig, ax = plt.subplots(figsize=(8, 5))\\n",
+    "    ax.plot(steps, values, marker='o')\\n",
+    "    ax.set_title(f'CommitmentOS GRPO {label} vs Step')\\n",
+    "    ax.set_xlabel('Step')\\n",
+    "    ax.set_ylabel(label)\\n",
+    "    ax.grid(alpha=0.3)\\n",
+    "    fig.tight_layout()\\n",
+    "    fig.savefig(out_file, dpi=200)\\n",
+    "    plt.show()\\n",
+    "\\n",
+    "\\n",
+    "p = Path('training_output/training_metrics.json')\\n",
+    "if not p.exists():\\n",
+    "    # Same exception type read_text() would raise, but with an actionable message.\\n",
+    "    raise FileNotFoundError(f'{p} not found; run the training cell first')\\n",
+    "logs = json.loads(p.read_text())\\n",
+    "\\n",
+    "# Filter the log once per metric instead of once per list.\\n",
+    "steps, loss = metric_series(logs, 'loss')\\n",
+    "r_steps, rewards = metric_series(logs, 'reward')\\n",
+    "\\n",
+    "plot_metric(steps, loss, 'Loss', 'loss_curve.png')\\n",
+    "plot_metric(r_steps, rewards, 'Reward', 'reward_curve.png')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}