shivam2k3 committed on
Commit
ddafb99
·
1 Parent(s): 6ba5cca

train_grpo.ipynb: HF-Jupyter friendly clone + push cells

Browse files

- Clone shivam2k3/opensoc-env from the HF Hub (previously a GitHub REPLACE_ME placeholder URL).
- Set git user.email/name from HF_USERNAME so commits don't fail.
- Replace 'unsloth[colab-new]' extra with the plain unsloth install.
- Push trained artifacts via huggingface_hub.HfApi (upload_folder for
the LoRA adapter to its own model repo, upload_file for plots+demo
back to the env repo). No git LFS dance, no manual login.

Made-with: Cursor

Files changed (1) hide show
  1. train_grpo.ipynb +55 -18
train_grpo.ipynb CHANGED
@@ -35,9 +35,9 @@
35
  "source": [
36
  "%%capture\n",
37
  "!pip install --upgrade pip\n",
38
- "!pip install 'unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git'\n",
39
  "!pip install --no-deps trl peft accelerate bitsandbytes\n",
40
- "!pip install datasets pydantic fastapi"
41
  ],
42
  "execution_count": null,
43
  "outputs": []
@@ -46,12 +46,17 @@
46
  "cell_type": "code",
47
  "metadata": {},
48
  "source": [
49
- "# If running in Colab and the source isn't yet here, clone it. Replace\n",
50
- "# the placeholder URL with the public repo before publishing the notebook.\n",
 
51
  "import os\n",
52
- "if not os.path.exists('opensoc'):\n",
53
- " !git clone https://github.com/REPLACE_ME/opensoc.git\n",
54
- "%cd opensoc"
 
 
 
 
55
  ],
56
  "execution_count": null,
57
  "outputs": []
@@ -211,19 +216,51 @@
211
  "cell_type": "code",
212
  "metadata": {},
213
  "source": [
214
- "!huggingface-cli login --token $HF_TOKEN 2>/dev/null || echo \"set HF_TOKEN in the notebook env first\"\n",
215
- "!git lfs install\n",
216
- "!git lfs track \"*.safetensors\" \"*.bin\"\n",
217
- "!git add .gitattributes \\\n",
218
- " checkpoints/defender_grpo/stage4_adversarial/adapter \\\n",
219
- " data/demo_examples.json \\\n",
220
- " eval/results/*.png eval/results/summary.json\n",
221
- "!git commit -m \"trained: SFT+GRPO Qwen2.5-3B; eval results, demo data, training curves\"\n",
222
- "!git push origin main"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  ],
224
- "id": "cbe00541",
225
  "execution_count": null,
226
- "outputs": []
 
227
  },
228
  {
229
  "cell_type": "markdown",
 
35
  "source": [
36
  "%%capture\n",
37
  "!pip install --upgrade pip\n",
38
+ "!pip install \"unsloth @ git+https://github.com/unslothai/unsloth.git\"\n",
39
  "!pip install --no-deps trl peft accelerate bitsandbytes\n",
40
+ "!pip install datasets pydantic fastapi huggingface_hub"
41
  ],
42
  "execution_count": null,
43
  "outputs": []
 
46
  "cell_type": "code",
47
  "metadata": {},
48
  "source": [
49
+ "# Clone the OpenSOC repo from the Hugging Face Hub (the same repo this\n",
50
+ "# Space is built from). On HF Jupyter, HF_TOKEN is already set in the\n",
51
+ "# notebook env, so the clone is authenticated for write-back later.\n",
52
  "import os\n",
53
+ "REPO_URL = \"https://huggingface.co/shivam2k3/opensoc-env\"\n",
54
+ "REPO_DIR = \"opensoc-env\"\n",
55
+ "if not os.path.exists(REPO_DIR):\n",
56
+ " !git clone {REPO_URL} {REPO_DIR}\n",
57
+ "%cd {REPO_DIR}\n",
58
+ "!git config user.email \"{os.environ.get('HF_USERNAME','shivam2k3')}@users.noreply.huggingface.co\"\n",
59
+ "!git config user.name \"{os.environ.get('HF_USERNAME','shivam2k3')}\""
60
  ],
61
  "execution_count": null,
62
  "outputs": []
 
216
  "cell_type": "code",
217
  "metadata": {},
218
  "source": [
219
+ "import os, subprocess\n",
220
+ "from huggingface_hub import HfApi, upload_folder\n",
221
+ "\n",
222
+ "HF_TOKEN = os.environ.get(\"HF_TOKEN\")\n",
223
+ "assert HF_TOKEN, \"HF_TOKEN env var not set. In HF Jupyter: Settings -> Variables and secrets -> add HF_TOKEN.\"\n",
224
+ "\n",
225
+ "api = HfApi(token=HF_TOKEN)\n",
226
+ "\n",
227
+ "# 1) Upload the trained adapter as its own model repo (LoRA, ~30MB).\n",
228
+ "adapter_dir = \"checkpoints/defender_grpo/stage4_adversarial/adapter\"\n",
229
+ "if os.path.isdir(adapter_dir):\n",
230
+ " api.create_repo(\"shivam2k3/opensoc-defender-grpo\", exist_ok=True, private=False)\n",
231
+ " upload_folder(\n",
232
+ " repo_id=\"shivam2k3/opensoc-defender-grpo\",\n",
233
+ " folder_path=adapter_dir,\n",
234
+ " commit_message=\"GRPO-trained Qwen2.5-3B-Instruct LoRA defender adapter\",\n",
235
+ " token=HF_TOKEN,\n",
236
+ " )\n",
237
+ " print(\"✓ adapter uploaded -> https://huggingface.co/shivam2k3/opensoc-defender-grpo\")\n",
238
+ "\n",
239
+ "# 2) Push demo JSON + plots back to the env repo so the Space picks them up.\n",
240
+ "for path in [\n",
241
+ " \"data/demo_examples.json\",\n",
242
+ " \"eval/results/summary.json\",\n",
243
+ " \"eval/results/bar_macro_f1.png\",\n",
244
+ " \"eval/results/bar_dismiss_on_malicious.png\",\n",
245
+ " \"eval/results/confusion_baseline_zero_shot.png\",\n",
246
+ " \"eval/results/confusion_opensoc_grpo.png\",\n",
247
+ " \"eval/results/training_curves.png\",\n",
248
+ " \"eval/results/training_kl_loss.png\",\n",
249
+ "]:\n",
250
+ " if os.path.exists(path):\n",
251
+ " api.upload_file(\n",
252
+ " path_or_fileobj=path,\n",
253
+ " path_in_repo=path,\n",
254
+ " repo_id=\"shivam2k3/opensoc-env\",\n",
255
+ " commit_message=f\"trained: refresh {os.path.basename(path)} from GPU run\",\n",
256
+ " token=HF_TOKEN,\n",
257
+ " )\n",
258
+ " print(f\" pushed {path}\")\n",
259
+ "print(\"✓ refreshed artifacts on https://huggingface.co/shivam2k3/opensoc-env (Space rebuilds automatically)\")"
260
  ],
 
261
  "execution_count": null,
262
+ "outputs": [],
263
+ "id": "cbe00541"
264
  },
265
  {
266
  "cell_type": "markdown",