train_grpo.ipynb: HF-Jupyter friendly clone + push cells
Browse files
- Clone shivam2k3/opensoc-env from the HF Hub (previously a GitHub REPLACE_ME placeholder).
- Set git user.email/name from HF_USERNAME so commits don't fail.
- Replace 'unsloth[colab-new]' extra with the plain unsloth install.
- Push trained artifacts via huggingface_hub.HfApi (upload_folder for
the LoRA adapter to its own model repo, upload_file for the demo/summary
JSON and plots back to the env repo). No git LFS setup, no manual login.
Made-with: Cursor
- train_grpo.ipynb +55 -18
train_grpo.ipynb
CHANGED
|
@@ -35,9 +35,9 @@
|
|
| 35 |
"source": [
|
| 36 |
"%%capture\n",
|
| 37 |
"!pip install --upgrade pip\n",
|
| 38 |
-
"!pip install
|
| 39 |
"!pip install --no-deps trl peft accelerate bitsandbytes\n",
|
| 40 |
-
"!pip install datasets pydantic fastapi"
|
| 41 |
],
|
| 42 |
"execution_count": null,
|
| 43 |
"outputs": []
|
|
@@ -46,12 +46,17 @@
|
|
| 46 |
"cell_type": "code",
|
| 47 |
"metadata": {},
|
| 48 |
"source": [
|
| 49 |
-
"#
|
| 50 |
-
"#
|
|
|
|
| 51 |
"import os\n",
|
| 52 |
-
"
|
| 53 |
-
"
|
| 54 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
],
|
| 56 |
"execution_count": null,
|
| 57 |
"outputs": []
|
|
@@ -211,19 +216,51 @@
|
|
| 211 |
"cell_type": "code",
|
| 212 |
"metadata": {},
|
| 213 |
"source": [
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
| 217 |
-
"
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
"
|
| 221 |
-
"
|
| 222 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
],
|
| 224 |
-
"id": "cbe00541",
|
| 225 |
"execution_count": null,
|
| 226 |
-
"outputs": []
|
|
|
|
| 227 |
},
|
| 228 |
{
|
| 229 |
"cell_type": "markdown",
|
|
|
|
| 35 |
"source": [
|
| 36 |
"%%capture\n",
|
| 37 |
"!pip install --upgrade pip\n",
|
| 38 |
+
"!pip install \"unsloth @ git+https://github.com/unslothai/unsloth.git\"\n",
|
| 39 |
"!pip install --no-deps trl peft accelerate bitsandbytes\n",
|
| 40 |
+
"!pip install datasets pydantic fastapi huggingface_hub"
|
| 41 |
],
|
| 42 |
"execution_count": null,
|
| 43 |
"outputs": []
|
|
|
|
| 46 |
"cell_type": "code",
|
| 47 |
"metadata": {},
|
| 48 |
"source": [
|
| 49 |
+
"# Clone the OpenSOC repo from the Hugging Face Hub (the same repo this\n",
|
| 50 |
+
"# Space is built from). On HF Jupyter, HF_TOKEN is already set in the\n",
|
| 51 |
+
"# notebook env, so the clone is authenticated for write-back later.\n",
|
| 52 |
"import os\n",
|
| 53 |
+
"REPO_URL = \"https://huggingface.co/shivam2k3/opensoc-env\"\n",
|
| 54 |
+
"REPO_DIR = \"opensoc-env\"\n",
|
| 55 |
+
"if not os.path.exists(REPO_DIR):\n",
|
| 56 |
+
" !git clone {REPO_URL} {REPO_DIR}\n",
|
| 57 |
+
"%cd {REPO_DIR}\n",
|
| 58 |
+
"!git config user.email \"{os.environ.get('HF_USERNAME','shivam2k3')}@users.noreply.huggingface.co\"\n",
|
| 59 |
+
"!git config user.name \"{os.environ.get('HF_USERNAME','shivam2k3')}\""
|
| 60 |
],
|
| 61 |
"execution_count": null,
|
| 62 |
"outputs": []
|
|
|
|
| 216 |
"cell_type": "code",
|
| 217 |
"metadata": {},
|
| 218 |
"source": [
|
| 219 |
+
"import os, subprocess\n",
|
| 220 |
+
"from huggingface_hub import HfApi, upload_folder\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"HF_TOKEN = os.environ.get(\"HF_TOKEN\")\n",
|
| 223 |
+
"assert HF_TOKEN, \"HF_TOKEN env var not set. In HF Jupyter: Settings -> Variables and secrets -> add HF_TOKEN.\"\n",
|
| 224 |
+
"\n",
|
| 225 |
+
"api = HfApi(token=HF_TOKEN)\n",
|
| 226 |
+
"\n",
|
| 227 |
+
"# 1) Upload the trained adapter as its own model repo (LoRA, ~30MB).\n",
|
| 228 |
+
"adapter_dir = \"checkpoints/defender_grpo/stage4_adversarial/adapter\"\n",
|
| 229 |
+
"if os.path.isdir(adapter_dir):\n",
|
| 230 |
+
" api.create_repo(\"shivam2k3/opensoc-defender-grpo\", exist_ok=True, private=False)\n",
|
| 231 |
+
" upload_folder(\n",
|
| 232 |
+
" repo_id=\"shivam2k3/opensoc-defender-grpo\",\n",
|
| 233 |
+
" folder_path=adapter_dir,\n",
|
| 234 |
+
" commit_message=\"GRPO-trained Qwen2.5-3B-Instruct LoRA defender adapter\",\n",
|
| 235 |
+
" token=HF_TOKEN,\n",
|
| 236 |
+
" )\n",
|
| 237 |
+
" print(\"✓ adapter uploaded -> https://huggingface.co/shivam2k3/opensoc-defender-grpo\")\n",
|
| 238 |
+
"\n",
|
| 239 |
+
"# 2) Push demo JSON + plots back to the env repo so the Space picks them up.\n",
|
| 240 |
+
"for path in [\n",
|
| 241 |
+
" \"data/demo_examples.json\",\n",
|
| 242 |
+
" \"eval/results/summary.json\",\n",
|
| 243 |
+
" \"eval/results/bar_macro_f1.png\",\n",
|
| 244 |
+
" \"eval/results/bar_dismiss_on_malicious.png\",\n",
|
| 245 |
+
" \"eval/results/confusion_baseline_zero_shot.png\",\n",
|
| 246 |
+
" \"eval/results/confusion_opensoc_grpo.png\",\n",
|
| 247 |
+
" \"eval/results/training_curves.png\",\n",
|
| 248 |
+
" \"eval/results/training_kl_loss.png\",\n",
|
| 249 |
+
"]:\n",
|
| 250 |
+
" if os.path.exists(path):\n",
|
| 251 |
+
" api.upload_file(\n",
|
| 252 |
+
" path_or_fileobj=path,\n",
|
| 253 |
+
" path_in_repo=path,\n",
|
| 254 |
+
" repo_id=\"shivam2k3/opensoc-env\",\n",
|
| 255 |
+
" commit_message=f\"trained: refresh {os.path.basename(path)} from GPU run\",\n",
|
| 256 |
+
" token=HF_TOKEN,\n",
|
| 257 |
+
" )\n",
|
| 258 |
+
" print(f\" pushed {path}\")\n",
|
| 259 |
+
"print(\"✓ refreshed artifacts on https://huggingface.co/shivam2k3/opensoc-env (Space rebuilds automatically)\")"
|
| 260 |
],
|
|
|
|
| 261 |
"execution_count": null,
|
| 262 |
+
"outputs": [],
|
| 263 |
+
"id": "cbe00541"
|
| 264 |
},
|
| 265 |
{
|
| 266 |
"cell_type": "markdown",
|