train_grpo.ipynb: HF-Jupyter friendly clone + push cells

- Clone shivam2k3/opensoc-env from the HF Hub (previously a placeholder GitHub URL, REPLACE_ME).
- Set git user.email/name from HF_USERNAME so commits don't fail.
- Replace 'unsloth[colab-new]' extra with the plain unsloth install.
- Push trained artifacts via huggingface_hub (upload_folder sends the
LoRA adapter to its own model repo; HfApi.upload_file sends the plots,
summary.json, and demo JSON back to the env repo). No git LFS setup and no manual huggingface-cli login.

Made-with: Cursor

Files changed (1) hide show

train_grpo.ipynb +55 -18

train_grpo.ipynb CHANGED Viewed

@@ -35,9 +35,9 @@
       "source": [
         "%%capture\n",
         "!pip install --upgrade pip\n",
-        "!pip install 'unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git'\n",
         "!pip install --no-deps trl peft accelerate bitsandbytes\n",
-        "!pip install datasets pydantic fastapi"
       ],
       "execution_count": null,
       "outputs": []
@@ -46,12 +46,17 @@
       "cell_type": "code",
       "metadata": {},
       "source": [
-        "# If running in Colab and the source isn't yet here, clone it. Replace\n",
-        "# the placeholder URL with the public repo before publishing the notebook.\n",
         "import os\n",
-        "if not os.path.exists('opensoc'):\n",
-        "    !git clone https://github.com/REPLACE_ME/opensoc.git\n",
-        "%cd opensoc"
       ],
       "execution_count": null,
       "outputs": []
@@ -211,19 +216,51 @@
       "cell_type": "code",
       "metadata": {},
       "source": [
-        "!huggingface-cli login --token $HF_TOKEN 2>/dev/null || echo \"set HF_TOKEN in the notebook env first\"\n",
-        "!git lfs install\n",
-        "!git lfs track \"*.safetensors\" \"*.bin\"\n",
-        "!git add .gitattributes \\\n",
-        "  checkpoints/defender_grpo/stage4_adversarial/adapter \\\n",
-        "  data/demo_examples.json \\\n",
-        "  eval/results/*.png eval/results/summary.json\n",
-        "!git commit -m \"trained: SFT+GRPO Qwen2.5-3B; eval results, demo data, training curves\"\n",
-        "!git push origin main"
       ],
-      "id": "cbe00541",
       "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",

       "source": [
         "%%capture\n",
         "!pip install --upgrade pip\n",
+        "!pip install \"unsloth @ git+https://github.com/unslothai/unsloth.git\"\n",
         "!pip install --no-deps trl peft accelerate bitsandbytes\n",
+        "!pip install datasets pydantic fastapi huggingface_hub"
       ],
       "execution_count": null,
       "outputs": []
       "cell_type": "code",
       "metadata": {},
       "source": [
+        "# Clone the OpenSOC repo from the Hugging Face Hub (the same repo this\n",
+        "# Space is built from). On HF Jupyter, HF_TOKEN is already set in the\n",
+        "# notebook env, so the clone is authenticated for write-back later.\n",
         "import os\n",
+        "REPO_URL = \"https://huggingface.co/shivam2k3/opensoc-env\"\n",
+        "REPO_DIR = \"opensoc-env\"\n",
+        "if not os.path.exists(REPO_DIR):\n",
+        "    !git clone {REPO_URL} {REPO_DIR}\n",
+        "%cd {REPO_DIR}\n",
+        "!git config user.email \"{os.environ.get('HF_USERNAME','shivam2k3')}@users.noreply.huggingface.co\"\n",
+        "!git config user.name \"{os.environ.get('HF_USERNAME','shivam2k3')}\""
       ],
       "execution_count": null,
       "outputs": []
       "cell_type": "code",
       "metadata": {},
       "source": [
+        "import os, subprocess\n",
+        "from huggingface_hub import HfApi, upload_folder\n",
+        "\n",
+        "HF_TOKEN = os.environ.get(\"HF_TOKEN\")\n",
+        "assert HF_TOKEN, \"HF_TOKEN env var not set. In HF Jupyter: Settings -> Variables and secrets -> add HF_TOKEN.\"\n",
+        "\n",
+        "api = HfApi(token=HF_TOKEN)\n",
+        "\n",
+        "# 1) Upload the trained adapter as its own model repo (LoRA, ~30MB).\n",
+        "adapter_dir = \"checkpoints/defender_grpo/stage4_adversarial/adapter\"\n",
+        "if os.path.isdir(adapter_dir):\n",
+        "    api.create_repo(\"shivam2k3/opensoc-defender-grpo\", exist_ok=True, private=False)\n",
+        "    upload_folder(\n",
+        "        repo_id=\"shivam2k3/opensoc-defender-grpo\",\n",
+        "        folder_path=adapter_dir,\n",
+        "        commit_message=\"GRPO-trained Qwen2.5-3B-Instruct LoRA defender adapter\",\n",
+        "        token=HF_TOKEN,\n",
+        "    )\n",
+        "    print(\"✓ adapter uploaded -> https://huggingface.co/shivam2k3/opensoc-defender-grpo\")\n",
+        "\n",
+        "# 2) Push demo JSON + plots back to the env repo so the Space picks them up.\n",
+        "for path in [\n",
+        "    \"data/demo_examples.json\",\n",
+        "    \"eval/results/summary.json\",\n",
+        "    \"eval/results/bar_macro_f1.png\",\n",
+        "    \"eval/results/bar_dismiss_on_malicious.png\",\n",
+        "    \"eval/results/confusion_baseline_zero_shot.png\",\n",
+        "    \"eval/results/confusion_opensoc_grpo.png\",\n",
+        "    \"eval/results/training_curves.png\",\n",
+        "    \"eval/results/training_kl_loss.png\",\n",
+        "]:\n",
+        "    if os.path.exists(path):\n",
+        "        api.upload_file(\n",
+        "            path_or_fileobj=path,\n",
+        "            path_in_repo=path,\n",
+        "            repo_id=\"shivam2k3/opensoc-env\",\n",
+        "            commit_message=f\"trained: refresh {os.path.basename(path)} from GPU run\",\n",
+        "            token=HF_TOKEN,\n",
+        "        )\n",
+        "        print(f\"  pushed {path}\")\n",
+        "print(\"✓ refreshed artifacts on https://huggingface.co/shivam2k3/opensoc-env (Space rebuilds automatically)\")"
       ],
       "execution_count": null,
+      "outputs": [],
+      "id": "cbe00541"
     },
     {
       "cell_type": "markdown",