Bopalv committed on
Commit
8e3f1e5
·
verified ·
1 Parent(s): 9f91593

Upload DPO-Training/Qwen3_DPO_Training.ipynb with huggingface_hub

Browse files
Files changed (1) hide show
  1. DPO-Training/Qwen3_DPO_Training.ipynb +111 -0
DPO-Training/Qwen3_DPO_Training.ipynb ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU"
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "metadata": {},
22
+ "source": [
23
+ "# Qwen3-0.6B DPO Training\n",
24
+ "\n",
25
+ "Train Qwen3-0.6B with Direct Preference Optimization (DPO) on Google Colab (free GPU).\n",
26
+ "\n",
27
+ "**Runtime → Change runtime type → GPU (T4)**"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "metadata": {},
33
+ "source": [
34
+ "# Install dependencies (%pip targets the running kernel's environment, unlike !pip)\n",
35
+ "%pip install -q torch transformers peft trl datasets accelerate bitsandbytes"
36
+ ],
37
+ "execution_count": null,
38
+ "outputs": []
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "metadata": {},
43
+ "source": [
44
+ "# Download training scripts\n",
45
+ "!wget -q https://raw.githubusercontent.com/bopalvelut-prog/model-efficiency/main/train_dpo_qwen3.py\n",
46
+ "!wget -q https://raw.githubusercontent.com/bopalvelut-prog/model-efficiency/main/merge_lora.py\n",
+ "!wget -q https://raw.githubusercontent.com/bopalvelut-prog/model-efficiency/main/sample_preference_data.jsonl\n",
47
+ "print('✅ Scripts downloaded!')"
48
+ ],
49
+ "execution_count": null,
50
+ "outputs": []
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "metadata": {},
55
+ "source": [
56
+ "# Train with default dataset (HelpSteer2)\n",
57
+ "!python train_dpo_qwen3.py \\\n",
58
+ " --model Qwen/Qwen3-0.6B \\\n",
59
+ " --beta 0.1 \\\n",
60
+ " --epochs 3 \\\n",
61
+ " --lr 2e-5 \\\n",
62
+ " --batch_size 4 \\\n",
63
+ " --lora_r 16 \\\n",
64
+ " --lora_alpha 16 \\\n",
65
+ " --max_samples 1000 \\\n",
66
+ " --output_dir ./qwen3-0.6b-dpo"
67
+ ],
68
+ "execution_count": null,
69
+ "outputs": []
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "metadata": {},
74
+ "source": [
75
+ "# Merge LoRA adapters\n",
76
+ "!python merge_lora.py \\\n",
77
+ " --base_model Qwen/Qwen3-0.6B \\\n",
78
+ " --lora_path ./qwen3-0.6b-dpo \\\n",
79
+ " --output_path ./qwen3-0.6b-dpo-merged"
80
+ ],
81
+ "execution_count": null,
82
+ "outputs": []
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "metadata": {},
87
+ "source": [
88
+ "# Push to HuggingFace (optional)\n",
89
+ "# First login: from huggingface_hub import login; login()\n",
90
+ "# Then uncomment and run:\n",
91
+ "# !python train_dpo_qwen3.py --push_to_hub \"your-username/Qwen3-0.6B-DPO\""
92
+ ],
93
+ "execution_count": null,
94
+ "outputs": []
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "metadata": {},
99
+ "source": [
100
+ "# Download trained model\n",
101
+ "from google.colab import files\n",
102
+ "import shutil\n",
103
+ "shutil.make_archive('qwen3-0.6b-dpo-merged', 'zip', './qwen3-0.6b-dpo-merged')\n",
104
+ "files.download('qwen3-0.6b-dpo-merged.zip')\n",
105
+ "print('✅ Model downloaded!')"
106
+ ],
107
+ "execution_count": null,
108
+ "outputs": []
109
+ }
110
+ ]
111
+ }