Bopalv committed on
Commit
8e3f1e5
·
verified ·
1 Parent(s): 9f91593

Upload DPO-Training/Qwen3_DPO_Training.ipynb with huggingface_hub

Browse files
Files changed (1) hide show
  1. DPO-Training/Qwen3_DPO_Training.ipynb +111 -0
DPO-Training/Qwen3_DPO_Training.ipynb ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU"
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "metadata": {},
22
+ "source": [
23
+ "# Qwen3-0.6B DPO Training\n",
24
+ "\n",
25
+ "Train Qwen3-0.6B with Direct Preference Optimization (DPO) on Google Colab (free GPU).\n",
26
+ "\n",
27
+ "**Runtime → Change runtime type → GPU (T4)**"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "metadata": {},
33
+ "source": [
34
+ "# Install dependencies (%pip targets the running kernel's environment, unlike !pip)\n",
35
+ "%pip install -q torch transformers peft trl datasets accelerate bitsandbytes"
36
+ ],
37
+ "execution_count": null,
38
+ "outputs": []
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "metadata": {},
43
+ "source": [
44
+ "# Download training scripts\n",
45
+ "!wget -q https://raw.githubusercontent.com/bopalvelut-prog/model-efficiency/main/train_dpo_qwen3.py\n",
46
+ "!wget -q https://raw.githubusercontent.com/bopalvelut-prog/model-efficiency/main/merge_lora.py\n",
+ "!wget -q https://raw.githubusercontent.com/bopalvelut-prog/model-efficiency/main/sample_preference_data.jsonl\n",
47
+ "print('✅ Scripts downloaded!')"
48
+ ],
49
+ "execution_count": null,
50
+ "outputs": []
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "metadata": {},
55
+ "source": [
56
+ "# Train with default dataset (HelpSteer2)\n",
57
+ "!python train_dpo_qwen3.py \\\n",
58
+ " --model Qwen/Qwen3-0.6B \\\n",
59
+ " --beta 0.1 \\\n",
60
+ " --epochs 3 \\\n",
61
+ " --lr 2e-5 \\\n",
62
+ " --batch_size 4 \\\n",
63
+ " --lora_r 16 \\\n",
64
+ " --lora_alpha 16 \\\n",
65
+ " --max_samples 1000 \\\n",
66
+ " --output_dir ./qwen3-0.6b-dpo"
67
+ ],
68
+ "execution_count": null,
69
+ "outputs": []
70
+ },
71
+ {
72
+ "cell_type": "code",
73
+ "metadata": {},
74
+ "source": [
75
+ "# Merge LoRA adapters\n",
76
+ "!python merge_lora.py \\\n",
77
+ " --base_model Qwen/Qwen3-0.6B \\\n",
78
+ " --lora_path ./qwen3-0.6b-dpo \\\n",
79
+ " --output_path ./qwen3-0.6b-dpo-merged"
80
+ ],
81
+ "execution_count": null,
82
+ "outputs": []
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "metadata": {},
87
+ "source": [
88
+ "# Push to HuggingFace (optional)\n",
89
+ "# First login: from huggingface_hub import login; login()\n",
90
+ "# Then uncomment and run:\n",
91
+ "# !python train_dpo_qwen3.py --push_to_hub \"your-username/Qwen3-0.6B-DPO\""
92
+ ],
93
+ "execution_count": null,
94
+ "outputs": []
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "metadata": {},
99
+ "source": [
100
+ "# Download trained model\n",
101
+ "from google.colab import files\n",
102
+ "import shutil\n",
103
+ "shutil.make_archive('qwen3-0.6b-dpo-merged', 'zip', './qwen3-0.6b-dpo-merged')\n",
104
+ "files.download('qwen3-0.6b-dpo-merged.zip')\n",
105
+ "print('✅ Model downloaded!')"
106
+ ],
107
+ "execution_count": null,
108
+ "outputs": []
109
+ }
110
+ ]
111
+ }