miyuki2026 committed · Commit c4ac4dc · 1 Parent(s): d251d39
examples/tutorials/dpo/ultrafeedback-dpo/requirements.txt CHANGED
@@ -1,4 +1,5 @@
 transformers
+peft
 torch
 modelscope
 datasets
examples/tutorials/dpo/ultrafeedback-dpo/step_2_train_dpo_model_single_gpu.py CHANGED
@@ -6,6 +6,8 @@ https://huggingface.co/docs/trl/v0.16.1/en/sft_trainer
 Full-parameter fine-tuning on a single V100 32G GPU
 python3 step_2_train_dpo_model_single_gpu.py
 
+DPO is essentially style fine-tuning, so training it with LoRA is the more reasonable, more principled choice.
+
 """
 import argparse
 import os
@@ -28,7 +30,9 @@ from datasets import load_dataset
 import torch
 
 from modelscope import AutoModelForCausalLM, AutoTokenizer
+from transformers import BitsAndBytesConfig
 from trl import DPOConfig, DPOTrainer
+from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
 
 
 def get_args():
@@ -116,24 +120,51 @@ def main():
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,                      # 4-bit quantization
+        bnb_4bit_quant_type="nf4",              # use NF4 quantization
+        bnb_4bit_compute_dtype=torch.float16,   # compute in FP16
+        bnb_4bit_use_double_quant=True,         # double quantization
+        bnb_4bit_quant_storage=torch.uint8,     # storage dtype
+    )
+
     model = AutoModelForCausalLM.from_pretrained(
         args.model_name,
         cache_dir=args.model_cache_dir,
+        quantization_config=bnb_config,
+        device_map="auto",
         trust_remote_code=True,
+        use_cache=False,                        # disable the KV cache during training
     )
     ref_model = AutoModelForCausalLM.from_pretrained(
         args.model_name,
         cache_dir=args.model_cache_dir,
         trust_remote_code=True,
+        quantization_config=bnb_config,
+        device_map="auto",
+        use_cache=False,
     )
+    model = prepare_model_for_kbit_training(model)
+    ref_model = prepare_model_for_kbit_training(ref_model)
+
+    lora_config = LoraConfig(
+        r=16,                                   # LoRA rank
+        lora_alpha=32,                          # LoRA alpha
+        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+        lora_dropout=0.1,
+        bias="none",
+        task_type="CAUSAL_LM",
+    )
+    model = get_peft_model(model, lora_config)
+    ref_model = get_peft_model(ref_model, lora_config)
+    model.print_trainable_parameters()
+
     tokenizer = AutoTokenizer.from_pretrained(
         args.model_name,
         cache_dir=args.model_cache_dir,
         trust_remote_code=True,
+        padding_side="left",                    # DPO requires left padding
     )
-    model = model.to(device)
-    ref_model = ref_model.to(device)
-
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
         tokenizer.pad_token_id = tokenizer.eos_token_id
@@ -168,7 +199,7 @@ def main():
         learning_rate=2e-5,
         warmup_steps=100,
         lr_scheduler_type="cosine",
-        fp16=False,
+        fp16=True,
         gradient_checkpointing=True,  # set to True if memory is tight
         optim="adamw_torch",
         report_to="none",
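
The rationale in the new docstring is easy to quantify. A rough back-of-the-envelope sketch of why full-parameter DPO strains a 32 GiB V100 while 4-bit NF4 plus LoRA fits; the 7B parameter count and per-parameter byte costs below are illustrative assumptions, not measurements from this tutorial:

# Rough memory arithmetic behind the switch to QLoRA (illustrative numbers).
n_params = 7e9                      # assume a 7B-parameter base model

fp16_weights = n_params * 2         # FP16 weights alone
full_finetune = n_params * 16       # ~16 B/param: FP16 weights + grads, FP32 master copy + AdamW m, v
nf4_weights = n_params * 0.5        # NF4 packs roughly two weights per byte

print(f"FP16 weights:           {fp16_weights / 2**30:6.1f} GiB")
print(f"full fine-tune (AdamW): {full_finetune / 2**30:6.1f} GiB  # far beyond a 32 GiB V100")
print(f"NF4-quantized weights:  {nf4_weights / 2**30:6.1f} GiB  # leaves room for LoRA + activations")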
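
Worth noting for readers of this diff: the commit wraps ref_model in its own freshly initialized adapters, which at initialization is numerically equivalent to the plain base model (LoRA's B matrices start at zero) but loads and quantizes the base weights twice. The TRL version the docstring links to (v0.16) also supports passing peft_config directly with ref_model=None, letting DPOTrainer apply the adapters itself and recover the reference policy by disabling them. A minimal sketch of that wiring; the model name, dataset, and output_dir are placeholders, not this tutorial's actual arguments:

import torch
from datasets import load_dataset
from modelscope import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig
from transformers import BitsAndBytesConfig
from trl import DPOConfig, DPOTrainer

model_name = "Qwen/Qwen2.5-0.5B-Instruct"   # placeholder; the tutorial uses args.model_name

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, padding_side="left")

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

# Assumed preference dataset for illustration; substitute the tutorial's own data.
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")

trainer = DPOTrainer(
    model=model,
    ref_model=None,                 # with peft_config, the reference policy is the same
                                    # base model with the adapters disabled
    args=DPOConfig(output_dir="dpo-qlora-out", fp16=True, gradient_checkpointing=True),
    train_dataset=train_dataset,
    processing_class=tokenizer,     # trl >= 0.12 name for the tokenizer argument
    peft_config=lora_config,        # the trainer calls get_peft_model internally
)
trainer.train()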
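
After training, saving a PEFT-wrapped model typically writes only the adapter weights, so inference code reattaches them to the base model. A minimal sketch, assuming a hypothetical adapter directory ./dpo-qlora-out and the same placeholder base model as above:

import torch
from modelscope import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_name = "Qwen/Qwen2.5-0.5B-Instruct"    # placeholder; must match the training base model

base = AutoModelForCausalLM.from_pretrained(
    base_name,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base, "./dpo-qlora-out")  # attach the DPO LoRA adapter
model = model.merge_and_unload()                            # optional: fold adapters into the base weights

tokenizer = AutoTokenizer.from_pretrained(base_name, trust_remote_code=True)
inputs = tokenizer("Hello!", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))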