miyuki2026 committed on
Commit
253ae18
·
1 Parent(s): c4ac4dc
examples/tutorials/dpo/ultrafeedback-dpo/requirements.txt CHANGED
@@ -5,3 +5,4 @@ modelscope
5
  datasets
6
  trl
7
  deepspeed
 
 
5
  datasets
6
  trl
7
  deepspeed
8
+ bitsandbytes
examples/tutorials/dpo/ultrafeedback-dpo/step_2_train_dpo_model_single_gpu.py CHANGED
@@ -118,14 +118,12 @@ def main():
118
 
119
  os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
120
 
121
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
122
-
123
  bnb_config = BitsAndBytesConfig(
124
- load_in_4bit=True, # 4-bit量化
125
- bnb_4bit_quant_type="nf4", # 使用NF4量化
126
- bnb_4bit_compute_dtype=torch.float16, # 计算时用FP16
127
- bnb_4bit_use_double_quant=True, # 双重量化
128
- bnb_4bit_quant_storage=torch.uint8, # 存储类型
129
  )
130
 
131
  model = AutoModelForCausalLM.from_pretrained(
@@ -148,8 +146,8 @@ def main():
148
  ref_model = prepare_model_for_kbit_training(ref_model)
149
 
150
  lora_config = LoraConfig(
151
- r=16, # LoRA rank
152
- lora_alpha=32, # LoRA alpha
153
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
154
  lora_dropout=0.1,
155
  bias="none",
@@ -200,7 +198,7 @@ def main():
200
  warmup_steps=100,
201
  lr_scheduler_type="cosine",
202
  fp16=True,
203
- gradient_checkpointing=True, # 如果内存紧张,可以设为 True
204
  optim="adamw_torch",
205
  report_to="none",
206
  max_length=1024 if debug_mode else 2048, # prompt + chosen 的最大长度
 
118
 
119
  os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
120
 
 
 
121
  bnb_config = BitsAndBytesConfig(
122
+ load_in_4bit=True,
123
+ bnb_4bit_quant_type="nf4",
124
+ bnb_4bit_compute_dtype=torch.float16,
125
+ bnb_4bit_use_double_quant=True,
126
+ bnb_4bit_quant_storage=torch.uint8,
127
  )
128
 
129
  model = AutoModelForCausalLM.from_pretrained(
 
146
  ref_model = prepare_model_for_kbit_training(ref_model)
147
 
148
  lora_config = LoraConfig(
149
+ r=16,
150
+ lora_alpha=32,
151
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
152
  lora_dropout=0.1,
153
  bias="none",
 
198
  warmup_steps=100,
199
  lr_scheduler_type="cosine",
200
  fp16=True,
201
+ gradient_checkpointing=False, # 如果内存紧张,可以设为 True
202
  optim="adamw_torch",
203
  report_to="none",
204
  max_length=1024 if debug_mode else 2048, # prompt + chosen 的最大长度