miyuki2026 committed on
Commit
253ae18
·
1 Parent(s): c4ac4dc
examples/tutorials/dpo/ultrafeedback-dpo/requirements.txt CHANGED
@@ -5,3 +5,4 @@ modelscope
5
  datasets
6
  trl
7
  deepspeed
 
 
5
  datasets
6
  trl
7
  deepspeed
8
+ bitsandbytes
examples/tutorials/dpo/ultrafeedback-dpo/step_2_train_dpo_model_single_gpu.py CHANGED
@@ -118,14 +118,12 @@ def main():
118
 
119
  os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
120
 
121
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
122
-
123
  bnb_config = BitsAndBytesConfig(
124
- load_in_4bit=True, # 4-bit量化
125
- bnb_4bit_quant_type="nf4", # 使用NF4量化
126
- bnb_4bit_compute_dtype=torch.float16, # 计算时用FP16
127
- bnb_4bit_use_double_quant=True, # 双重量化
128
- bnb_4bit_quant_storage=torch.uint8, # 存储类型
129
  )
130
 
131
  model = AutoModelForCausalLM.from_pretrained(
@@ -148,8 +146,8 @@ def main():
148
  ref_model = prepare_model_for_kbit_training(ref_model)
149
 
150
  lora_config = LoraConfig(
151
- r=16, # LoRA rank
152
- lora_alpha=32, # LoRA alpha
153
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
154
  lora_dropout=0.1,
155
  bias="none",
@@ -200,7 +198,7 @@ def main():
200
  warmup_steps=100,
201
  lr_scheduler_type="cosine",
202
  fp16=True,
203
- gradient_checkpointing=True, # 如果内存紧张,可以设为 True
204
  optim="adamw_torch",
205
  report_to="none",
206
  max_length=1024 if debug_mode else 2048, # prompt + chosen 的最大长度
 
118
 
119
  os.environ["MODELSCOPE_CACHE"] = args.model_cache_dir
120
 
 
 
121
  bnb_config = BitsAndBytesConfig(
122
+ load_in_4bit=True,
123
+ bnb_4bit_quant_type="nf4",
124
+ bnb_4bit_compute_dtype=torch.float16,
125
+ bnb_4bit_use_double_quant=True,
126
+ bnb_4bit_quant_storage=torch.uint8,
127
  )
128
 
129
  model = AutoModelForCausalLM.from_pretrained(
 
146
  ref_model = prepare_model_for_kbit_training(ref_model)
147
 
148
  lora_config = LoraConfig(
149
+ r=16,
150
+ lora_alpha=32,
151
  target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
152
  lora_dropout=0.1,
153
  bias="none",
 
198
  warmup_steps=100,
199
  lr_scheduler_type="cosine",
200
  fp16=True,
201
+ gradient_checkpointing=False, # 如果内存紧张,可以设为 True
202
  optim="adamw_torch",
203
  report_to="none",
204
  max_length=1024 if debug_mode else 2048, # prompt + chosen 的最大长度