miyuki2026 committed on
Commit
44f10dc
·
1 Parent(s): ce3fdcb
examples/tutorials/dpo/ultrafeedback-dpo/step_2_train_dpo_model_unsloth_ddp_qlora.py CHANGED
@@ -235,8 +235,8 @@ def main():
235
  dpo_config = DPOConfig(
236
  output_dir=args.output_model_dir,
237
  num_train_epochs=args.num_train_epochs,
238
- per_device_train_batch_size=1 if debug_mode else 2,
239
- per_device_eval_batch_size=1 if debug_mode else 2,
240
  gradient_accumulation_steps=1 if debug_mode else 8,
241
  # gradient_checkpointing=True,
242
  # gradient_checkpointing_kwargs={"use_reentrant": False},
 
235
  dpo_config = DPOConfig(
236
  output_dir=args.output_model_dir,
237
  num_train_epochs=args.num_train_epochs,
238
+ per_device_train_batch_size=1 if debug_mode else 3,
239
+ per_device_eval_batch_size=1 if debug_mode else 3,
240
  gradient_accumulation_steps=1 if debug_mode else 8,
241
  # gradient_checkpointing=True,
242
  # gradient_checkpointing_kwargs={"use_reentrant": False},