abshetty committed on
Commit
df44985
·
verified ·
1 Parent(s): 0c33889

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +38 -1
README.md CHANGED
@@ -65,4 +65,41 @@ Cite TRL as:
65
  publisher = {GitHub},
66
  howpublished = {\url{https://github.com/huggingface/trl}}
67
  }
68
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  publisher = {GitHub},
66
  howpublished = {\url{https://github.com/huggingface/trl}}
67
  }
68
+ ```
69
+ #Train the model
70
+ training_args = DPOConfig(
71
+ output_dir="llava-lora-12-05-dropout",
72
+ bf16=True,
73
+ gradient_checkpointing=True,
74
+ per_device_train_batch_size=8,
75
+ per_device_eval_batch_size=4,
76
+ gradient_accumulation_steps=32,
77
+ evaluation_strategy="steps",
78
+ eval_steps=1,
79
+ learning_rate=2e-6,
80
+ beta=0.1,
81
+ warmup_ratio=0.1,
82
+ lr_scheduler_type="cosine",
83
+ num_train_epochs=3,
84
+ dataset_num_proc=32, # tokenization will use 32 processes
85
+ dataloader_num_workers=32, # data loading will use 32 workers
86
+ logging_steps=1,
87
+ )
88
+
89
+ #Define LoRA configuration with specified rank
90
+ lora_config = LoraConfig(
91
+ r=64, # Set rank to 64
92
+ lora_alpha=128, # Set scaling factor to 128
93
+ target_modules="all-linear", # Target all linear layers
94
+ lora_dropout=0.1,
95
+ )
96
+
97
+ trainer = DPOTrainer(
98
+ model,
99
+ ref_model=None, # not needed when using peft
100
+ args=training_args,
101
+ train_dataset=train_dataset,
102
+ eval_dataset=eval_dataset,
103
+ tokenizer=processor,
104
+ peft_config=lora_config,
105
+ )