abshetty committed on
Commit
ccd488e
·
verified ·
1 Parent(s): 9d85177

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +41 -1
README.md CHANGED
@@ -65,4 +65,44 @@ Cite TRL as:
65
  publisher = {GitHub},
66
  howpublished = {\url{https://github.com/huggingface/trl}}
67
  }
68
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  publisher = {GitHub},
66
  howpublished = {\url{https://github.com/huggingface/trl}}
67
  }
68
+ ```
69
+
70
+
71
+ Training configuration:
72
+
73
+ #Train the model
74
+ training_args = DPOConfig(
75
+ output_dir="llava-lora-12-05-lr",
76
+ bf16=True,
77
+ gradient_checkpointing=True,
78
+ per_device_train_batch_size=8,
79
+ per_device_eval_batch_size=4,
80
+ gradient_accumulation_steps=32,
81
+ evaluation_strategy="steps",
82
+ eval_steps=1,
83
+ learning_rate=5e-5,
84
+ beta=0.1,
85
+ warmup_ratio=0.1,
86
+ lr_scheduler_type="cosine",
87
+ num_train_epochs=3,
88
+ dataset_num_proc=32, # tokenization will use 32 processes
89
+ dataloader_num_workers=32, # data loading will use 32 workers
90
+ logging_steps=1,
91
+ )
92
+
93
+ #Define LoRA configuration with specified rank
94
+ lora_config = LoraConfig(
95
+ r=64, # Set rank to 64
96
+ lora_alpha=128, # Set scaling factor to 128
97
+ target_modules="all-linear", # Target all linear layers
98
+ )
99
+
100
+ trainer = DPOTrainer(
101
+ model,
102
+ ref_model=None, # not needed when using peft
103
+ args=training_args,
104
+ train_dataset=train_dataset,
105
+ eval_dataset=eval_dataset,
106
+ tokenizer=processor,
107
+ peft_config=lora_config,
108
+ )