Update README.md
Browse files

README.md — CHANGED

@@ -65,4 +65,44 @@ Cite TRL as:

```bibtex
  publisher = {GitHub},
  howpublished = {\url{https://github.com/huggingface/trl}}
}
```
Lines 65–68 (unchanged context):

```bibtex
  publisher = {GitHub},
  howpublished = {\url{https://github.com/huggingface/trl}}
}
```

Lines 69–108 (added):

configuration:

```python
# Train the model
training_args = DPOConfig(
    output_dir="llava-lora-12-05-lr",
    bf16=True,
    gradient_checkpointing=True,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=32,
    evaluation_strategy="steps",
    eval_steps=1,
    learning_rate=5e-5,
    beta=0.1,
    warmup_ratio=0.1,
    lr_scheduler_type="cosine",
    num_train_epochs=3,
    dataset_num_proc=32,  # tokenization will use 32 processes
    dataloader_num_workers=32,  # data loading will use 32 workers
    logging_steps=1,
)

# Define LoRA configuration with specified rank
lora_config = LoraConfig(
    r=64,  # Set rank to 64
    lora_alpha=128,  # Set scaling factor to 128
    target_modules="all-linear",  # Target all linear layers
)

trainer = DPOTrainer(
    model,
    ref_model=None,  # not needed when using peft
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=processor,
    peft_config=lora_config,
)
```