Update README.md
Browse files
README.md
CHANGED
|
@@ -71,25 +71,28 @@ tokenized_dataset = dataset.map(tokenize_function, batched=True)
|
|
| 71 |
|
| 72 |
# Training Arguments
|
| 73 |
training_args = TrainingArguments(
|
| 74 |
-
output_dir="./output/TCLM-beta/",
|
| 75 |
-
num_train_epochs=
|
| 76 |
-
per_device_train_batch_size=
|
| 77 |
-
gradient_accumulation_steps=
|
| 78 |
-
evaluation_strategy="
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
| 91 |
)
|
| 92 |
|
|
|
|
| 93 |
# Trainer Setup
|
| 94 |
trainer = Trainer(
|
| 95 |
model=model,
|
|
|
|
| 71 |
|
| 72 |
# Training Arguments
|
| 73 |
training_args = TrainingArguments(
|
| 74 |
+
output_dir="./output/TCLM-beta/", # Directory to save model checkpoints
|
| 75 |
+
num_train_epochs=3, # Increase epochs for better fine-tuning results
|
| 76 |
+
per_device_train_batch_size=4, # Adjust based on GPU memory, increase if possible
|
| 77 |
+
gradient_accumulation_steps=4, # Accumulate gradients to simulate a larger batch size
|
| 78 |
+
evaluation_strategy="steps", # Evaluate more frequently for detailed tracking
|
| 79 |
+
eval_steps=500, # Evaluate every 500 steps to track progress without over-evaluating
|
| 80 |
+
save_strategy="steps", # Save checkpoints during training
|
| 81 |
+
save_steps=500, # Save model every 500 steps
|
| 82 |
+
save_total_limit=2, # Keep only the 2 most recent checkpoints to save disk space (the best checkpoint is always retained when load_best_model_at_end=True)
|
| 83 |
+
learning_rate=5e-5, # Lower learning rate for fine-tuning
|
| 84 |
+
weight_decay=0.01, # Slight weight decay to prevent overfitting
|
| 85 |
+
lr_scheduler_type="cosine", # Cosine schedule for smoother learning rate decay
|
| 86 |
+
warmup_ratio=0.06, # Warmup to stabilize initial training
|
| 87 |
+
logging_dir="./logs", # Directory to save training logs
|
| 88 |
+
logging_steps=50, # Log progress every 50 steps for better monitoring
|
| 89 |
+
fp16=True, # Enable mixed precision for faster training with less memory
|
| 90 |
+
load_best_model_at_end=True, # Load the best model at the end based on evaluation metric
|
| 91 |
+
metric_for_best_model="eval_loss", # Use evaluation loss to determine the best model
|
| 92 |
+
greater_is_better=False, # Lower loss is better
|
| 93 |
)
|
| 94 |
|
| 95 |
+
|
| 96 |
# Trainer Setup
|
| 97 |
trainer = Trainer(
|
| 98 |
model=model,
|