Spaces:
Build error
Build error
Update train_model.py
Browse files
- train_model.py +6 -6
train_model.py
CHANGED
|
@@ -111,25 +111,25 @@ data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, padding=True)
|
|
| 111 |
training_args = TrainingArguments(
|
| 112 |
output_dir="./results",
|
| 113 |
eval_strategy="steps",
|
| 114 |
-
eval_steps=
|
| 115 |
save_strategy="steps",
|
| 116 |
-
save_steps=
|
| 117 |
per_device_train_batch_size=2,
|
| 118 |
per_device_eval_batch_size=2,
|
| 119 |
-
gradient_accumulation_steps=
|
| 120 |
num_train_epochs=10, # 🔹 Mais épocas para um treinamento robusto
|
| 121 |
weight_decay=0.01,
|
| 122 |
logging_dir="./logs",
|
| 123 |
logging_strategy="steps",
|
| 124 |
-
logging_steps=
|
| 125 |
-
save_total_limit=
|
| 126 |
push_to_hub=True,
|
| 127 |
hub_model_id=NEW_MODEL_NAME,
|
| 128 |
hub_token=HF_API_KEY,
|
| 129 |
gradient_checkpointing=True,
|
| 130 |
bf16=True,
|
| 131 |
learning_rate=1e-6, # 🔹 Aprendizado mais lento
|
| 132 |
-
max_grad_norm=0.
|
| 133 |
warmup_ratio=0.1,
|
| 134 |
lr_scheduler_type="cosine",
|
| 135 |
optim="adamw_torch"
|
|
|
|
| 111 |
training_args = TrainingArguments(
|
| 112 |
output_dir="./results",
|
| 113 |
eval_strategy="steps",
|
| 114 |
+
eval_steps=1000, # 🔹 Avaliação frequente
|
| 115 |
save_strategy="steps",
|
| 116 |
+
save_steps=2500, # 🔹 Salvar checkpoints
|
| 117 |
per_device_train_batch_size=2,
|
| 118 |
per_device_eval_batch_size=2,
|
| 119 |
+
gradient_accumulation_steps=4,
|
| 120 |
num_train_epochs=10, # 🔹 Mais épocas para um treinamento robusto
|
| 121 |
weight_decay=0.01,
|
| 122 |
logging_dir="./logs",
|
| 123 |
logging_strategy="steps",
|
| 124 |
+
logging_steps=50, # 🔹 Logs frequentes
|
| 125 |
+
save_total_limit=2,
|
| 126 |
push_to_hub=True,
|
| 127 |
hub_model_id=NEW_MODEL_NAME,
|
| 128 |
hub_token=HF_API_KEY,
|
| 129 |
gradient_checkpointing=True,
|
| 130 |
bf16=True,
|
| 131 |
learning_rate=1e-6, # 🔹 Aprendizado mais lento
|
| 132 |
+
max_grad_norm=0.2,
|
| 133 |
warmup_ratio=0.1,
|
| 134 |
lr_scheduler_type="cosine",
|
| 135 |
optim="adamw_torch"
|