rwayz committed (verified)
Commit dc0bd9c · 1 Parent(s): 3493028

Update train_model.py

Files changed (1):
  1. train_model.py +6 -6
train_model.py CHANGED
@@ -111,25 +111,25 @@ data_collator = DataCollatorForSeq2Seq(tokenizer, model=model, padding=True)
 training_args = TrainingArguments(
     output_dir="./results",
     eval_strategy="steps",
-    eval_steps=250,  # 🔹 frequent evaluation
+    eval_steps=1000,  # 🔹 frequent evaluation
     save_strategy="steps",
-    save_steps=500,  # 🔹 save checkpoints
+    save_steps=2500,  # 🔹 save checkpoints
     per_device_train_batch_size=2,
     per_device_eval_batch_size=2,
-    gradient_accumulation_steps=6,
+    gradient_accumulation_steps=4,
     num_train_epochs=10,  # 🔹 more epochs for robust training
     weight_decay=0.01,
     logging_dir="./logs",
     logging_strategy="steps",
-    logging_steps=75,  # 🔹 frequent logging
-    save_total_limit=5,
+    logging_steps=50,  # 🔹 frequent logging
+    save_total_limit=2,
     push_to_hub=True,
     hub_model_id=NEW_MODEL_NAME,
     hub_token=HF_API_KEY,
     gradient_checkpointing=True,
     bf16=True,
     learning_rate=1e-6,  # 🔹 slower learning
-    max_grad_norm=0.5,
+    max_grad_norm=0.2,
     warmup_ratio=0.1,
     lr_scheduler_type="cosine",
     optim="adamw_torch"
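
For context only (not part of this commit): these TrainingArguments are typically handed to a Trainer together with the DataCollatorForSeq2Seq visible in the hunk header. The sketch below is an assumption about how the rest of train_model.py consumes them; tokenized_train and tokenized_eval are hypothetical dataset names, and model, tokenizer, data_collator, training_args come from earlier in the file, which this diff does not show. Note that with the new values the effective per-device batch size is per_device_train_batch_size * gradient_accumulation_steps = 2 * 4 = 8 (previously 2 * 6 = 12).

# Minimal sketch, assuming the usual Trainer setup around these arguments.
# tokenized_train / tokenized_eval are hypothetical; model, tokenizer,
# data_collator and training_args are defined earlier in train_model.py.
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    data_collator=data_collator,
    tokenizer=tokenizer,
)
trainer.train()  # with push_to_hub=True, saved checkpoints are pushed to hub_model_id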