nadiva1243 committed on
Commit
5f905bd
·
verified ·
1 Parent(s): 21d45d4

Upload reproduction/train-phi4.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. reproduction/train-phi4.py +2 -2
reproduction/train-phi4.py CHANGED
@@ -1171,7 +1171,7 @@ trainer = Trainer(
1171
  eval_dataset=tokenized_eval,
1172
  data_collator=data_collator,
1173
  processing_class=tokenizer,
1174
- callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
1175
  )
1176
 
1177
 
@@ -1187,7 +1187,7 @@ _resume_from = _ckpt_dirs[-1] if _ckpt_dirs else None
1187
  print("\n--> [9] Starting training...")
1188
  print(f" Epochs: {training_args.num_train_epochs}")
1189
  print(f" Effective batch: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
1190
- print(f" LR: {training_args.learning_rate} (cosine, patience=5)")
1191
  print(f" Best checkpoint: load_best_model_at_end=True")
1192
  if _resume_from:
1193
  print(f" Resuming from: {_resume_from}")
 
1171
  eval_dataset=tokenized_eval,
1172
  data_collator=data_collator,
1173
  processing_class=tokenizer,
1174
+ callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
1175
  )
1176
 
1177
 
 
1187
  print("\n--> [9] Starting training...")
1188
  print(f" Epochs: {training_args.num_train_epochs}")
1189
  print(f" Effective batch: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
1190
+ print(f" LR: {training_args.learning_rate} (cosine, patience=3)")
1191
  print(f" Best checkpoint: load_best_model_at_end=True")
1192
  if _resume_from:
1193
  print(f" Resuming from: {_resume_from}")