{ "best_metric": 0.4055454134941101, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_lev/checkpoint-35434", "epoch": 12.0, "eval_steps": 500, "global_step": 60744, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.1182055473327637, "learning_rate": 4.773575540996625e-05, "loss": 0.8553, "step": 5062 }, { "epoch": 1.0, "eval_bleu": 0.3575613099028634, "eval_loss": 0.5277819633483887, "eval_rouge1": 0.5831886589720002, "eval_rouge2": 0.30791501071801863, "eval_rougeL": 0.5813351421522688, "eval_runtime": 48.8805, "eval_samples_per_second": 206.994, "eval_steps_per_second": 25.879, "step": 5062 }, { "epoch": 2.0, "grad_norm": 2.507585287094116, "learning_rate": 4.522334723049435e-05, "loss": 0.4665, "step": 10124 }, { "epoch": 2.0, "eval_bleu": 0.38457648452761506, "eval_loss": 0.46819329261779785, "eval_rouge1": 0.6342397326708439, "eval_rouge2": 0.3690484855204119, "eval_rougeL": 0.6327616070759406, "eval_runtime": 33.2569, "eval_samples_per_second": 304.237, "eval_steps_per_second": 38.037, "step": 10124 }, { "epoch": 3.0, "grad_norm": 3.12933349609375, "learning_rate": 4.271093905102244e-05, "loss": 0.3762, "step": 15186 }, { "epoch": 3.0, "eval_bleu": 0.4071777548010412, "eval_loss": 0.43943917751312256, "eval_rouge1": 0.6630887207777014, "eval_rouge2": 0.4093439150738768, "eval_rougeL": 0.6616155355063926, "eval_runtime": 32.6487, "eval_samples_per_second": 309.905, "eval_steps_per_second": 38.746, "step": 15186 }, { "epoch": 4.0, "grad_norm": 3.2251956462860107, "learning_rate": 4.0198530871550526e-05, "loss": 0.3096, "step": 20248 }, { "epoch": 4.0, "eval_bleu": 0.4282823691369342, "eval_loss": 0.42224156856536865, "eval_rouge1": 0.6859331821051708, "eval_rouge2": 0.4421578092708104, "eval_rougeL": 0.6848333938194515, "eval_runtime": 155.4831, "eval_samples_per_second": 65.075, "eval_steps_per_second": 8.136, "step": 20248 }, { "epoch": 5.0, "grad_norm": 2.957134485244751, "learning_rate": 3.7686122692078615e-05, "loss": 0.2588, "step": 25310 }, { "epoch": 5.0, "eval_bleu": 0.45182026317042456, "eval_loss": 0.4118480682373047, "eval_rouge1": 0.7053306010937058, "eval_rouge2": 0.4744709667500923, "eval_rougeL": 0.7041710642490953, "eval_runtime": 155.3106, "eval_samples_per_second": 65.147, "eval_steps_per_second": 8.145, "step": 25310 }, { "epoch": 6.0, "grad_norm": 1.470399260520935, "learning_rate": 3.517371451260672e-05, "loss": 0.2202, "step": 30372 }, { "epoch": 6.0, "eval_bleu": 0.47794608300398783, "eval_loss": 0.4063816964626312, "eval_rouge1": 0.7202669607556247, "eval_rouge2": 0.5013877675516258, "eval_rougeL": 0.7193022759890708, "eval_runtime": 155.6504, "eval_samples_per_second": 65.005, "eval_steps_per_second": 8.127, "step": 30372 }, { "epoch": 7.0, "grad_norm": 4.297893524169922, "learning_rate": 3.266130633313481e-05, "loss": 0.1906, "step": 35434 }, { "epoch": 7.0, "eval_bleu": 0.5006873092034939, "eval_loss": 0.4055454134941101, "eval_rouge1": 0.7334615308262713, "eval_rouge2": 0.5267444586395549, "eval_rougeL": 0.7327256455313074, "eval_runtime": 155.6299, "eval_samples_per_second": 65.013, "eval_steps_per_second": 8.128, "step": 35434 }, { "epoch": 8.0, "grad_norm": 1.4034435749053955, "learning_rate": 3.0148898153662896e-05, "loss": 0.1676, "step": 40496 }, { "epoch": 8.0, "eval_bleu": 0.5192260163668514, "eval_loss": 0.40759241580963135, "eval_rouge1": 0.7432141723163292, "eval_rouge2": 0.5456323870457441, "eval_rougeL": 0.7422781215292358, "eval_runtime": 32.7322, "eval_samples_per_second": 309.114, "eval_steps_per_second": 38.647, "step": 40496 }, { "epoch": 9.0, "grad_norm": 0.9320669174194336, "learning_rate": 2.763648997419099e-05, "loss": 0.1502, "step": 45558 }, { "epoch": 9.0, "eval_bleu": 0.5340534683820533, "eval_loss": 0.41223180294036865, "eval_rouge1": 0.749556396925456, "eval_rouge2": 0.5601889867058358, "eval_rougeL": 0.7486916826979337, "eval_runtime": 32.4664, "eval_samples_per_second": 311.645, "eval_steps_per_second": 38.963, "step": 45558 }, { "epoch": 10.0, "grad_norm": 0.8942773938179016, "learning_rate": 2.5124081794719078e-05, "loss": 0.1371, "step": 50620 }, { "epoch": 10.0, "eval_bleu": 0.5453185871190115, "eval_loss": 0.4181581735610962, "eval_rouge1": 0.7533937778503056, "eval_rouge2": 0.5667501774119681, "eval_rougeL": 0.7524624935116784, "eval_runtime": 155.8002, "eval_samples_per_second": 64.942, "eval_steps_per_second": 8.119, "step": 50620 }, { "epoch": 11.0, "grad_norm": 1.6828958988189697, "learning_rate": 2.2611673615247174e-05, "loss": 0.1275, "step": 55682 }, { "epoch": 11.0, "eval_bleu": 0.5522581095773064, "eval_loss": 0.4227945804595947, "eval_rouge1": 0.7561517013043558, "eval_rouge2": 0.5730376329889821, "eval_rougeL": 0.7551916709183668, "eval_runtime": 155.5737, "eval_samples_per_second": 65.037, "eval_steps_per_second": 8.131, "step": 55682 }, { "epoch": 12.0, "grad_norm": 2.157827854156494, "learning_rate": 2.0099265435775263e-05, "loss": 0.1202, "step": 60744 }, { "epoch": 12.0, "eval_bleu": 0.5545084381756545, "eval_loss": 0.4293166697025299, "eval_rouge1": 0.7579786234496211, "eval_rouge2": 0.5761973994203244, "eval_rougeL": 0.7572229493785905, "eval_runtime": 155.3766, "eval_samples_per_second": 65.119, "eval_steps_per_second": 8.142, "step": 60744 }, { "epoch": 12.0, "step": 60744, "total_flos": 3.1738359250944e+16, "train_loss": 0.2816361450291773, "train_runtime": 6563.2408, "train_samples_per_second": 123.381, "train_steps_per_second": 15.425 } ], "logging_steps": 500, "max_steps": 101240, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.1738359250944e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }