{
  "best_metric": 0.4055454134941101,
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_lev/checkpoint-35434",
  "epoch": 12.0,
  "eval_steps": 500,
  "global_step": 60744,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 3.1182055473327637,
      "learning_rate": 4.773575540996625e-05,
      "loss": 0.8553,
      "step": 5062
    },
    {
      "epoch": 1.0,
      "eval_bleu": 0.3575613099028634,
      "eval_loss": 0.5277819633483887,
      "eval_rouge1": 0.5831886589720002,
      "eval_rouge2": 0.30791501071801863,
      "eval_rougeL": 0.5813351421522688,
      "eval_runtime": 48.8805,
      "eval_samples_per_second": 206.994,
      "eval_steps_per_second": 25.879,
      "step": 5062
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.507585287094116,
      "learning_rate": 4.522334723049435e-05,
      "loss": 0.4665,
      "step": 10124
    },
    {
      "epoch": 2.0,
      "eval_bleu": 0.38457648452761506,
      "eval_loss": 0.46819329261779785,
      "eval_rouge1": 0.6342397326708439,
      "eval_rouge2": 0.3690484855204119,
      "eval_rougeL": 0.6327616070759406,
      "eval_runtime": 33.2569,
      "eval_samples_per_second": 304.237,
      "eval_steps_per_second": 38.037,
      "step": 10124
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.12933349609375,
      "learning_rate": 4.271093905102244e-05,
      "loss": 0.3762,
      "step": 15186
    },
    {
      "epoch": 3.0,
      "eval_bleu": 0.4071777548010412,
      "eval_loss": 0.43943917751312256,
      "eval_rouge1": 0.6630887207777014,
      "eval_rouge2": 0.4093439150738768,
      "eval_rougeL": 0.6616155355063926,
      "eval_runtime": 32.6487,
      "eval_samples_per_second": 309.905,
      "eval_steps_per_second": 38.746,
      "step": 15186
    },
    {
      "epoch": 4.0,
      "grad_norm": 3.2251956462860107,
      "learning_rate": 4.0198530871550526e-05,
      "loss": 0.3096,
      "step": 20248
    },
    {
      "epoch": 4.0,
      "eval_bleu": 0.4282823691369342,
      "eval_loss": 0.42224156856536865,
      "eval_rouge1": 0.6859331821051708,
      "eval_rouge2": 0.4421578092708104,
      "eval_rougeL": 0.6848333938194515,
      "eval_runtime": 155.4831,
      "eval_samples_per_second": 65.075,
      "eval_steps_per_second": 8.136,
      "step": 20248
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.957134485244751,
      "learning_rate": 3.7686122692078615e-05,
      "loss": 0.2588,
      "step": 25310
    },
    {
      "epoch": 5.0,
      "eval_bleu": 0.45182026317042456,
      "eval_loss": 0.4118480682373047,
      "eval_rouge1": 0.7053306010937058,
      "eval_rouge2": 0.4744709667500923,
      "eval_rougeL": 0.7041710642490953,
      "eval_runtime": 155.3106,
      "eval_samples_per_second": 65.147,
      "eval_steps_per_second": 8.145,
      "step": 25310
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.470399260520935,
      "learning_rate": 3.517371451260672e-05,
      "loss": 0.2202,
      "step": 30372
    },
    {
      "epoch": 6.0,
      "eval_bleu": 0.47794608300398783,
      "eval_loss": 0.4063816964626312,
      "eval_rouge1": 0.7202669607556247,
      "eval_rouge2": 0.5013877675516258,
      "eval_rougeL": 0.7193022759890708,
      "eval_runtime": 155.6504,
      "eval_samples_per_second": 65.005,
      "eval_steps_per_second": 8.127,
      "step": 30372
    },
    {
      "epoch": 7.0,
      "grad_norm": 4.297893524169922,
      "learning_rate": 3.266130633313481e-05,
      "loss": 0.1906,
      "step": 35434
    },
    {
      "epoch": 7.0,
      "eval_bleu": 0.5006873092034939,
      "eval_loss": 0.4055454134941101,
      "eval_rouge1": 0.7334615308262713,
      "eval_rouge2": 0.5267444586395549,
      "eval_rougeL": 0.7327256455313074,
      "eval_runtime": 155.6299,
      "eval_samples_per_second": 65.013,
      "eval_steps_per_second": 8.128,
      "step": 35434
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.4034435749053955,
      "learning_rate": 3.0148898153662896e-05,
      "loss": 0.1676,
      "step": 40496
    },
    {
      "epoch": 8.0,
      "eval_bleu": 0.5192260163668514,
      "eval_loss": 0.40759241580963135,
      "eval_rouge1": 0.7432141723163292,
      "eval_rouge2": 0.5456323870457441,
      "eval_rougeL": 0.7422781215292358,
      "eval_runtime": 32.7322,
      "eval_samples_per_second": 309.114,
      "eval_steps_per_second": 38.647,
      "step": 40496
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.9320669174194336,
      "learning_rate": 2.763648997419099e-05,
      "loss": 0.1502,
      "step": 45558
    },
    {
      "epoch": 9.0,
      "eval_bleu": 0.5340534683820533,
      "eval_loss": 0.41223180294036865,
      "eval_rouge1": 0.749556396925456,
      "eval_rouge2": 0.5601889867058358,
      "eval_rougeL": 0.7486916826979337,
      "eval_runtime": 32.4664,
      "eval_samples_per_second": 311.645,
      "eval_steps_per_second": 38.963,
      "step": 45558
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.8942773938179016,
      "learning_rate": 2.5124081794719078e-05,
      "loss": 0.1371,
      "step": 50620
    },
    {
      "epoch": 10.0,
      "eval_bleu": 0.5453185871190115,
      "eval_loss": 0.4181581735610962,
      "eval_rouge1": 0.7533937778503056,
      "eval_rouge2": 0.5667501774119681,
      "eval_rougeL": 0.7524624935116784,
      "eval_runtime": 155.8002,
      "eval_samples_per_second": 64.942,
      "eval_steps_per_second": 8.119,
      "step": 50620
    },
    {
      "epoch": 11.0,
      "grad_norm": 1.6828958988189697,
      "learning_rate": 2.2611673615247174e-05,
      "loss": 0.1275,
      "step": 55682
    },
    {
      "epoch": 11.0,
      "eval_bleu": 0.5522581095773064,
      "eval_loss": 0.4227945804595947,
      "eval_rouge1": 0.7561517013043558,
      "eval_rouge2": 0.5730376329889821,
      "eval_rougeL": 0.7551916709183668,
      "eval_runtime": 155.5737,
      "eval_samples_per_second": 65.037,
      "eval_steps_per_second": 8.131,
      "step": 55682
    },
    {
      "epoch": 12.0,
      "grad_norm": 2.157827854156494,
      "learning_rate": 2.0099265435775263e-05,
      "loss": 0.1202,
      "step": 60744
    },
    {
      "epoch": 12.0,
      "eval_bleu": 0.5545084381756545,
      "eval_loss": 0.4293166697025299,
      "eval_rouge1": 0.7579786234496211,
      "eval_rouge2": 0.5761973994203244,
      "eval_rougeL": 0.7572229493785905,
      "eval_runtime": 155.3766,
      "eval_samples_per_second": 65.119,
      "eval_steps_per_second": 8.142,
      "step": 60744
    },
    {
      "epoch": 12.0,
      "step": 60744,
      "total_flos": 3.1738359250944e+16,
      "train_loss": 0.2816361450291773,
      "train_runtime": 6563.2408,
      "train_samples_per_second": 123.381,
      "train_steps_per_second": 15.425
    }
  ],
  "logging_steps": 500,
  "max_steps": 101240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.1738359250944e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}