{
  "best_metric": 1.9084105491638184,
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/results/checkpoint-8500",
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 9220,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.567398119122257,
      "grad_norm": 1.353641152381897,
      "learning_rate": 5e-05,
      "loss": 3.359,
      "step": 500
    },
    {
      "epoch": 1.567398119122257,
      "eval_bleu": 0.11424038411303619,
      "eval_loss": 3.128293514251709,
      "eval_rouge1": 0.3297614987151056,
      "eval_rouge2": 0.08429294540985294,
      "eval_rougeL": 0.2561476738686219,
      "eval_runtime": 26.8133,
      "eval_samples_per_second": 31.589,
      "eval_steps_per_second": 3.953,
      "step": 500
    },
    {
      "epoch": 3.134796238244514,
      "grad_norm": 1.156111717224121,
      "learning_rate": 2.71689497716895e-05,
      "loss": 2.9208,
      "step": 1000
    },
    {
      "epoch": 3.134796238244514,
      "eval_bleu": 0.1490666503828191,
      "eval_loss": 2.729825496673584,
      "eval_rouge1": 0.40409071928626966,
      "eval_rouge2": 0.14297878002377568,
      "eval_rougeL": 0.34083403761346187,
      "eval_runtime": 27.234,
      "eval_samples_per_second": 31.101,
      "eval_steps_per_second": 3.892,
      "step": 1000
    },
    {
      "epoch": 4.702194357366771,
      "grad_norm": 1.1165863275527954,
      "learning_rate": 4.337899543378996e-06,
      "loss": 2.619,
      "step": 1500
    },
    {
      "epoch": 4.702194357366771,
      "eval_bleu": 0.16068905926811505,
      "eval_loss": 2.6229476928710938,
      "eval_rouge1": 0.4264320027787866,
      "eval_rouge2": 0.1630682859845051,
      "eval_rougeL": 0.367472815476786,
      "eval_runtime": 27.3027,
      "eval_samples_per_second": 31.023,
      "eval_steps_per_second": 3.882,
      "step": 1500
    },
    {
      "epoch": 4.3383947939262475,
      "grad_norm": 1.10550856590271,
      "learning_rate": 4.139908256880734e-05,
      "loss": 2.4047,
      "step": 2000
    },
    {
      "epoch": 4.3383947939262475,
      "eval_bleu": 0.27212534220096674,
      "eval_loss": 2.200192451477051,
      "eval_rouge1": 0.49764917064550795,
      "eval_rouge2": 0.25417403674525624,
      "eval_rougeL": 0.4505978761161964,
      "eval_runtime": 29.8301,
      "eval_samples_per_second": 31.009,
      "eval_steps_per_second": 3.889,
      "step": 2000
    },
    {
      "epoch": 5.422993492407809,
      "grad_norm": 1.0486189126968384,
      "learning_rate": 3.8532110091743125e-05,
      "loss": 2.19,
      "step": 2500
    },
    {
      "epoch": 5.422993492407809,
      "eval_bleu": 0.2853635265097057,
      "eval_loss": 2.099168539047241,
      "eval_rouge1": 0.5205238075842558,
      "eval_rouge2": 0.27883621341002174,
      "eval_rougeL": 0.4772785679427928,
      "eval_runtime": 29.5017,
      "eval_samples_per_second": 31.354,
      "eval_steps_per_second": 3.932,
      "step": 2500
    },
    {
      "epoch": 6.507592190889371,
      "grad_norm": 1.0022239685058594,
      "learning_rate": 3.56651376146789e-05,
      "loss": 2.0473,
      "step": 3000
    },
    {
      "epoch": 6.507592190889371,
      "eval_bleu": 0.29294689624288234,
      "eval_loss": 2.0362119674682617,
      "eval_rouge1": 0.5380910185587349,
      "eval_rouge2": 0.29647105961235576,
      "eval_rougeL": 0.49649873151947865,
      "eval_runtime": 29.6658,
      "eval_samples_per_second": 31.181,
      "eval_steps_per_second": 3.91,
      "step": 3000
    },
    {
      "epoch": 7.592190889370933,
      "grad_norm": 1.1853405237197876,
      "learning_rate": 3.2798165137614676e-05,
      "loss": 1.9397,
      "step": 3500
    },
    {
      "epoch": 7.592190889370933,
      "eval_bleu": 0.2996126116957466,
      "eval_loss": 1.9933106899261475,
      "eval_rouge1": 0.5494053286639744,
      "eval_rouge2": 0.31025003697020603,
      "eval_rougeL": 0.5101736274334897,
      "eval_runtime": 29.6088,
      "eval_samples_per_second": 31.241,
      "eval_steps_per_second": 3.918,
      "step": 3500
    },
    {
      "epoch": 8.676789587852495,
      "grad_norm": 1.1255462169647217,
      "learning_rate": 2.9931192660550462e-05,
      "loss": 1.857,
      "step": 4000
    },
    {
      "epoch": 8.676789587852495,
      "eval_bleu": 0.30241485912380783,
      "eval_loss": 1.9647237062454224,
      "eval_rouge1": 0.5597611557009092,
      "eval_rouge2": 0.3191422306947157,
      "eval_rougeL": 0.5202653323875917,
      "eval_runtime": 29.9377,
      "eval_samples_per_second": 30.897,
      "eval_steps_per_second": 3.875,
      "step": 4000
    },
    {
      "epoch": 9.761388286334057,
      "grad_norm": 1.1697229146957397,
      "learning_rate": 2.7064220183486238e-05,
      "loss": 1.784,
      "step": 4500
    },
    {
      "epoch": 9.761388286334057,
      "eval_bleu": 0.3061719577143718,
      "eval_loss": 1.9443068504333496,
      "eval_rouge1": 0.567492271856554,
      "eval_rouge2": 0.3269182124324805,
      "eval_rougeL": 0.5278573882748132,
      "eval_runtime": 29.751,
      "eval_samples_per_second": 31.091,
      "eval_steps_per_second": 3.899,
      "step": 4500
    },
    {
      "epoch": 10.845986984815617,
      "grad_norm": 1.070591926574707,
      "learning_rate": 2.419724770642202e-05,
      "loss": 1.7239,
      "step": 5000
    },
    {
      "epoch": 10.845986984815617,
      "eval_bleu": 0.309858394526436,
      "eval_loss": 1.931990385055542,
      "eval_rouge1": 0.5723606535196859,
      "eval_rouge2": 0.3338521436125379,
      "eval_rougeL": 0.5341216118802655,
      "eval_runtime": 29.6886,
      "eval_samples_per_second": 31.157,
      "eval_steps_per_second": 3.907,
      "step": 5000
    },
    {
      "epoch": 11.93058568329718,
      "grad_norm": 1.0755261182785034,
      "learning_rate": 2.13302752293578e-05,
      "loss": 1.6713,
      "step": 5500
    },
    {
      "epoch": 11.93058568329718,
      "eval_bleu": 0.3115672562854492,
      "eval_loss": 1.920640230178833,
      "eval_rouge1": 0.5765467952167939,
      "eval_rouge2": 0.33826641143296676,
      "eval_rougeL": 0.5387314433190069,
      "eval_runtime": 29.7016,
      "eval_samples_per_second": 31.143,
      "eval_steps_per_second": 3.906,
      "step": 5500
    },
    {
      "epoch": 13.015184381778742,
      "grad_norm": 1.0826488733291626,
      "learning_rate": 1.8463302752293578e-05,
      "loss": 1.6263,
      "step": 6000
    },
    {
      "epoch": 13.015184381778742,
      "eval_bleu": 0.31268695772405475,
      "eval_loss": 1.916778564453125,
      "eval_rouge1": 0.5780842791223908,
      "eval_rouge2": 0.34164409810850394,
      "eval_rougeL": 0.5415509673961407,
      "eval_runtime": 29.789,
      "eval_samples_per_second": 31.052,
      "eval_steps_per_second": 3.894,
      "step": 6000
    },
    {
      "epoch": 14.099783080260304,
      "grad_norm": 1.0868735313415527,
      "learning_rate": 1.559633027522936e-05,
      "loss": 1.5869,
      "step": 6500
    },
    {
      "epoch": 14.099783080260304,
      "eval_bleu": 0.31365743559233084,
      "eval_loss": 1.9147837162017822,
      "eval_rouge1": 0.5829184758698387,
      "eval_rouge2": 0.3448101826360943,
      "eval_rougeL": 0.5450794961513086,
      "eval_runtime": 29.7645,
      "eval_samples_per_second": 31.077,
      "eval_steps_per_second": 3.897,
      "step": 6500
    },
    {
      "epoch": 15.184381778741866,
      "grad_norm": 1.0827687978744507,
      "learning_rate": 1.2729357798165138e-05,
      "loss": 1.5544,
      "step": 7000
    },
    {
      "epoch": 15.184381778741866,
      "eval_bleu": 0.315769500599606,
      "eval_loss": 1.9121257066726685,
      "eval_rouge1": 0.5844681250407762,
      "eval_rouge2": 0.34764910748110744,
      "eval_rougeL": 0.5476190296456669,
      "eval_runtime": 29.7415,
      "eval_samples_per_second": 31.101,
      "eval_steps_per_second": 3.9,
      "step": 7000
    },
    {
      "epoch": 16.268980477223426,
      "grad_norm": 1.1430450677871704,
      "learning_rate": 9.862385321100918e-06,
      "loss": 1.5307,
      "step": 7500
    },
    {
      "epoch": 16.268980477223426,
      "eval_bleu": 0.31648880861794926,
      "eval_loss": 1.9105726480484009,
      "eval_rouge1": 0.5852713451659596,
      "eval_rouge2": 0.34877835378762495,
      "eval_rougeL": 0.5486197186684263,
      "eval_runtime": 29.7345,
      "eval_samples_per_second": 31.109,
      "eval_steps_per_second": 3.901,
      "step": 7500
    },
    {
      "epoch": 17.35357917570499,
      "grad_norm": 1.0865087509155273,
      "learning_rate": 6.995412844036697e-06,
      "loss": 1.5087,
      "step": 8000
    },
    {
      "epoch": 17.35357917570499,
      "eval_bleu": 0.31692571547155524,
      "eval_loss": 1.9093118906021118,
      "eval_rouge1": 0.5860996975913157,
      "eval_rouge2": 0.3503907384934047,
      "eval_rougeL": 0.5500340150392318,
      "eval_runtime": 29.7497,
      "eval_samples_per_second": 31.093,
      "eval_steps_per_second": 3.899,
      "step": 8000
    },
    {
      "epoch": 18.43817787418655,
      "grad_norm": 1.1252211332321167,
      "learning_rate": 4.128440366972477e-06,
      "loss": 1.4937,
      "step": 8500
    },
    {
      "epoch": 18.43817787418655,
      "eval_bleu": 0.31723468269919336,
      "eval_loss": 1.9084105491638184,
      "eval_rouge1": 0.5868586694605076,
      "eval_rouge2": 0.350546625127078,
      "eval_rougeL": 0.5503666110741787,
      "eval_runtime": 29.7351,
      "eval_samples_per_second": 31.108,
      "eval_steps_per_second": 3.901,
      "step": 8500
    },
    {
      "epoch": 19.522776572668114,
      "grad_norm": 1.150936245918274,
      "learning_rate": 1.261467889908257e-06,
      "loss": 1.4824,
      "step": 9000
    },
    {
      "epoch": 19.522776572668114,
      "eval_bleu": 0.3177718226409019,
      "eval_loss": 1.9086270332336426,
      "eval_rouge1": 0.5875550437490973,
      "eval_rouge2": 0.3512666976647323,
      "eval_rougeL": 0.5509556223633276,
      "eval_runtime": 30.1604,
      "eval_samples_per_second": 30.669,
      "eval_steps_per_second": 3.846,
      "step": 9000
    },
    {
      "epoch": 20.0,
      "step": 9220,
      "total_flos": 2.8862709792768e+16,
      "train_loss": 1.4422371688478681,
      "train_runtime": 3284.8472,
      "train_samples_per_second": 22.412,
      "train_steps_per_second": 2.807
    }
  ],
  "logging_steps": 500,
  "max_steps": 9220,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8862709792768e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}