| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 3170, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.15772870662460567, |
| "grad_norm": 8.993573188781738, |
| "learning_rate": 3.390694006309148e-05, |
| "loss": 2.6552, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.31545741324921134, |
| "grad_norm": 6.61809778213501, |
| "learning_rate": 3.280283911671924e-05, |
| "loss": 2.2263, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.47318611987381703, |
| "grad_norm": 10.127768516540527, |
| "learning_rate": 3.1698738170347005e-05, |
| "loss": 1.9796, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6309148264984227, |
| "grad_norm": 4.960940361022949, |
| "learning_rate": 3.0594637223974764e-05, |
| "loss": 2.0323, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7886435331230284, |
| "grad_norm": 5.231279373168945, |
| "learning_rate": 2.9490536277602523e-05, |
| "loss": 2.0473, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7886435331230284, |
| "eval_bleu": 57.32056474546698, |
| "eval_chrf": 81.52338542769736, |
| "eval_loss": 0.7680727243423462, |
| "eval_runtime": 43.2314, |
| "eval_samples_per_second": 5.181, |
| "eval_steps_per_second": 0.648, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9463722397476341, |
| "grad_norm": 4.041931629180908, |
| "learning_rate": 2.838643533123028e-05, |
| "loss": 1.9593, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.1041009463722398, |
| "grad_norm": 5.660284519195557, |
| "learning_rate": 2.728233438485804e-05, |
| "loss": 1.9291, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2618296529968454, |
| "grad_norm": 4.831106662750244, |
| "learning_rate": 2.6178233438485802e-05, |
| "loss": 1.9062, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.4195583596214512, |
| "grad_norm": 5.246473789215088, |
| "learning_rate": 2.5074132492113564e-05, |
| "loss": 1.8709, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.5772870662460567, |
| "grad_norm": 6.091129779815674, |
| "learning_rate": 2.3970031545741323e-05, |
| "loss": 1.8274, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.5772870662460567, |
| "eval_bleu": 56.853343569870646, |
| "eval_chrf": 81.1312982973412, |
| "eval_loss": 0.8501662015914917, |
| "eval_runtime": 25.1617, |
| "eval_samples_per_second": 8.902, |
| "eval_steps_per_second": 1.113, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.7350157728706623, |
| "grad_norm": 4.976201057434082, |
| "learning_rate": 2.286593059936908e-05, |
| "loss": 1.8565, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.8927444794952681, |
| "grad_norm": 6.352173805236816, |
| "learning_rate": 2.1761829652996844e-05, |
| "loss": 1.7704, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.050473186119874, |
| "grad_norm": 5.1022233963012695, |
| "learning_rate": 2.0657728706624606e-05, |
| "loss": 1.7229, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.2082018927444795, |
| "grad_norm": 3.9384002685546875, |
| "learning_rate": 1.9553627760252364e-05, |
| "loss": 1.6988, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.365930599369085, |
| "grad_norm": 9.005499839782715, |
| "learning_rate": 1.8449526813880123e-05, |
| "loss": 1.7559, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.365930599369085, |
| "eval_bleu": 55.558969113821774, |
| "eval_chrf": 79.91366807649794, |
| "eval_loss": 0.8783891797065735, |
| "eval_runtime": 25.3515, |
| "eval_samples_per_second": 8.836, |
| "eval_steps_per_second": 1.104, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.5236593059936907, |
| "grad_norm": 5.816145420074463, |
| "learning_rate": 1.7345425867507885e-05, |
| "loss": 1.7198, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.6813880126182967, |
| "grad_norm": 4.037476062774658, |
| "learning_rate": 1.6241324921135647e-05, |
| "loss": 1.7311, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.8391167192429023, |
| "grad_norm": 4.826643943786621, |
| "learning_rate": 1.5137223974763406e-05, |
| "loss": 1.7583, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.996845425867508, |
| "grad_norm": 5.530210018157959, |
| "learning_rate": 1.4033123028391166e-05, |
| "loss": 1.6305, |
| "step": 1900 |
| }, |
| { |
| "epoch": 3.1545741324921135, |
| "grad_norm": 5.22488260269165, |
| "learning_rate": 1.2929022082018927e-05, |
| "loss": 1.7053, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.1545741324921135, |
| "eval_bleu": 55.323355114843025, |
| "eval_chrf": 79.52359278149143, |
| "eval_loss": 0.9229835271835327, |
| "eval_runtime": 24.6216, |
| "eval_samples_per_second": 9.098, |
| "eval_steps_per_second": 1.137, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.312302839116719, |
| "grad_norm": 6.181191444396973, |
| "learning_rate": 1.1824921135646685e-05, |
| "loss": 1.6653, |
| "step": 2100 |
| }, |
| { |
| "epoch": 3.470031545741325, |
| "grad_norm": 3.005113363265991, |
| "learning_rate": 1.0720820189274447e-05, |
| "loss": 1.6738, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.6277602523659307, |
| "grad_norm": 4.87603759765625, |
| "learning_rate": 9.616719242902206e-06, |
| "loss": 1.6694, |
| "step": 2300 |
| }, |
| { |
| "epoch": 3.7854889589905363, |
| "grad_norm": 3.4485676288604736, |
| "learning_rate": 8.512618296529968e-06, |
| "loss": 1.6154, |
| "step": 2400 |
| }, |
| { |
| "epoch": 3.943217665615142, |
| "grad_norm": 4.3030219078063965, |
| "learning_rate": 7.4085173501577285e-06, |
| "loss": 1.5943, |
| "step": 2500 |
| }, |
| { |
| "epoch": 3.943217665615142, |
| "eval_bleu": 55.5956316646253, |
| "eval_chrf": 79.72369308541496, |
| "eval_loss": 0.9278033375740051, |
| "eval_runtime": 25.7316, |
| "eval_samples_per_second": 8.705, |
| "eval_steps_per_second": 1.088, |
| "step": 2500 |
| }, |
| { |
| "epoch": 4.100946372239748, |
| "grad_norm": 4.7534284591674805, |
| "learning_rate": 6.304416403785489e-06, |
| "loss": 1.6937, |
| "step": 2600 |
| }, |
| { |
| "epoch": 4.2586750788643535, |
| "grad_norm": 5.381317138671875, |
| "learning_rate": 5.200315457413248e-06, |
| "loss": 1.5835, |
| "step": 2700 |
| }, |
| { |
| "epoch": 4.416403785488959, |
| "grad_norm": 7.387664794921875, |
| "learning_rate": 4.09621451104101e-06, |
| "loss": 1.6145, |
| "step": 2800 |
| }, |
| { |
| "epoch": 4.574132492113565, |
| "grad_norm": 4.751382827758789, |
| "learning_rate": 2.9921135646687696e-06, |
| "loss": 1.6218, |
| "step": 2900 |
| }, |
| { |
| "epoch": 4.73186119873817, |
| "grad_norm": 4.115559101104736, |
| "learning_rate": 1.8880126182965297e-06, |
| "loss": 1.6476, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.73186119873817, |
| "eval_bleu": 55.67318568538766, |
| "eval_chrf": 79.82548413247255, |
| "eval_loss": 0.9428688287734985, |
| "eval_runtime": 24.3618, |
| "eval_samples_per_second": 9.195, |
| "eval_steps_per_second": 1.149, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.889589905362776, |
| "grad_norm": 8.612651824951172, |
| "learning_rate": 7.839116719242902e-07, |
| "loss": 1.6411, |
| "step": 3100 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 3170, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8671674976174080.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|