| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 400, |
| "global_step": 2806, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07127583749109052, |
| "grad_norm": 6.532841205596924, |
| "learning_rate": 4.823592302209551e-05, |
| "loss": 0.9484, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14255167498218105, |
| "grad_norm": 6.6614179611206055, |
| "learning_rate": 4.645402708481825e-05, |
| "loss": 0.8389, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.21382751247327156, |
| "grad_norm": 4.321991443634033, |
| "learning_rate": 4.467213114754098e-05, |
| "loss": 0.8474, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2851033499643621, |
| "grad_norm": 4.072144985198975, |
| "learning_rate": 4.2890235210263726e-05, |
| "loss": 0.799, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2851033499643621, |
| "eval_bleu": 53.442676314145174, |
| "eval_chrf": 76.35865447513757, |
| "eval_loss": 1.1025807857513428, |
| "eval_runtime": 47.1926, |
| "eval_samples_per_second": 8.984, |
| "eval_steps_per_second": 1.123, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3563791874554526, |
| "grad_norm": 2.360319137573242, |
| "learning_rate": 4.1108339272986455e-05, |
| "loss": 0.7506, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4276550249465431, |
| "grad_norm": 3.7323949337005615, |
| "learning_rate": 3.93264433357092e-05, |
| "loss": 0.7883, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4989308624376336, |
| "grad_norm": 3.8180646896362305, |
| "learning_rate": 3.754454739843193e-05, |
| "loss": 0.824, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5702066999287242, |
| "grad_norm": 5.235339641571045, |
| "learning_rate": 3.5762651461154676e-05, |
| "loss": 0.768, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5702066999287242, |
| "eval_bleu": 54.05983069722315, |
| "eval_chrf": 76.51999122784662, |
| "eval_loss": 1.0922574996948242, |
| "eval_runtime": 46.8721, |
| "eval_samples_per_second": 9.046, |
| "eval_steps_per_second": 1.131, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6414825374198146, |
| "grad_norm": 4.520367622375488, |
| "learning_rate": 3.3980755523877405e-05, |
| "loss": 0.831, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.7127583749109052, |
| "grad_norm": 2.8413846492767334, |
| "learning_rate": 3.219885958660015e-05, |
| "loss": 0.7326, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7840342124019958, |
| "grad_norm": 3.517821788787842, |
| "learning_rate": 3.0416963649322883e-05, |
| "loss": 0.7319, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.8553100498930862, |
| "grad_norm": 5.297749042510986, |
| "learning_rate": 2.8635067712045616e-05, |
| "loss": 0.7882, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.8553100498930862, |
| "eval_bleu": 53.80037138937644, |
| "eval_chrf": 76.47119035110367, |
| "eval_loss": 1.1048208475112915, |
| "eval_runtime": 49.1983, |
| "eval_samples_per_second": 8.618, |
| "eval_steps_per_second": 1.077, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9265858873841768, |
| "grad_norm": 6.76516580581665, |
| "learning_rate": 2.6853171774768355e-05, |
| "loss": 0.7629, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9978617248752673, |
| "grad_norm": 4.310391902923584, |
| "learning_rate": 2.5071275837491094e-05, |
| "loss": 0.7552, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.0691375623663577, |
| "grad_norm": 2.064706563949585, |
| "learning_rate": 2.328937990021383e-05, |
| "loss": 0.6589, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.1404133998574484, |
| "grad_norm": 3.789889097213745, |
| "learning_rate": 2.1507483962936566e-05, |
| "loss": 0.7507, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.1404133998574484, |
| "eval_bleu": 53.46559824716441, |
| "eval_chrf": 76.43243789812155, |
| "eval_loss": 1.1045643091201782, |
| "eval_runtime": 46.1773, |
| "eval_samples_per_second": 9.182, |
| "eval_steps_per_second": 1.148, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.2116892373485388, |
| "grad_norm": 4.300589084625244, |
| "learning_rate": 1.97255880256593e-05, |
| "loss": 0.7268, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.2829650748396293, |
| "grad_norm": 5.685310363769531, |
| "learning_rate": 1.7943692088382037e-05, |
| "loss": 0.737, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.3542409123307197, |
| "grad_norm": 3.9357213973999023, |
| "learning_rate": 1.6161796151104776e-05, |
| "loss": 0.7294, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.4255167498218104, |
| "grad_norm": 3.379863739013672, |
| "learning_rate": 1.4379900213827512e-05, |
| "loss": 0.7357, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.4255167498218104, |
| "eval_bleu": 52.95209070310886, |
| "eval_chrf": 76.15023196589239, |
| "eval_loss": 1.1049425601959229, |
| "eval_runtime": 45.4363, |
| "eval_samples_per_second": 9.332, |
| "eval_steps_per_second": 1.166, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.4967925873129009, |
| "grad_norm": 4.859795570373535, |
| "learning_rate": 1.2598004276550248e-05, |
| "loss": 0.7533, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.5680684248039913, |
| "grad_norm": 2.6143312454223633, |
| "learning_rate": 1.0816108339272987e-05, |
| "loss": 0.6788, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.639344262295082, |
| "grad_norm": 4.200282096862793, |
| "learning_rate": 9.034212401995725e-06, |
| "loss": 0.7189, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.7106200997861725, |
| "grad_norm": 2.8179945945739746, |
| "learning_rate": 7.2523164647184606e-06, |
| "loss": 0.7166, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.7106200997861725, |
| "eval_bleu": 53.72965343495465, |
| "eval_chrf": 76.6875373434398, |
| "eval_loss": 1.110432744026184, |
| "eval_runtime": 47.0759, |
| "eval_samples_per_second": 9.007, |
| "eval_steps_per_second": 1.126, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.781895937277263, |
| "grad_norm": 6.191401481628418, |
| "learning_rate": 5.470420527441197e-06, |
| "loss": 0.7356, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.8531717747683536, |
| "grad_norm": 3.071488857269287, |
| "learning_rate": 3.6885245901639347e-06, |
| "loss": 0.7243, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.924447612259444, |
| "grad_norm": 6.51959753036499, |
| "learning_rate": 1.9066286528866714e-06, |
| "loss": 0.7095, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.9957234497505345, |
| "grad_norm": 3.0172016620635986, |
| "learning_rate": 1.2473271560940842e-07, |
| "loss": 0.7087, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.9957234497505345, |
| "eval_bleu": 53.66818723050837, |
| "eval_chrf": 76.56160951915358, |
| "eval_loss": 1.1105611324310303, |
| "eval_runtime": 47.0822, |
| "eval_samples_per_second": 9.006, |
| "eval_steps_per_second": 1.126, |
| "step": 2800 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2806, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7682289240637440.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|