| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 9.515256253019661, | |
| "global_step": 32000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0002908060067422617, | |
| "loss": 1.9513, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00027241802022678516, | |
| "loss": 1.0525, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00025403003371130857, | |
| "loss": 1.0107, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 0.00023564204719583203, | |
| "loss": 0.9832, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 0.0002172632546736132, | |
| "loss": 0.966, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 0.00019887526815813667, | |
| "loss": 0.9513, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 0.00018049647563591785, | |
| "loss": 0.9392, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 0.00016211768311369905, | |
| "loss": 0.9287, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 0.00014376647257125343, | |
| "loss": 0.9208, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 0.0001253784860557769, | |
| "loss": 0.9114, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 0.00010699049954030032, | |
| "loss": 0.9035, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 8.860251302482378e-05, | |
| "loss": 0.8959, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 7.021452650934722e-05, | |
| "loss": 0.8891, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 5.183573398712841e-05, | |
| "loss": 0.8825, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 3.3447747471651855e-05, | |
| "loss": 0.8769, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 1.5059760956175297e-05, | |
| "loss": 0.8722, | |
| "step": 32000 | |
| } | |
| ], | |
| "max_steps": 33630, | |
| "num_train_epochs": 10, | |
| "total_flos": 9.064780510632545e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |