{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 16844,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 0.00019956548524376711,
      "loss": 0.3719,
      "step": 500
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001982657170365362,
      "loss": 0.3765,
      "step": 1000
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00019611199074762167,
      "loss": 0.3756,
      "step": 1500
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.000193123022894092,
      "loss": 0.3751,
      "step": 2000
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00018932478848871238,
      "loss": 0.376,
      "step": 2500
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.00018475029530941827,
      "loss": 0.3765,
      "step": 3000
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.00017943929705198342,
      "loss": 0.3737,
      "step": 3500
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00017343794785867154,
      "loss": 0.37,
      "step": 4000
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00016679840122511857,
      "loss": 0.3791,
      "step": 4500
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00015957835677106406,
      "loss": 0.3706,
      "step": 5000
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.00015184055881362684,
      "loss": 0.3785,
      "step": 5500
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.00014365225110067207,
      "loss": 0.3701,
      "step": 6000
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.00013508459244279678,
      "loss": 0.3733,
      "step": 6500
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.00012621203832226526,
      "loss": 0.3713,
      "step": 7000
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.00011711169385289445,
      "loss": 0.3731,
      "step": 7500
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.00010786264371385917,
      "loss": 0.377,
      "step": 8000
    },
    {
      "epoch": 0.5,
      "learning_rate": 9.854526488049042e-05,
      "loss": 0.3731,
      "step": 8500
    },
    {
      "epoch": 0.53,
      "learning_rate": 8.924052812463844e-05,
      "loss": 0.3762,
      "step": 9000
    },
    {
      "epoch": 0.56,
      "learning_rate": 8.002929435476878e-05,
      "loss": 0.3777,
      "step": 9500
    },
    {
      "epoch": 0.59,
      "learning_rate": 7.099161191080386e-05,
      "loss": 0.3699,
      "step": 10000
    },
    {
      "epoch": 0.62,
      "learning_rate": 6.220602092042465e-05,
      "loss": 0.3778,
      "step": 10500
    },
    {
      "epoch": 0.65,
      "learning_rate": 5.3748870762182066e-05,
      "loss": 0.371,
      "step": 11000
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.5693656566864785e-05,
      "loss": 0.3747,
      "step": 11500
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.81103805231225e-05,
      "loss": 0.3703,
      "step": 12000
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.1064943537786984e-05,
      "loss": 0.3738,
      "step": 12500
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.4618572537543038e-05,
      "loss": 0.3739,
      "step": 13000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.882728838886583e-05,
      "loss": 0.372,
      "step": 13500
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.3741419060158056e-05,
      "loss": 0.3716,
      "step": 14000
    },
    {
      "epoch": 0.86,
      "learning_rate": 9.405162256851662e-06,
      "loss": 0.3698,
      "step": 14500
    },
    {
      "epoch": 0.89,
      "learning_rate": 5.8562013303037124e-06,
      "loss": 0.3743,
      "step": 15000
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.1253777983517363e-06,
      "loss": 0.376,
      "step": 15500
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.236423323421776e-06,
      "loss": 0.3746,
      "step": 16000
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.0575347737803452e-07,
      "loss": 0.3738,
      "step": 16500
    }
  ],
  "logging_steps": 500,
  "max_steps": 16844,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 3.457848488949235e+17,
  "trial_name": null,
  "trial_params": null
}