{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 210,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.17,
      "learning_rate": 5.714285714285714e-05,
      "loss": 1.446,
      "step": 6
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00011428571428571428,
      "loss": 1.4474,
      "step": 12
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00017142857142857143,
      "loss": 1.3816,
      "step": 18
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.00019682539682539682,
      "loss": 1.1761,
      "step": 24
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.00019047619047619048,
      "loss": 1.1279,
      "step": 30
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.00018412698412698412,
      "loss": 1.1832,
      "step": 36
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.00017777777777777779,
      "loss": 1.02,
      "step": 42
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.00017142857142857143,
      "loss": 0.997,
      "step": 48
    },
    {
      "epoch": 1.54,
      "learning_rate": 0.0001650793650793651,
      "loss": 0.9472,
      "step": 54
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.00015873015873015873,
      "loss": 0.9666,
      "step": 60
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.00015238095238095237,
      "loss": 0.9449,
      "step": 66
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.00014603174603174603,
      "loss": 0.9606,
      "step": 72
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.00013968253968253967,
      "loss": 0.7714,
      "step": 78
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.7185,
      "step": 84
    },
    {
      "epoch": 2.57,
      "learning_rate": 0.00012698412698412698,
      "loss": 0.8102,
      "step": 90
    },
    {
      "epoch": 2.74,
      "learning_rate": 0.00012063492063492063,
      "loss": 0.7726,
      "step": 96
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.00011428571428571428,
      "loss": 0.8187,
      "step": 102
    },
    {
      "epoch": 3.09,
      "learning_rate": 0.00010793650793650794,
      "loss": 0.7518,
      "step": 108
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.00010158730158730159,
      "loss": 0.7197,
      "step": 114
    },
    {
      "epoch": 3.43,
      "learning_rate": 9.523809523809524e-05,
      "loss": 0.7031,
      "step": 120
    },
    {
      "epoch": 3.6,
      "learning_rate": 8.888888888888889e-05,
      "loss": 0.6438,
      "step": 126
    },
    {
      "epoch": 3.77,
      "learning_rate": 8.253968253968255e-05,
      "loss": 0.7566,
      "step": 132
    },
    {
      "epoch": 3.94,
      "learning_rate": 7.619047619047618e-05,
      "loss": 0.6884,
      "step": 138
    },
    {
      "epoch": 4.11,
      "learning_rate": 6.984126984126984e-05,
      "loss": 0.6602,
      "step": 144
    },
    {
      "epoch": 4.29,
      "learning_rate": 6.349206349206349e-05,
      "loss": 0.6261,
      "step": 150
    },
    {
      "epoch": 4.46,
      "learning_rate": 5.714285714285714e-05,
      "loss": 0.6977,
      "step": 156
    },
    {
      "epoch": 4.63,
      "learning_rate": 5.0793650793650794e-05,
      "loss": 0.6703,
      "step": 162
    },
    {
      "epoch": 4.8,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.6544,
      "step": 168
    },
    {
      "epoch": 4.97,
      "learning_rate": 3.809523809523809e-05,
      "loss": 0.6531,
      "step": 174
    },
    {
      "epoch": 5.14,
      "learning_rate": 3.1746031746031745e-05,
      "loss": 0.6687,
      "step": 180
    },
    {
      "epoch": 5.31,
      "learning_rate": 2.5396825396825397e-05,
      "loss": 0.6178,
      "step": 186
    },
    {
      "epoch": 5.49,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 0.5971,
      "step": 192
    },
    {
      "epoch": 5.66,
      "learning_rate": 1.2698412698412699e-05,
      "loss": 0.6674,
      "step": 198
    },
    {
      "epoch": 5.83,
      "learning_rate": 6.349206349206349e-06,
      "loss": 0.5963,
      "step": 204
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.0,
      "loss": 0.636,
      "step": 210
    }
  ],
  "logging_steps": 6,
  "max_steps": 210,
  "num_train_epochs": 6,
  "save_steps": 500,
  "total_flos": 6.657988228861133e+16,
  "trial_name": null,
  "trial_params": null
}