| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "global_step": 690, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0001927536231884058, | |
| "loss": 2.497, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0001855072463768116, | |
| "loss": 1.0576, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0001782608695652174, | |
| "loss": 1.063, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0001710144927536232, | |
| "loss": 1.5434, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.000163768115942029, | |
| "loss": 1.1751, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001565217391304348, | |
| "loss": 0.9868, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014927536231884058, | |
| "loss": 1.0466, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00014202898550724638, | |
| "loss": 0.7962, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001347826086956522, | |
| "loss": 1.0158, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00012753623188405797, | |
| "loss": 0.8366, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012028985507246378, | |
| "loss": 1.2794, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00011304347826086956, | |
| "loss": 1.6535, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00010579710144927538, | |
| "loss": 0.878, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 9.855072463768117e-05, | |
| "loss": 0.6866, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.130434782608696e-05, | |
| "loss": 0.5021, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 8.405797101449276e-05, | |
| "loss": 0.4383, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 7.681159420289855e-05, | |
| "loss": 0.4791, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 6.956521739130436e-05, | |
| "loss": 0.5432, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 6.231884057971015e-05, | |
| "loss": 0.3643, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 5.507246376811594e-05, | |
| "loss": 1.4377, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 4.782608695652174e-05, | |
| "loss": 0.4916, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 4.057971014492754e-05, | |
| "loss": 0.5831, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.7232, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.608695652173913e-05, | |
| "loss": 0.7072, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.8840579710144928e-05, | |
| "loss": 0.5308, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.1594202898550725e-05, | |
| "loss": 0.4856, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 4.347826086956522e-06, | |
| "loss": 0.8379, | |
| "step": 675 | |
| } | |
| ], | |
| "max_steps": 690, | |
| "num_train_epochs": 2, | |
| "start_time": 1678594586.128044, | |
| "total_flos": 3.1496986193182065e+19, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |