| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 8.474576271186441, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.994067796610171e-06, | |
| "loss": 2.3903, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.986440677966102e-06, | |
| "loss": 1.2215, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 9.977966101694917e-06, | |
| "loss": 1.1136, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 9.96949152542373e-06, | |
| "loss": 1.1659, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 9.961016949152543e-06, | |
| "loss": 0.9895, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 9.953389830508475e-06, | |
| "loss": 1.2496, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 9.944915254237288e-06, | |
| "loss": 1.058, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.936440677966102e-06, | |
| "loss": 0.8977, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 9.927966101694915e-06, | |
| "loss": 1.0084, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 9.91949152542373e-06, | |
| "loss": 1.1805, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.911016949152543e-06, | |
| "loss": 0.971, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 9.902542372881356e-06, | |
| "loss": 0.8166, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 9.89406779661017e-06, | |
| "loss": 0.713, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 9.885593220338984e-06, | |
| "loss": 0.7842, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 9.877118644067798e-06, | |
| "loss": 0.6229, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 9.86949152542373e-06, | |
| "loss": 0.9259, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 9.861016949152544e-06, | |
| "loss": 0.8035, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 9.852542372881356e-06, | |
| "loss": 1.7852, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 9.844067796610171e-06, | |
| "loss": 0.79, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 9.835593220338984e-06, | |
| "loss": 0.8361, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 9.827118644067797e-06, | |
| "loss": 0.789, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 9.818644067796612e-06, | |
| "loss": 0.9851, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 9.810169491525425e-06, | |
| "loss": 0.7422, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 9.801694915254238e-06, | |
| "loss": 0.7643, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 9.79322033898305e-06, | |
| "loss": 0.6639, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 9.784745762711865e-06, | |
| "loss": 0.5698, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 9.776271186440678e-06, | |
| "loss": 0.5661, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 9.767796610169491e-06, | |
| "loss": 0.8881, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.759322033898306e-06, | |
| "loss": 0.5563, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 9.750847457627119e-06, | |
| "loss": 0.7705, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 9.742372881355932e-06, | |
| "loss": 0.5988, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 9.733898305084747e-06, | |
| "loss": 0.7516, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 9.72542372881356e-06, | |
| "loss": 1.0188, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 9.717796610169492e-06, | |
| "loss": 1.6122, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 9.709322033898307e-06, | |
| "loss": 1.5343, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.05, | |
| "learning_rate": 9.70084745762712e-06, | |
| "loss": 0.8529, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 9.692372881355932e-06, | |
| "loss": 0.7866, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 9.683898305084747e-06, | |
| "loss": 0.8368, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 9.67542372881356e-06, | |
| "loss": 0.6965, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 9.666949152542375e-06, | |
| "loss": 0.9716, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 9.659322033898307e-06, | |
| "loss": 0.8647, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 9.65084745762712e-06, | |
| "loss": 0.7379, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 9.642372881355933e-06, | |
| "loss": 0.5591, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 9.633898305084746e-06, | |
| "loss": 0.5949, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 9.62542372881356e-06, | |
| "loss": 0.6818, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 9.616949152542374e-06, | |
| "loss": 0.607, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 9.608474576271187e-06, | |
| "loss": 0.6291, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 9.600000000000001e-06, | |
| "loss": 0.3769, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 9.591525423728814e-06, | |
| "loss": 0.5266, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 9.583050847457627e-06, | |
| "loss": 0.6024, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 9.57457627118644e-06, | |
| "loss": 0.5602, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 9.566101694915255e-06, | |
| "loss": 0.6886, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 9.557627118644068e-06, | |
| "loss": 0.6933, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 9.549152542372883e-06, | |
| "loss": 0.4223, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 9.540677966101696e-06, | |
| "loss": 0.4604, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 9.532203389830508e-06, | |
| "loss": 0.3966, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 9.523728813559323e-06, | |
| "loss": 0.4864, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 9.515254237288136e-06, | |
| "loss": 0.6848, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 9.506779661016949e-06, | |
| "loss": 0.488, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "learning_rate": 9.498305084745764e-06, | |
| "loss": 0.5127, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 9.489830508474577e-06, | |
| "loss": 0.4368, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 9.481355932203391e-06, | |
| "loss": 0.4431, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 5.34, | |
| "learning_rate": 9.472881355932204e-06, | |
| "loss": 0.7604, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 9.464406779661017e-06, | |
| "loss": 0.4196, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 9.455932203389832e-06, | |
| "loss": 0.5376, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 9.447457627118645e-06, | |
| "loss": 0.4184, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 9.43898305084746e-06, | |
| "loss": 0.3217, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 9.430508474576273e-06, | |
| "loss": 0.4748, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "learning_rate": 9.422033898305086e-06, | |
| "loss": 0.6145, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "learning_rate": 9.413559322033899e-06, | |
| "loss": 0.3761, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 9.405084745762713e-06, | |
| "loss": 0.3156, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 9.396610169491526e-06, | |
| "loss": 0.4638, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 9.38813559322034e-06, | |
| "loss": 0.488, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 6.27, | |
| "learning_rate": 9.379661016949152e-06, | |
| "loss": 0.5872, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 9.371186440677967e-06, | |
| "loss": 0.5414, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 9.36271186440678e-06, | |
| "loss": 0.3205, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 9.354237288135593e-06, | |
| "loss": 0.4158, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 9.346610169491525e-06, | |
| "loss": 0.3371, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 9.33813559322034e-06, | |
| "loss": 0.4453, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 6.78, | |
| "learning_rate": 9.329661016949153e-06, | |
| "loss": 0.4018, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 9.321186440677967e-06, | |
| "loss": 0.4046, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 6.95, | |
| "learning_rate": 9.31271186440678e-06, | |
| "loss": 0.438, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 9.304237288135593e-06, | |
| "loss": 0.4067, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 9.295762711864408e-06, | |
| "loss": 0.3016, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 9.287288135593221e-06, | |
| "loss": 0.3699, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 9.278813559322036e-06, | |
| "loss": 0.3857, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 9.270338983050849e-06, | |
| "loss": 0.4237, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 7.46, | |
| "learning_rate": 9.261864406779662e-06, | |
| "loss": 0.3207, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 7.54, | |
| "learning_rate": 9.253389830508476e-06, | |
| "loss": 0.2815, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 9.24491525423729e-06, | |
| "loss": 0.2975, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 9.236440677966102e-06, | |
| "loss": 0.3149, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "learning_rate": 9.227966101694917e-06, | |
| "loss": 0.3816, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 9.21949152542373e-06, | |
| "loss": 0.3822, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "learning_rate": 9.211016949152543e-06, | |
| "loss": 0.3699, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "learning_rate": 9.202542372881356e-06, | |
| "loss": 0.2304, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 9.19406779661017e-06, | |
| "loss": 0.3732, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 9.185593220338984e-06, | |
| "loss": 0.3458, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 9.177118644067797e-06, | |
| "loss": 0.3233, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 9.168644067796611e-06, | |
| "loss": 0.3938, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 8.47, | |
| "learning_rate": 9.160169491525424e-06, | |
| "loss": 0.3358, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 11800, | |
| "num_train_epochs": 100, | |
| "save_steps": 1000, | |
| "total_flos": 0.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |