{
  "best_metric": 1.2033756971359253,
  "best_model_checkpoint": "output/rihanna/checkpoint-917",
  "epoch": 7.0,
  "global_step": 917,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 0.00013670742670262692,
      "loss": 3.4044,
      "step": 5
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.00013523678052634687,
      "loss": 2.8194,
      "step": 10
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.00013280918103490095,
      "loss": 2.7475,
      "step": 15
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.00012945949034742042,
      "loss": 2.9331,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.00012523581249268407,
      "loss": 2.8944,
      "step": 25
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.00012019880259978666,
      "loss": 2.6589,
      "step": 30
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00011442079584574986,
      "loss": 2.7123,
      "step": 35
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00010798476866903087,
      "loss": 2.5847,
      "step": 40
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00010098314716666811,
      "loss": 2.391,
      "step": 45
    },
    {
      "epoch": 0.38,
      "learning_rate": 9.351647978736063e-05,
      "loss": 2.5899,
      "step": 50
    },
    {
      "epoch": 0.42,
      "learning_rate": 8.5691993381587e-05,
      "loss": 2.3867,
      "step": 55
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.762205334494898e-05,
      "loss": 2.4515,
      "step": 60
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.942254996821776e-05,
      "loss": 2.7458,
      "step": 65
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.121123416728538e-05,
      "loss": 2.4949,
      "step": 70
    },
    {
      "epoch": 0.57,
      "learning_rate": 5.310602649316754e-05,
      "loss": 2.5412,
      "step": 75
    },
    {
      "epoch": 0.61,
      "learning_rate": 4.5223323705920566e-05,
      "loss": 2.5738,
      "step": 80
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.7676327231320786e-05,
      "loss": 2.2778,
      "step": 85
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.0573417504900444e-05,
      "loss": 2.0957,
      "step": 90
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.401659754895943e-05,
      "loss": 2.3988,
      "step": 95
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.8100028133934438e-05,
      "loss": 2.3799,
      "step": 100
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.2908675560288951e-05,
      "loss": 2.5075,
      "step": 105
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.517091479772992e-06,
      "loss": 2.1984,
      "step": 110
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.988342278719811e-06,
      "loss": 2.4497,
      "step": 115
    },
    {
      "epoch": 0.92,
      "learning_rate": 2.3731033982246404e-06,
      "loss": 2.3199,
      "step": 120
    },
    {
      "epoch": 0.95,
      "learning_rate": 7.089315974356758e-07,
      "loss": 2.3814,
      "step": 125
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.9725610793441152e-08,
      "loss": 2.1807,
      "step": 130
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.336484432220459,
      "eval_runtime": 8.0556,
      "eval_samples_per_second": 22.593,
      "eval_steps_per_second": 2.855,
      "step": 131
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.153829445781081e-07,
      "loss": 2.3816,
      "step": 135
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.5916577414195624e-06,
      "loss": 2.1418,
      "step": 140
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.8302217539407e-06,
      "loss": 2.1866,
      "step": 145
    },
    {
      "epoch": 1.15,
      "learning_rate": 6.998927551907465e-06,
      "loss": 2.3514,
      "step": 150
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.1052270183036815e-05,
      "loss": 2.3302,
      "step": 155
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.5932040657672757e-05,
      "loss": 2.0237,
      "step": 160
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.1568161872809022e-05,
      "loss": 2.1338,
      "step": 165
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.7879694970972374e-05,
      "loss": 2.1066,
      "step": 170
    },
    {
      "epoch": 1.34,
      "learning_rate": 3.477600168191081e-05,
      "loss": 2.1939,
      "step": 175
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.215804595500327e-05,
      "loss": 2.1481,
      "step": 180
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.991981618998877e-05,
      "loss": 2.4364,
      "step": 185
    },
    {
      "epoch": 1.45,
      "learning_rate": 5.794984764173325e-05,
      "loss": 2.1677,
      "step": 190
    },
    {
      "epoch": 1.49,
      "learning_rate": 6.613282313617852e-05,
      "loss": 2.3095,
      "step": 195
    },
    {
      "epoch": 1.53,
      "learning_rate": 7.435122911001204e-05,
      "loss": 2.4404,
      "step": 200
    },
    {
      "epoch": 1.56,
      "learning_rate": 8.248704319210595e-05,
      "loss": 2.0718,
      "step": 205
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.042342909181217e-05,
      "loss": 2.2015,
      "step": 210
    },
    {
      "epoch": 1.64,
      "learning_rate": 9.804641445426643e-05,
      "loss": 2.1899,
      "step": 215
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.00010524652758746261,
      "loss": 2.1465,
      "step": 220
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.00011192036955648884,
      "loss": 2.3478,
      "step": 225
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.00011797209906849287,
      "loss": 2.3704,
      "step": 230
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.0001233148088243337,
      "loss": 2.2886,
      "step": 235
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.00012787177357149405,
      "loss": 2.1345,
      "step": 240
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.00013157755193529395,
      "loss": 2.0747,
      "step": 245
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.00013437892620529645,
      "loss": 2.2118,
      "step": 250
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.000136235666580879,
      "loss": 2.1172,
      "step": 255
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.00013712110890084145,
      "loss": 2.2353,
      "step": 260
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.237405776977539,
      "eval_runtime": 8.3034,
      "eval_samples_per_second": 22.521,
      "eval_steps_per_second": 2.89,
      "step": 262
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.0001366998287631265,
      "loss": 2.0176,
      "step": 265
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.00013520660867542716,
      "loss": 2.0043,
      "step": 270
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.00013274211424821946,
      "loss": 1.743,
      "step": 275
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.00012934228335981023,
      "loss": 2.0598,
      "step": 280
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.00012505669320030482,
      "loss": 1.9087,
      "step": 285
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.00011994783732453754,
      "loss": 1.6869,
      "step": 290
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.00011409021435531858,
      "loss": 1.726,
      "step": 295
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00010756924162575734,
      "loss": 1.966,
      "step": 300
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.00010048000960220251,
      "loss": 2.0242,
      "step": 305
    },
    {
      "epoch": 2.38,
      "learning_rate": 9.292589525111797e-05,
      "loss": 1.804,
      "step": 310
    },
    {
      "epoch": 2.42,
      "learning_rate": 8.501705457012652e-05,
      "loss": 1.7316,
      "step": 315
    },
    {
      "epoch": 2.46,
      "learning_rate": 7.686881626551514e-05,
      "loss": 2.1338,
      "step": 320
    },
    {
      "epoch": 2.5,
      "learning_rate": 6.860000000000001e-05,
      "loss": 2.067,
      "step": 325
    },
    {
      "epoch": 2.54,
      "learning_rate": 6.03311837344849e-05,
      "loss": 1.993,
      "step": 330
    },
    {
      "epoch": 2.58,
      "learning_rate": 5.218294542987351e-05,
      "loss": 1.8933,
      "step": 335
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.427410474888207e-05,
      "loss": 1.7142,
      "step": 340
    },
    {
      "epoch": 2.65,
      "learning_rate": 3.6719990397797524e-05,
      "loss": 1.9927,
      "step": 345
    },
    {
      "epoch": 2.69,
      "learning_rate": 2.9630758374242683e-05,
      "loss": 1.8104,
      "step": 350
    },
    {
      "epoch": 2.73,
      "learning_rate": 2.310978564468145e-05,
      "loss": 1.8292,
      "step": 355
    },
    {
      "epoch": 2.77,
      "learning_rate": 1.7252162675462497e-05,
      "loss": 2.0388,
      "step": 360
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.214330679969522e-05,
      "loss": 1.8121,
      "step": 365
    },
    {
      "epoch": 2.85,
      "learning_rate": 7.8577166401898e-06,
      "loss": 1.8632,
      "step": 370
    },
    {
      "epoch": 2.88,
      "learning_rate": 4.457885751780558e-06,
      "loss": 2.0386,
      "step": 375
    },
    {
      "epoch": 2.92,
      "learning_rate": 1.9933913245728472e-06,
      "loss": 1.7312,
      "step": 380
    },
    {
      "epoch": 2.96,
      "learning_rate": 5.001712368734975e-07,
      "loss": 2.0118,
      "step": 385
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.0,
      "loss": 1.7726,
      "step": 390
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.8738644123077393,
      "eval_runtime": 4.2323,
      "eval_samples_per_second": 45.838,
      "eval_steps_per_second": 5.907,
      "step": 390
    },
    {
      "epoch": 3.04,
      "learning_rate": 5.001712368734899e-07,
      "loss": 1.519,
      "step": 395
    },
    {
      "epoch": 3.08,
      "learning_rate": 1.9933913245728396e-06,
      "loss": 1.6714,
      "step": 400
    },
    {
      "epoch": 3.12,
      "learning_rate": 4.457885751780535e-06,
      "loss": 1.6067,
      "step": 405
    },
    {
      "epoch": 3.15,
      "learning_rate": 7.857716640189778e-06,
      "loss": 1.4835,
      "step": 410
    },
    {
      "epoch": 3.19,
      "learning_rate": 1.2143306799695189e-05,
      "loss": 1.9692,
      "step": 415
    },
    {
      "epoch": 3.23,
      "learning_rate": 1.725216267546246e-05,
      "loss": 1.5852,
      "step": 420
    },
    {
      "epoch": 3.27,
      "learning_rate": 2.310978564468141e-05,
      "loss": 1.766,
      "step": 425
    },
    {
      "epoch": 3.31,
      "learning_rate": 2.9630758374242642e-05,
      "loss": 1.7481,
      "step": 430
    },
    {
      "epoch": 3.35,
      "learning_rate": 3.671999039779748e-05,
      "loss": 1.9411,
      "step": 435
    },
    {
      "epoch": 3.38,
      "learning_rate": 4.427410474888202e-05,
      "loss": 1.5399,
      "step": 440
    },
    {
      "epoch": 3.42,
      "learning_rate": 5.218294542987346e-05,
      "loss": 1.9887,
      "step": 445
    },
    {
      "epoch": 3.46,
      "learning_rate": 6.033118373448485e-05,
      "loss": 1.6023,
      "step": 450
    },
    {
      "epoch": 3.5,
      "learning_rate": 6.859999999999997e-05,
      "loss": 1.665,
      "step": 455
    },
    {
      "epoch": 3.54,
      "learning_rate": 7.68688162655151e-05,
      "loss": 1.6708,
      "step": 460
    },
    {
      "epoch": 3.58,
      "learning_rate": 8.501705457012648e-05,
      "loss": 1.7596,
      "step": 465
    },
    {
      "epoch": 3.62,
      "learning_rate": 9.292589525111793e-05,
      "loss": 2.0391,
      "step": 470
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.00010048000960220248,
      "loss": 1.6346,
      "step": 475
    },
    {
      "epoch": 3.69,
      "learning_rate": 0.00010756924162575731,
      "loss": 1.5059,
      "step": 480
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.00011409021435531856,
      "loss": 1.7107,
      "step": 485
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.0001199478373245375,
      "loss": 1.6263,
      "step": 490
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.0001250566932003048,
      "loss": 1.7098,
      "step": 495
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.00012934228335981018,
      "loss": 1.5807,
      "step": 500
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.00013274211424821943,
      "loss": 1.9171,
      "step": 505
    },
    {
      "epoch": 3.92,
      "learning_rate": 0.00013520660867542716,
      "loss": 1.6038,
      "step": 510
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.00013669982876312649,
      "loss": 1.548,
      "step": 515
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.0001372,
      "loss": 1.8101,
      "step": 520
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.8380614519119263,
      "eval_runtime": 4.2339,
      "eval_samples_per_second": 45.821,
      "eval_steps_per_second": 5.905,
      "step": 520
    },
    {
      "epoch": 4.04,
      "learning_rate": 0.0001366998287631265,
      "loss": 1.7809,
      "step": 525
    },
    {
      "epoch": 4.08,
      "learning_rate": 0.0001352066086754272,
      "loss": 1.71,
      "step": 530
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.0001327421142482195,
      "loss": 1.3283,
      "step": 535
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.00012934228335981015,
      "loss": 1.4905,
      "step": 540
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.00012505669320030482,
      "loss": 1.2511,
      "step": 545
    },
    {
      "epoch": 4.23,
      "learning_rate": 0.00011994783732453755,
      "loss": 1.6209,
      "step": 550
    },
    {
      "epoch": 4.27,
      "learning_rate": 0.00011409021435531858,
      "loss": 1.6988,
      "step": 555
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.00010756924162575738,
      "loss": 1.2228,
      "step": 560
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.00010048000960220263,
      "loss": 1.6827,
      "step": 565
    },
    {
      "epoch": 4.38,
      "learning_rate": 9.292589525111788e-05,
      "loss": 1.6977,
      "step": 570
    },
    {
      "epoch": 4.42,
      "learning_rate": 8.501705457012643e-05,
      "loss": 1.4269,
      "step": 575
    },
    {
      "epoch": 4.46,
      "learning_rate": 7.686881626551516e-05,
      "loss": 1.6831,
      "step": 580
    },
    {
      "epoch": 4.5,
      "learning_rate": 6.860000000000003e-05,
      "loss": 1.0505,
      "step": 585
    },
    {
      "epoch": 4.54,
      "learning_rate": 6.033118373448492e-05,
      "loss": 1.4459,
      "step": 590
    },
    {
      "epoch": 4.58,
      "learning_rate": 5.218294542987365e-05,
      "loss": 1.4365,
      "step": 595
    },
    {
      "epoch": 4.62,
      "learning_rate": 4.42741047488822e-05,
      "loss": 1.6545,
      "step": 600
    },
    {
      "epoch": 4.65,
      "learning_rate": 3.671999039779743e-05,
      "loss": 1.6826,
      "step": 605
    },
    {
      "epoch": 4.69,
      "learning_rate": 2.9630758374242696e-05,
      "loss": 1.6816,
      "step": 610
    },
    {
      "epoch": 4.73,
      "learning_rate": 2.3109785644681465e-05,
      "loss": 1.43,
      "step": 615
    },
    {
      "epoch": 4.77,
      "learning_rate": 1.7252162675462504e-05,
      "loss": 1.1238,
      "step": 620
    },
    {
      "epoch": 4.81,
      "learning_rate": 1.2143306799695228e-05,
      "loss": 1.1441,
      "step": 625
    },
    {
      "epoch": 4.85,
      "learning_rate": 7.857716640189861e-06,
      "loss": 1.5854,
      "step": 630
    },
    {
      "epoch": 4.88,
      "learning_rate": 4.4578857517805195e-06,
      "loss": 1.2825,
      "step": 635
    },
    {
      "epoch": 4.92,
      "learning_rate": 1.9933913245728244e-06,
      "loss": 1.3848,
      "step": 640
    },
    {
      "epoch": 4.96,
      "learning_rate": 5.001712368734975e-07,
      "loss": 1.4917,
      "step": 645
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 1.0341,
      "step": 650
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.7726789712905884,
      "eval_runtime": 4.2405,
      "eval_samples_per_second": 45.75,
      "eval_steps_per_second": 5.896,
      "step": 650
    },
    {
      "epoch": 5.04,
      "learning_rate": 5.001712368734899e-07,
      "loss": 1.3946,
      "step": 655
    },
    {
      "epoch": 5.08,
      "learning_rate": 1.9933913245728015e-06,
      "loss": 1.1575,
      "step": 660
    },
    {
      "epoch": 5.12,
      "learning_rate": 4.457885751780535e-06,
      "loss": 1.2407,
      "step": 665
    },
    {
      "epoch": 5.15,
      "learning_rate": 7.857716640189824e-06,
      "loss": 1.299,
      "step": 670
    },
    {
      "epoch": 5.19,
      "learning_rate": 1.2143306799695106e-05,
      "loss": 1.4759,
      "step": 675
    },
    {
      "epoch": 5.23,
      "learning_rate": 1.725216267546245e-05,
      "loss": 1.2347,
      "step": 680
    },
    {
      "epoch": 5.27,
      "learning_rate": 2.3109785644681495e-05,
      "loss": 1.4233,
      "step": 685
    },
    {
      "epoch": 5.31,
      "learning_rate": 2.963075837424263e-05,
      "loss": 1.4062,
      "step": 690
    },
    {
      "epoch": 5.35,
      "learning_rate": 3.6719990397797463e-05,
      "loss": 1.2485,
      "step": 695
    },
    {
      "epoch": 5.38,
      "learning_rate": 4.4274104748882125e-05,
      "loss": 1.4536,
      "step": 700
    },
    {
      "epoch": 5.42,
      "learning_rate": 5.2182945429873444e-05,
      "loss": 1.2116,
      "step": 705
    },
    {
      "epoch": 5.46,
      "learning_rate": 6.033118373448483e-05,
      "loss": 1.1743,
      "step": 710
    },
    {
      "epoch": 5.5,
      "learning_rate": 6.859999999999984e-05,
      "loss": 1.2798,
      "step": 715
    },
    {
      "epoch": 5.54,
      "learning_rate": 7.686881626551508e-05,
      "loss": 1.1637,
      "step": 720
    },
    {
      "epoch": 5.58,
      "learning_rate": 8.501705457012647e-05,
      "loss": 1.4529,
      "step": 725
    },
    {
      "epoch": 5.62,
      "learning_rate": 9.292589525111778e-05,
      "loss": 1.1575,
      "step": 730
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.00010048000960220244,
      "loss": 1.4035,
      "step": 735
    },
    {
      "epoch": 5.69,
      "learning_rate": 0.0001075692416257573,
      "loss": 1.2311,
      "step": 740
    },
    {
      "epoch": 5.73,
      "learning_rate": 0.00011409021435531843,
      "loss": 1.3514,
      "step": 745
    },
    {
      "epoch": 5.77,
      "learning_rate": 0.00011994783732453749,
      "loss": 1.0519,
      "step": 750
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.00012505669320030485,
      "loss": 1.39,
      "step": 755
    },
    {
      "epoch": 5.85,
      "learning_rate": 0.00012934228335981013,
      "loss": 1.2267,
      "step": 760
    },
    {
      "epoch": 5.88,
      "learning_rate": 0.00013274211424821943,
      "loss": 1.431,
      "step": 765
    },
    {
      "epoch": 5.92,
      "learning_rate": 0.00013520660867542716,
      "loss": 1.5557,
      "step": 770
    },
    {
      "epoch": 5.96,
      "learning_rate": 0.00013669982876312649,
      "loss": 1.2241,
      "step": 775
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.0001372,
      "loss": 1.1435,
      "step": 780
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.752521276473999,
      "eval_runtime": 4.2466,
      "eval_samples_per_second": 45.684,
      "eval_steps_per_second": 5.887,
      "step": 780
    },
    {
      "epoch": 5.99,
      "learning_rate": 0.00013718027438920657,
      "loss": 1.3027,
      "step": 785
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.2558701038360596,
      "eval_runtime": 3.8315,
      "eval_samples_per_second": 47.762,
      "eval_steps_per_second": 6.003,
      "step": 786
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.0001368846170554219,
      "loss": 1.0307,
      "step": 790
    },
    {
      "epoch": 6.07,
      "learning_rate": 0.00013560834225858045,
      "loss": 1.3396,
      "step": 795
    },
    {
      "epoch": 6.11,
      "learning_rate": 0.0001333697782460593,
      "loss": 1.3763,
      "step": 800
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.0001302010724480925,
      "loss": 1.3968,
      "step": 805
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.00012614772981696314,
      "loss": 1.2598,
      "step": 810
    },
    {
      "epoch": 6.22,
      "learning_rate": 0.00012126795934232731,
      "loss": 1.6357,
      "step": 815
    },
    {
      "epoch": 6.26,
      "learning_rate": 0.00011563183812719099,
      "loss": 1.5069,
      "step": 820
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.00010932030502902761,
      "loss": 1.3309,
      "step": 825
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.00010242399831808912,
      "loss": 1.2963,
      "step": 830
    },
    {
      "epoch": 6.37,
      "learning_rate": 9.504195404499683e-05,
      "loss": 1.5269,
      "step": 835
    },
    {
      "epoch": 6.41,
      "learning_rate": 8.728018381001126e-05,
      "loss": 1.1606,
      "step": 840
    },
    {
      "epoch": 6.45,
      "learning_rate": 7.925015235826672e-05,
      "loss": 1.4031,
      "step": 845
    },
    {
      "epoch": 6.49,
      "learning_rate": 7.10671768638214e-05,
      "loss": 1.1422,
      "step": 850
    },
    {
      "epoch": 6.53,
      "learning_rate": 6.284877088998812e-05,
      "loss": 1.2245,
      "step": 855
    },
    {
      "epoch": 6.56,
      "learning_rate": 5.4712956807894146e-05,
      "loss": 1.5305,
      "step": 860
    },
    {
      "epoch": 6.6,
      "learning_rate": 4.677657090818787e-05,
      "loss": 1.4329,
      "step": 865
    },
    {
      "epoch": 6.64,
      "learning_rate": 3.915358554573355e-05,
      "loss": 1.0966,
      "step": 870
    },
    {
      "epoch": 6.68,
      "learning_rate": 3.1953472412537526e-05,
      "loss": 1.1648,
      "step": 875
    },
    {
      "epoch": 6.72,
      "learning_rate": 2.5279630443511272e-05,
      "loss": 1.5295,
      "step": 880
    },
    {
      "epoch": 6.76,
      "learning_rate": 1.9227900931507197e-05,
      "loss": 1.1119,
      "step": 885
    },
    {
      "epoch": 6.79,
      "learning_rate": 1.388519117566634e-05,
      "loss": 1.0333,
      "step": 890
    },
    {
      "epoch": 6.83,
      "learning_rate": 9.328226428505963e-06,
      "loss": 1.3515,
      "step": 895
    },
    {
      "epoch": 6.87,
      "learning_rate": 5.622448064706006e-06,
      "loss": 1.2529,
      "step": 900
    },
    {
      "epoch": 6.91,
      "learning_rate": 2.8210737947035045e-06,
      "loss": 1.0788,
      "step": 905
    },
    {
      "epoch": 6.95,
      "learning_rate": 9.643334191210031e-07,
      "loss": 1.4072,
      "step": 910
    },
    {
      "epoch": 6.98,
      "learning_rate": 7.88910991585708e-08,
      "loss": 0.9972,
      "step": 915
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.2033756971359253,
      "eval_runtime": 4.6484,
      "eval_samples_per_second": 39.368,
      "eval_steps_per_second": 4.948,
      "step": 917
    }
  ],
  "max_steps": 917,
  "num_train_epochs": 7,
  "total_flos": 953846562816000.0,
  "trial_name": null,
  "trial_params": null
}