| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "global_step": 272547, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9908272701589084e-05, | |
| "loss": 0.886, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.981654540317817e-05, | |
| "loss": 0.8774, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9724818104767253e-05, | |
| "loss": 0.864, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9633090806356335e-05, | |
| "loss": 0.8635, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.954136350794542e-05, | |
| "loss": 0.8624, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9449636209534504e-05, | |
| "loss": 0.8613, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.935790891112359e-05, | |
| "loss": 0.8532, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.926618161271267e-05, | |
| "loss": 0.8571, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9174454314301755e-05, | |
| "loss": 0.8498, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.908272701589084e-05, | |
| "loss": 0.8409, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.8990999717479925e-05, | |
| "loss": 0.8411, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.8899272419069006e-05, | |
| "loss": 0.8389, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.880754512065809e-05, | |
| "loss": 0.846, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.8715817822247176e-05, | |
| "loss": 0.848, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.862409052383626e-05, | |
| "loss": 0.8395, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.8532363225425346e-05, | |
| "loss": 0.8416, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.844063592701443e-05, | |
| "loss": 0.841, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.8348908628603515e-05, | |
| "loss": 0.8311, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.825718133019259e-05, | |
| "loss": 0.8279, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.816545403178168e-05, | |
| "loss": 0.8227, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.807372673337076e-05, | |
| "loss": 0.8297, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.798199943495985e-05, | |
| "loss": 0.8227, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.789027213654893e-05, | |
| "loss": 0.8332, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.779854483813801e-05, | |
| "loss": 0.8297, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.770681753972709e-05, | |
| "loss": 0.8286, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.761509024131618e-05, | |
| "loss": 0.8063, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.752336294290526e-05, | |
| "loss": 0.81, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.743163564449435e-05, | |
| "loss": 0.8186, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.733990834608343e-05, | |
| "loss": 0.8282, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.724818104767251e-05, | |
| "loss": 0.8184, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.71564537492616e-05, | |
| "loss": 0.8139, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.706472645085068e-05, | |
| "loss": 0.83, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.697299915243977e-05, | |
| "loss": 0.8087, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.6881271854028845e-05, | |
| "loss": 0.8123, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.678954455561793e-05, | |
| "loss": 0.8136, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.6697817257207015e-05, | |
| "loss": 0.8117, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.66060899587961e-05, | |
| "loss": 0.8119, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.6514362660385184e-05, | |
| "loss": 0.809, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.6422635361974266e-05, | |
| "loss": 0.7984, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.633090806356335e-05, | |
| "loss": 0.8055, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.6239180765152435e-05, | |
| "loss": 0.8073, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.614745346674152e-05, | |
| "loss": 0.8068, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.6055726168330605e-05, | |
| "loss": 0.8015, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.5963998869919686e-05, | |
| "loss": 0.8144, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.587227157150877e-05, | |
| "loss": 0.8053, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.5780544273097856e-05, | |
| "loss": 0.807, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.568881697468694e-05, | |
| "loss": 0.8036, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.5597089676276025e-05, | |
| "loss": 0.8063, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.55053623778651e-05, | |
| "loss": 0.8027, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.541363507945419e-05, | |
| "loss": 0.7906, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.532190778104327e-05, | |
| "loss": 0.8034, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.523018048263236e-05, | |
| "loss": 0.8017, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.513845318422144e-05, | |
| "loss": 0.7927, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.504672588581053e-05, | |
| "loss": 0.7967, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.49549985873996e-05, | |
| "loss": 0.7948, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.486327128898869e-05, | |
| "loss": 0.8007, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.477154399057777e-05, | |
| "loss": 0.7963, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.467981669216686e-05, | |
| "loss": 0.7984, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.458808939375594e-05, | |
| "loss": 0.7949, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.449636209534502e-05, | |
| "loss": 0.7931, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.440463479693411e-05, | |
| "loss": 0.7918, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.431290749852319e-05, | |
| "loss": 0.7901, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.422118020011228e-05, | |
| "loss": 0.7867, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.412945290170136e-05, | |
| "loss": 0.795, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.403772560329044e-05, | |
| "loss": 0.7965, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.3945998304879525e-05, | |
| "loss": 0.7882, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.385427100646861e-05, | |
| "loss": 0.7883, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.3762543708057694e-05, | |
| "loss": 0.7849, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.367081640964678e-05, | |
| "loss": 0.7847, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.357908911123586e-05, | |
| "loss": 0.7794, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.3487361812824945e-05, | |
| "loss": 0.7817, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.339563451441403e-05, | |
| "loss": 0.7915, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.3303907216003115e-05, | |
| "loss": 0.7957, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.3212179917592196e-05, | |
| "loss": 0.7862, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.312045261918128e-05, | |
| "loss": 0.7916, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.3028725320770366e-05, | |
| "loss": 0.7802, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.293699802235945e-05, | |
| "loss": 0.7816, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.2845270723948536e-05, | |
| "loss": 0.7707, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.275354342553762e-05, | |
| "loss": 0.7779, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.26618161271267e-05, | |
| "loss": 0.7893, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.257008882871578e-05, | |
| "loss": 0.775, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.247836153030487e-05, | |
| "loss": 0.7816, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.238663423189395e-05, | |
| "loss": 0.7732, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.229490693348304e-05, | |
| "loss": 0.778, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.220317963507211e-05, | |
| "loss": 0.7798, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.21114523366612e-05, | |
| "loss": 0.7789, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.201972503825028e-05, | |
| "loss": 0.7832, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.192799773983937e-05, | |
| "loss": 0.7756, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.183627044142845e-05, | |
| "loss": 0.7782, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.174454314301754e-05, | |
| "loss": 0.7747, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.165281584460662e-05, | |
| "loss": 0.7736, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.15610885461957e-05, | |
| "loss": 0.7728, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.146936124778479e-05, | |
| "loss": 0.7686, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.137763394937387e-05, | |
| "loss": 0.7649, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.128590665096296e-05, | |
| "loss": 0.7685, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.1194179352552035e-05, | |
| "loss": 0.7735, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.110245205414112e-05, | |
| "loss": 0.7581, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.1010724755730205e-05, | |
| "loss": 0.7719, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.091899745731929e-05, | |
| "loss": 0.7685, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.0827270158908374e-05, | |
| "loss": 0.7688, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.0735542860497456e-05, | |
| "loss": 0.7693, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.064381556208654e-05, | |
| "loss": 0.7582, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.0552088263675625e-05, | |
| "loss": 0.7715, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.0460360965264707e-05, | |
| "loss": 0.7747, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.0368633666853795e-05, | |
| "loss": 0.7657, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.0276906368442876e-05, | |
| "loss": 0.7682, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.018517907003196e-05, | |
| "loss": 0.7674, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.0093451771621046e-05, | |
| "loss": 0.7729, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.000172447321013e-05, | |
| "loss": 0.7647, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.9909997174799215e-05, | |
| "loss": 0.7643, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.981826987638829e-05, | |
| "loss": 0.7682, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.972654257797738e-05, | |
| "loss": 0.762, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.963481527956646e-05, | |
| "loss": 0.7552, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.954308798115555e-05, | |
| "loss": 0.7639, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.945136068274463e-05, | |
| "loss": 0.761, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.935963338433371e-05, | |
| "loss": 0.7628, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.926790608592279e-05, | |
| "loss": 0.7565, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.917617878751188e-05, | |
| "loss": 0.7687, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.908445148910096e-05, | |
| "loss": 0.7582, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.899272419069005e-05, | |
| "loss": 0.7586, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.890099689227913e-05, | |
| "loss": 0.7619, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.880926959386821e-05, | |
| "loss": 0.7566, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.87175422954573e-05, | |
| "loss": 0.7594, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.862581499704638e-05, | |
| "loss": 0.7543, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.853408769863547e-05, | |
| "loss": 0.7521, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.844236040022455e-05, | |
| "loss": 0.7588, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.835063310181363e-05, | |
| "loss": 0.7558, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.8258905803402715e-05, | |
| "loss": 0.7595, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.81671785049918e-05, | |
| "loss": 0.7572, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.8075451206580884e-05, | |
| "loss": 0.764, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.798372390816997e-05, | |
| "loss": 0.7643, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.7891996609759054e-05, | |
| "loss": 0.7526, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.7800269311348135e-05, | |
| "loss": 0.7628, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.7708542012937224e-05, | |
| "loss": 0.7435, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.7616814714526305e-05, | |
| "loss": 0.7505, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.752508741611539e-05, | |
| "loss": 0.7569, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.743336011770447e-05, | |
| "loss": 0.7508, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.7341632819293556e-05, | |
| "loss": 0.7572, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.724990552088264e-05, | |
| "loss": 0.7498, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.7158178222471726e-05, | |
| "loss": 0.7475, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.706645092406081e-05, | |
| "loss": 0.7477, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.697472362564989e-05, | |
| "loss": 0.7458, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.688299632723897e-05, | |
| "loss": 0.7458, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.679126902882806e-05, | |
| "loss": 0.7565, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.669954173041714e-05, | |
| "loss": 0.7546, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.660781443200623e-05, | |
| "loss": 0.7454, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.651608713359531e-05, | |
| "loss": 0.757, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.642435983518439e-05, | |
| "loss": 0.7569, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.633263253677348e-05, | |
| "loss": 0.7521, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.624090523836256e-05, | |
| "loss": 0.7539, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.614917793995165e-05, | |
| "loss": 0.7448, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.605745064154072e-05, | |
| "loss": 0.748, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.596572334312981e-05, | |
| "loss": 0.733, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.587399604471889e-05, | |
| "loss": 0.7535, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.578226874630798e-05, | |
| "loss": 0.7423, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.569054144789706e-05, | |
| "loss": 0.7527, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.5598814149486144e-05, | |
| "loss": 0.7463, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.5507086851075225e-05, | |
| "loss": 0.7477, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.541535955266431e-05, | |
| "loss": 0.7439, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.5323632254253395e-05, | |
| "loss": 0.7445, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.523190495584248e-05, | |
| "loss": 0.7402, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.5140177657431564e-05, | |
| "loss": 0.739, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.5048450359020646e-05, | |
| "loss": 0.7412, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.4956723060609734e-05, | |
| "loss": 0.7491, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.4864995762198815e-05, | |
| "loss": 0.7425, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.47732684637879e-05, | |
| "loss": 0.747, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.4681541165376985e-05, | |
| "loss": 0.741, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.4589813866966066e-05, | |
| "loss": 0.7359, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.449808656855515e-05, | |
| "loss": 0.7355, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.4406359270144236e-05, | |
| "loss": 0.7331, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.431463197173332e-05, | |
| "loss": 0.7357, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.4222904673322405e-05, | |
| "loss": 0.7427, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.413117737491148e-05, | |
| "loss": 0.7365, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.403945007650057e-05, | |
| "loss": 0.734, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.394772277808965e-05, | |
| "loss": 0.7342, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.385599547967874e-05, | |
| "loss": 0.7375, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.376426818126782e-05, | |
| "loss": 0.732, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.36725408828569e-05, | |
| "loss": 0.737, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.358081358444599e-05, | |
| "loss": 0.739, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.348908628603507e-05, | |
| "loss": 0.7361, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.339735898762416e-05, | |
| "loss": 0.732, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.330563168921324e-05, | |
| "loss": 0.7338, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.321390439080232e-05, | |
| "loss": 0.7233, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.31221770923914e-05, | |
| "loss": 0.7292, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.303044979398049e-05, | |
| "loss": 0.7194, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.293872249556957e-05, | |
| "loss": 0.7221, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.284699519715866e-05, | |
| "loss": 0.7254, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.2755267898747735e-05, | |
| "loss": 0.7245, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.266354060033682e-05, | |
| "loss": 0.7286, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.2571813301925905e-05, | |
| "loss": 0.723, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.248008600351499e-05, | |
| "loss": 0.7352, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.2388358705104074e-05, | |
| "loss": 0.7269, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.2296631406693156e-05, | |
| "loss": 0.7264, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.2204904108282244e-05, | |
| "loss": 0.724, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.2113176809871325e-05, | |
| "loss": 0.716, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.2021449511460414e-05, | |
| "loss": 0.7297, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.1929722213049495e-05, | |
| "loss": 0.7355, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.183799491463858e-05, | |
| "loss": 0.7294, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.174626761622766e-05, | |
| "loss": 0.7243, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.1654540317816746e-05, | |
| "loss": 0.7208, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.156281301940583e-05, | |
| "loss": 0.7177, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.1471085720994916e-05, | |
| "loss": 0.7175, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.1379358422584e-05, | |
| "loss": 0.7218, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.128763112417308e-05, | |
| "loss": 0.7262, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.119590382576216e-05, | |
| "loss": 0.7219, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.110417652735125e-05, | |
| "loss": 0.7138, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.101244922894033e-05, | |
| "loss": 0.7195, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.092072193052942e-05, | |
| "loss": 0.7221, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.08289946321185e-05, | |
| "loss": 0.7141, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.073726733370758e-05, | |
| "loss": 0.7221, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.064554003529667e-05, | |
| "loss": 0.7255, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.055381273688575e-05, | |
| "loss": 0.7216, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.0462085438474835e-05, | |
| "loss": 0.7221, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0370358140063916e-05, | |
| "loss": 0.7176, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0278630841653e-05, | |
| "loss": 0.7167, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.0186903543242086e-05, | |
| "loss": 0.7201, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.009517624483117e-05, | |
| "loss": 0.7103, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.0003448946420255e-05, | |
| "loss": 0.7222, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.9911721648009333e-05, | |
| "loss": 0.7145, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.9819994349598418e-05, | |
| "loss": 0.7197, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.9728267051187503e-05, | |
| "loss": 0.7178, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.9636539752776588e-05, | |
| "loss": 0.7101, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.9544812454365673e-05, | |
| "loss": 0.719, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.945308515595475e-05, | |
| "loss": 0.7106, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.9361357857543836e-05, | |
| "loss": 0.724, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.926963055913292e-05, | |
| "loss": 0.7236, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.9177903260722005e-05, | |
| "loss": 0.7108, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.908617596231109e-05, | |
| "loss": 0.7153, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.899444866390017e-05, | |
| "loss": 0.7213, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.8902721365489256e-05, | |
| "loss": 0.702, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.881099406707834e-05, | |
| "loss": 0.7245, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8719266768667426e-05, | |
| "loss": 0.7108, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.862753947025651e-05, | |
| "loss": 0.7073, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.8535812171845595e-05, | |
| "loss": 0.7184, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.8444084873434673e-05, | |
| "loss": 0.7152, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.8352357575023758e-05, | |
| "loss": 0.7089, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.8260630276612843e-05, | |
| "loss": 0.7085, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.8168902978201928e-05, | |
| "loss": 0.7044, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.8077175679791013e-05, | |
| "loss": 0.718, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.798544838138009e-05, | |
| "loss": 0.7101, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.7893721082969175e-05, | |
| "loss": 0.7149, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.780199378455826e-05, | |
| "loss": 0.7078, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7710266486147345e-05, | |
| "loss": 0.7135, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.761853918773643e-05, | |
| "loss": 0.7081, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.752681188932551e-05, | |
| "loss": 0.7055, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.7435084590914596e-05, | |
| "loss": 0.715, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.734335729250368e-05, | |
| "loss": 0.7027, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.7251629994092766e-05, | |
| "loss": 0.7114, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.715990269568185e-05, | |
| "loss": 0.7034, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.706817539727093e-05, | |
| "loss": 0.6932, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.6976448098860013e-05, | |
| "loss": 0.7031, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.6884720800449098e-05, | |
| "loss": 0.7063, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.6792993502038183e-05, | |
| "loss": 0.7069, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.6701266203627268e-05, | |
| "loss": 0.7065, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.6609538905216346e-05, | |
| "loss": 0.7077, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.651781160680543e-05, | |
| "loss": 0.7033, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.6426084308394515e-05, | |
| "loss": 0.7209, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.63343570099836e-05, | |
| "loss": 0.7046, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.6242629711572685e-05, | |
| "loss": 0.7026, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.6150902413161766e-05, | |
| "loss": 0.7012, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.605917511475085e-05, | |
| "loss": 0.6982, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.5967447816339936e-05, | |
| "loss": 0.705, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.587572051792902e-05, | |
| "loss": 0.7058, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.5783993219518106e-05, | |
| "loss": 0.6957, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.5692265921107184e-05, | |
| "loss": 0.6904, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.560053862269627e-05, | |
| "loss": 0.6932, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.5508811324285353e-05, | |
| "loss": 0.7045, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.5417084025874438e-05, | |
| "loss": 0.6939, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5325356727463523e-05, | |
| "loss": 0.698, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5233629429052608e-05, | |
| "loss": 0.7059, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5141902130641686e-05, | |
| "loss": 0.7057, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.505017483223077e-05, | |
| "loss": 0.7016, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.4958447533819855e-05, | |
| "loss": 0.6934, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.486672023540894e-05, | |
| "loss": 0.709, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.4774992936998025e-05, | |
| "loss": 0.6983, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.468326563858711e-05, | |
| "loss": 0.6995, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.459153834017619e-05, | |
| "loss": 0.7006, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.4499811041765276e-05, | |
| "loss": 0.705, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.440808374335436e-05, | |
| "loss": 0.6982, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.4316356444943442e-05, | |
| "loss": 0.6891, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.4224629146532527e-05, | |
| "loss": 0.7012, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.4132901848121608e-05, | |
| "loss": 0.689, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.4041174549710693e-05, | |
| "loss": 0.702, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.3949447251299778e-05, | |
| "loss": 0.7073, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.385771995288886e-05, | |
| "loss": 0.6913, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.3765992654477944e-05, | |
| "loss": 0.6937, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.3674265356067025e-05, | |
| "loss": 0.691, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.358253805765611e-05, | |
| "loss": 0.6925, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.3490810759245195e-05, | |
| "loss": 0.697, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.339908346083428e-05, | |
| "loss": 0.6873, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.3307356162423365e-05, | |
| "loss": 0.6889, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.3215628864012446e-05, | |
| "loss": 0.6913, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.312390156560153e-05, | |
| "loss": 0.6933, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.3032174267190616e-05, | |
| "loss": 0.6935, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.2940446968779697e-05, | |
| "loss": 0.6865, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2848719670368782e-05, | |
| "loss": 0.6993, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2756992371957867e-05, | |
| "loss": 0.6955, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.2665265073546948e-05, | |
| "loss": 0.6876, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.2573537775136033e-05, | |
| "loss": 0.6938, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.2481810476725114e-05, | |
| "loss": 0.6875, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.23900831783142e-05, | |
| "loss": 0.6953, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.2298355879903284e-05, | |
| "loss": 0.7005, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2206628581492365e-05, | |
| "loss": 0.6945, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.211490128308145e-05, | |
| "loss": 0.6928, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.2023173984670535e-05, | |
| "loss": 0.6887, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.193144668625962e-05, | |
| "loss": 0.6989, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.1839719387848705e-05, | |
| "loss": 0.6921, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.1747992089437786e-05, | |
| "loss": 0.6926, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.165626479102687e-05, | |
| "loss": 0.6882, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.1564537492615952e-05, | |
| "loss": 0.6821, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.1472810194205037e-05, | |
| "loss": 0.6891, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1381082895794122e-05, | |
| "loss": 0.6875, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1289355597383203e-05, | |
| "loss": 0.6896, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1197628298972288e-05, | |
| "loss": 0.6876, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1105901000561373e-05, | |
| "loss": 0.6799, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.1014173702150454e-05, | |
| "loss": 0.6788, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.092244640373954e-05, | |
| "loss": 0.6863, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.083071910532862e-05, | |
| "loss": 0.6873, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0738991806917705e-05, | |
| "loss": 0.6778, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.064726450850679e-05, | |
| "loss": 0.68, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.0555537210095875e-05, | |
| "loss": 0.6827, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.046380991168496e-05, | |
| "loss": 0.6856, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.037208261327404e-05, | |
| "loss": 0.6871, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.0280355314863126e-05, | |
| "loss": 0.676, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.018862801645221e-05, | |
| "loss": 0.673, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.0096900718041292e-05, | |
| "loss": 0.6801, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0005173419630377e-05, | |
| "loss": 0.6829, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9913446121219462e-05, | |
| "loss": 0.6884, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9821718822808543e-05, | |
| "loss": 0.6751, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.9729991524397628e-05, | |
| "loss": 0.674, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.963826422598671e-05, | |
| "loss": 0.6833, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9546536927575794e-05, | |
| "loss": 0.6918, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.945480962916488e-05, | |
| "loss": 0.6889, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9363082330753964e-05, | |
| "loss": 0.6737, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.927135503234305e-05, | |
| "loss": 0.6735, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.917962773393213e-05, | |
| "loss": 0.685, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.9087900435521215e-05, | |
| "loss": 0.6725, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.89961731371103e-05, | |
| "loss": 0.6745, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.890444583869938e-05, | |
| "loss": 0.6721, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8812718540288466e-05, | |
| "loss": 0.6808, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8720991241877547e-05, | |
| "loss": 0.6848, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8629263943466632e-05, | |
| "loss": 0.6736, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8537536645055717e-05, | |
| "loss": 0.6768, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8445809346644798e-05, | |
| "loss": 0.6848, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.8354082048233883e-05, | |
| "loss": 0.6751, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.8262354749822968e-05, | |
| "loss": 0.6884, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.817062745141205e-05, | |
| "loss": 0.677, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8078900153001134e-05, | |
| "loss": 0.6789, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.798717285459022e-05, | |
| "loss": 0.6623, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7895445556179304e-05, | |
| "loss": 0.6765, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.780371825776839e-05, | |
| "loss": 0.6769, | |
| "step": 175500 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.771199095935747e-05, | |
| "loss": 0.6798, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.7620263660946555e-05, | |
| "loss": 0.6725, | |
| "step": 176500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7528536362535636e-05, | |
| "loss": 0.6689, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.743680906412472e-05, | |
| "loss": 0.6718, | |
| "step": 177500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.7345081765713806e-05, | |
| "loss": 0.6697, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.7253354467302887e-05, | |
| "loss": 0.6663, | |
| "step": 178500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7161627168891972e-05, | |
| "loss": 0.665, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.7069899870481053e-05, | |
| "loss": 0.6698, | |
| "step": 179500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.6978172572070138e-05, | |
| "loss": 0.6769, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.6886445273659223e-05, | |
| "loss": 0.6712, | |
| "step": 180500 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.6794717975248304e-05, | |
| "loss": 0.6708, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.670299067683739e-05, | |
| "loss": 0.6812, | |
| "step": 181500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.6611263378426474e-05, | |
| "loss": 0.6648, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.651953608001556e-05, | |
| "loss": 0.6659, | |
| "step": 182500 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.6427808781604644e-05, | |
| "loss": 0.6612, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.6336081483193725e-05, | |
| "loss": 0.6629, | |
| "step": 183500 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.624435418478281e-05, | |
| "loss": 0.6657, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.6152626886371895e-05, | |
| "loss": 0.6702, | |
| "step": 184500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.6060899587960976e-05, | |
| "loss": 0.6682, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.596917228955006e-05, | |
| "loss": 0.6636, | |
| "step": 185500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.5877444991139142e-05, | |
| "loss": 0.6704, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.5785717692728227e-05, | |
| "loss": 0.6649, | |
| "step": 186500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5693990394317312e-05, | |
| "loss": 0.6637, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5602263095906393e-05, | |
| "loss": 0.6612, | |
| "step": 187500 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.5510535797495478e-05, | |
| "loss": 0.6621, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.541880849908456e-05, | |
| "loss": 0.6624, | |
| "step": 188500 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5327081200673644e-05, | |
| "loss": 0.66, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.523535390226273e-05, | |
| "loss": 0.6648, | |
| "step": 189500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5143626603851812e-05, | |
| "loss": 0.6661, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.5051899305440897e-05, | |
| "loss": 0.6652, | |
| "step": 190500 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4960172007029982e-05, | |
| "loss": 0.662, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4868444708619065e-05, | |
| "loss": 0.6507, | |
| "step": 191500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.477671741020815e-05, | |
| "loss": 0.6572, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4684990111797231e-05, | |
| "loss": 0.6598, | |
| "step": 192500 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4593262813386316e-05, | |
| "loss": 0.6579, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.45015355149754e-05, | |
| "loss": 0.6592, | |
| "step": 193500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4409808216564482e-05, | |
| "loss": 0.66, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4318080918153567e-05, | |
| "loss": 0.6636, | |
| "step": 194500 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.422635361974265e-05, | |
| "loss": 0.6648, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4134626321331735e-05, | |
| "loss": 0.6613, | |
| "step": 195500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.404289902292082e-05, | |
| "loss": 0.6648, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.3951171724509901e-05, | |
| "loss": 0.6556, | |
| "step": 196500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3859444426098986e-05, | |
| "loss": 0.6487, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3767717127688067e-05, | |
| "loss": 0.6587, | |
| "step": 197500 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.3675989829277152e-05, | |
| "loss": 0.6548, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.3584262530866237e-05, | |
| "loss": 0.6635, | |
| "step": 198500 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.349253523245532e-05, | |
| "loss": 0.6649, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3400807934044405e-05, | |
| "loss": 0.6565, | |
| "step": 199500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.330908063563349e-05, | |
| "loss": 0.6528, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3217353337222571e-05, | |
| "loss": 0.6572, | |
| "step": 200500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3125626038811656e-05, | |
| "loss": 0.6548, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.3033898740400737e-05, | |
| "loss": 0.6617, | |
| "step": 201500 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.2942171441989822e-05, | |
| "loss": 0.661, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2850444143578907e-05, | |
| "loss": 0.6536, | |
| "step": 202500 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.275871684516799e-05, | |
| "loss": 0.6608, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2666989546757075e-05, | |
| "loss": 0.6541, | |
| "step": 203500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2575262248346156e-05, | |
| "loss": 0.6597, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2483534949935241e-05, | |
| "loss": 0.6497, | |
| "step": 204500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2391807651524324e-05, | |
| "loss": 0.6577, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2300080353113407e-05, | |
| "loss": 0.6687, | |
| "step": 205500 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2208353054702492e-05, | |
| "loss": 0.6542, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2116625756291577e-05, | |
| "loss": 0.65, | |
| "step": 206500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.202489845788066e-05, | |
| "loss": 0.6519, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.1933171159469743e-05, | |
| "loss": 0.6564, | |
| "step": 207500 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1841443861058828e-05, | |
| "loss": 0.6525, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.174971656264791e-05, | |
| "loss": 0.6505, | |
| "step": 208500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1657989264236994e-05, | |
| "loss": 0.6514, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1566261965826079e-05, | |
| "loss": 0.6419, | |
| "step": 209500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1474534667415162e-05, | |
| "loss": 0.6497, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1382807369004247e-05, | |
| "loss": 0.6519, | |
| "step": 210500 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.129108007059333e-05, | |
| "loss": 0.65, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1199352772182413e-05, | |
| "loss": 0.6516, | |
| "step": 211500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1107625473771496e-05, | |
| "loss": 0.653, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.101589817536058e-05, | |
| "loss": 0.6574, | |
| "step": 212500 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0924170876949664e-05, | |
| "loss": 0.6539, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0832443578538749e-05, | |
| "loss": 0.6503, | |
| "step": 213500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0740716280127832e-05, | |
| "loss": 0.6543, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0648988981716915e-05, | |
| "loss": 0.6574, | |
| "step": 214500 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0557261683306e-05, | |
| "loss": 0.6465, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0465534384895083e-05, | |
| "loss": 0.6464, | |
| "step": 215500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0373807086484166e-05, | |
| "loss": 0.6462, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0282079788073249e-05, | |
| "loss": 0.6544, | |
| "step": 216500 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0190352489662334e-05, | |
| "loss": 0.6498, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0098625191251419e-05, | |
| "loss": 0.6504, | |
| "step": 217500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.0006897892840502e-05, | |
| "loss": 0.6267, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.915170594429585e-06, | |
| "loss": 0.6397, | |
| "step": 218500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.823443296018668e-06, | |
| "loss": 0.6553, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.731715997607753e-06, | |
| "loss": 0.6429, | |
| "step": 219500 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.639988699196836e-06, | |
| "loss": 0.6426, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.548261400785919e-06, | |
| "loss": 0.6417, | |
| "step": 220500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.456534102375004e-06, | |
| "loss": 0.6481, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.364806803964089e-06, | |
| "loss": 0.6468, | |
| "step": 221500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.273079505553172e-06, | |
| "loss": 0.6466, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.181352207142255e-06, | |
| "loss": 0.6532, | |
| "step": 222500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.089624908731338e-06, | |
| "loss": 0.6487, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 8.997897610320421e-06, | |
| "loss": 0.643, | |
| "step": 223500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.906170311909506e-06, | |
| "loss": 0.6409, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.814443013498589e-06, | |
| "loss": 0.6442, | |
| "step": 224500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.722715715087674e-06, | |
| "loss": 0.6381, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.630988416676757e-06, | |
| "loss": 0.6469, | |
| "step": 225500 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.539261118265842e-06, | |
| "loss": 0.6436, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.447533819854925e-06, | |
| "loss": 0.6418, | |
| "step": 226500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.355806521444008e-06, | |
| "loss": 0.6362, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.264079223033091e-06, | |
| "loss": 0.6505, | |
| "step": 227500 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.172351924622176e-06, | |
| "loss": 0.6437, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.08062462621126e-06, | |
| "loss": 0.6448, | |
| "step": 228500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 7.988897327800344e-06, | |
| "loss": 0.646, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.897170029389427e-06, | |
| "loss": 0.6436, | |
| "step": 229500 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.80544273097851e-06, | |
| "loss": 0.6398, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.713715432567595e-06, | |
| "loss": 0.6404, | |
| "step": 230500 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.621988134156678e-06, | |
| "loss": 0.6461, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.530260835745762e-06, | |
| "loss": 0.6497, | |
| "step": 231500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.438533537334845e-06, | |
| "loss": 0.6477, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.346806238923929e-06, | |
| "loss": 0.6452, | |
| "step": 232500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.255078940513013e-06, | |
| "loss": 0.6361, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.163351642102097e-06, | |
| "loss": 0.6375, | |
| "step": 233500 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.07162434369118e-06, | |
| "loss": 0.6436, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 6.979897045280264e-06, | |
| "loss": 0.646, | |
| "step": 234500 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.8881697468693486e-06, | |
| "loss": 0.6362, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.796442448458432e-06, | |
| "loss": 0.6411, | |
| "step": 235500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.704715150047515e-06, | |
| "loss": 0.6458, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.612987851636599e-06, | |
| "loss": 0.637, | |
| "step": 236500 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.521260553225682e-06, | |
| "loss": 0.6307, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.429533254814767e-06, | |
| "loss": 0.6413, | |
| "step": 237500 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.33780595640385e-06, | |
| "loss": 0.6428, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.246078657992934e-06, | |
| "loss": 0.6348, | |
| "step": 238500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.154351359582018e-06, | |
| "loss": 0.6405, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.062624061171101e-06, | |
| "loss": 0.6361, | |
| "step": 239500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 5.970896762760185e-06, | |
| "loss": 0.6309, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.879169464349269e-06, | |
| "loss": 0.63, | |
| "step": 240500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.787442165938353e-06, | |
| "loss": 0.6323, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.695714867527436e-06, | |
| "loss": 0.6344, | |
| "step": 241500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.60398756911652e-06, | |
| "loss": 0.6435, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.512260270705604e-06, | |
| "loss": 0.6328, | |
| "step": 242500 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.420532972294687e-06, | |
| "loss": 0.6425, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.328805673883771e-06, | |
| "loss": 0.6382, | |
| "step": 243500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.237078375472855e-06, | |
| "loss": 0.6409, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.145351077061939e-06, | |
| "loss": 0.6347, | |
| "step": 244500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.053623778651022e-06, | |
| "loss": 0.6478, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.961896480240106e-06, | |
| "loss": 0.6287, | |
| "step": 245500 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.87016918182919e-06, | |
| "loss": 0.6268, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.778441883418273e-06, | |
| "loss": 0.6386, | |
| "step": 246500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.686714585007357e-06, | |
| "loss": 0.6385, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.59498728659644e-06, | |
| "loss": 0.6361, | |
| "step": 247500 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.503259988185525e-06, | |
| "loss": 0.6337, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.411532689774608e-06, | |
| "loss": 0.6379, | |
| "step": 248500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.319805391363692e-06, | |
| "loss": 0.644, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.228078092952776e-06, | |
| "loss": 0.6366, | |
| "step": 249500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.13635079454186e-06, | |
| "loss": 0.6393, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.044623496130943e-06, | |
| "loss": 0.6332, | |
| "step": 250500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.952896197720026e-06, | |
| "loss": 0.6377, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.861168899309111e-06, | |
| "loss": 0.6277, | |
| "step": 251500 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.7694416008981937e-06, | |
| "loss": 0.639, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.6777143024872777e-06, | |
| "loss": 0.636, | |
| "step": 252500 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.5859870040763612e-06, | |
| "loss": 0.6293, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.494259705665445e-06, | |
| "loss": 0.6384, | |
| "step": 253500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.4025324072545287e-06, | |
| "loss": 0.6307, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.3108051088436127e-06, | |
| "loss": 0.6273, | |
| "step": 254500 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.219077810432696e-06, | |
| "loss": 0.6318, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.1273505120217797e-06, | |
| "loss": 0.632, | |
| "step": 255500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.0356232136108637e-06, | |
| "loss": 0.6353, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.943895915199947e-06, | |
| "loss": 0.6327, | |
| "step": 256500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.852168616789031e-06, | |
| "loss": 0.6336, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.7604413183781147e-06, | |
| "loss": 0.6397, | |
| "step": 257500 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.6687140199671982e-06, | |
| "loss": 0.6336, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.576986721556282e-06, | |
| "loss": 0.6271, | |
| "step": 258500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.4852594231453657e-06, | |
| "loss": 0.6288, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.3935321247344497e-06, | |
| "loss": 0.6263, | |
| "step": 259500 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.301804826323533e-06, | |
| "loss": 0.6297, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.210077527912617e-06, | |
| "loss": 0.6344, | |
| "step": 260500 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.1183502295017007e-06, | |
| "loss": 0.6322, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.0266229310907846e-06, | |
| "loss": 0.6302, | |
| "step": 261500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.934895632679868e-06, | |
| "loss": 0.6196, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.8431683342689517e-06, | |
| "loss": 0.6275, | |
| "step": 262500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.7514410358580354e-06, | |
| "loss": 0.6376, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.6597137374471192e-06, | |
| "loss": 0.6267, | |
| "step": 263500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.567986439036203e-06, | |
| "loss": 0.6303, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.4762591406252867e-06, | |
| "loss": 0.6322, | |
| "step": 264500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.3845318422143704e-06, | |
| "loss": 0.6256, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.2928045438034542e-06, | |
| "loss": 0.626, | |
| "step": 265500 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.201077245392538e-06, | |
| "loss": 0.6283, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.1093499469816216e-06, | |
| "loss": 0.6314, | |
| "step": 266500 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0176226485707054e-06, | |
| "loss": 0.638, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 9.25895350159789e-07, | |
| "loss": 0.625, | |
| "step": 267500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.341680517488727e-07, | |
| "loss": 0.6378, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.424407533379564e-07, | |
| "loss": 0.6262, | |
| "step": 268500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.507134549270401e-07, | |
| "loss": 0.6305, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.589861565161238e-07, | |
| "loss": 0.6291, | |
| "step": 269500 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.672588581052076e-07, | |
| "loss": 0.6203, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.7553155969429127e-07, | |
| "loss": 0.6245, | |
| "step": 270500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.8380426128337496e-07, | |
| "loss": 0.6245, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.920769628724587e-07, | |
| "loss": 0.6292, | |
| "step": 271500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.0034966446154242e-07, | |
| "loss": 0.6343, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 8.62236605062613e-09, | |
| "loss": 0.6237, | |
| "step": 272500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 272547, | |
| "total_flos": 1.1358886111610301e+18, | |
| "train_loss": 0.7085280003033105, | |
| "train_runtime": 180307.3408, | |
| "train_samples_per_second": 24.185, | |
| "train_steps_per_second": 1.512 | |
| } | |
| ], | |
| "max_steps": 272547, | |
| "num_train_epochs": 3, | |
| "total_flos": 1.1358886111610301e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |