| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "global_step": 272547, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9908272701589084e-05, |
| "loss": 0.8705, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.981654540317817e-05, |
| "loss": 0.877, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9724818104767253e-05, |
| "loss": 0.8699, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9633090806356335e-05, |
| "loss": 0.8724, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.954136350794542e-05, |
| "loss": 0.875, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9449636209534504e-05, |
| "loss": 0.8736, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.935790891112359e-05, |
| "loss": 0.8723, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.926618161271267e-05, |
| "loss": 0.8776, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9174454314301755e-05, |
| "loss": 0.8679, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.908272701589084e-05, |
| "loss": 0.8635, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.8990999717479925e-05, |
| "loss": 0.863, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.8899272419069006e-05, |
| "loss": 0.8591, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.880754512065809e-05, |
| "loss": 0.8687, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.8715817822247176e-05, |
| "loss": 0.8716, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.862409052383626e-05, |
| "loss": 0.8662, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.8532363225425346e-05, |
| "loss": 0.8644, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.844063592701443e-05, |
| "loss": 0.8674, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.8348908628603515e-05, |
| "loss": 0.8583, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.825718133019259e-05, |
| "loss": 0.8552, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.816545403178168e-05, |
| "loss": 0.8537, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.807372673337076e-05, |
| "loss": 0.8591, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.798199943495985e-05, |
| "loss": 0.8513, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.789027213654893e-05, |
| "loss": 0.8655, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.779854483813801e-05, |
| "loss": 0.8571, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.770681753972709e-05, |
| "loss": 0.8558, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.761509024131618e-05, |
| "loss": 0.8371, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.752336294290526e-05, |
| "loss": 0.8426, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.743163564449435e-05, |
| "loss": 0.8498, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.733990834608343e-05, |
| "loss": 0.8612, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.724818104767251e-05, |
| "loss": 0.8525, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.71564537492616e-05, |
| "loss": 0.844, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.706472645085068e-05, |
| "loss": 0.8655, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.697299915243977e-05, |
| "loss": 0.8448, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.6881271854028845e-05, |
| "loss": 0.8461, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.678954455561793e-05, |
| "loss": 0.8478, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.6697817257207015e-05, |
| "loss": 0.8457, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.66060899587961e-05, |
| "loss": 0.8446, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.6514362660385184e-05, |
| "loss": 0.8412, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.6422635361974266e-05, |
| "loss": 0.8327, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.633090806356335e-05, |
| "loss": 0.8374, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.6239180765152435e-05, |
| "loss": 0.8418, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.614745346674152e-05, |
| "loss": 0.8402, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.6055726168330605e-05, |
| "loss": 0.8362, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.5963998869919686e-05, |
| "loss": 0.8493, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.587227157150877e-05, |
| "loss": 0.8413, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.5780544273097856e-05, |
| "loss": 0.842, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.568881697468694e-05, |
| "loss": 0.8386, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.5597089676276025e-05, |
| "loss": 0.8402, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.55053623778651e-05, |
| "loss": 0.8378, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.541363507945419e-05, |
| "loss": 0.8254, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.532190778104327e-05, |
| "loss": 0.8344, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.523018048263236e-05, |
| "loss": 0.8399, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.513845318422144e-05, |
| "loss": 0.8303, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.504672588581053e-05, |
| "loss": 0.8334, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.49549985873996e-05, |
| "loss": 0.8334, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.486327128898869e-05, |
| "loss": 0.8407, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.477154399057777e-05, |
| "loss": 0.8347, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.467981669216686e-05, |
| "loss": 0.8354, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.458808939375594e-05, |
| "loss": 0.8304, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.449636209534502e-05, |
| "loss": 0.8282, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.440463479693411e-05, |
| "loss": 0.8293, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.431290749852319e-05, |
| "loss": 0.8267, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.422118020011228e-05, |
| "loss": 0.8226, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.412945290170136e-05, |
| "loss": 0.834, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.403772560329044e-05, |
| "loss": 0.8332, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.3945998304879525e-05, |
| "loss": 0.8248, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.385427100646861e-05, |
| "loss": 0.8275, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.3762543708057694e-05, |
| "loss": 0.822, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.367081640964678e-05, |
| "loss": 0.8222, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.357908911123586e-05, |
| "loss": 0.8194, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.3487361812824945e-05, |
| "loss": 0.8213, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.339563451441403e-05, |
| "loss": 0.8315, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.3303907216003115e-05, |
| "loss": 0.8329, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.3212179917592196e-05, |
| "loss": 0.8219, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.312045261918128e-05, |
| "loss": 0.8298, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.3028725320770366e-05, |
| "loss": 0.8178, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.293699802235945e-05, |
| "loss": 0.8185, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.2845270723948536e-05, |
| "loss": 0.8076, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.275354342553762e-05, |
| "loss": 0.8174, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.26618161271267e-05, |
| "loss": 0.8256, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.257008882871578e-05, |
| "loss": 0.8134, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.247836153030487e-05, |
| "loss": 0.8197, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.238663423189395e-05, |
| "loss": 0.8144, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.229490693348304e-05, |
| "loss": 0.8135, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.220317963507211e-05, |
| "loss": 0.8183, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.21114523366612e-05, |
| "loss": 0.8145, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.201972503825028e-05, |
| "loss": 0.8237, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.192799773983937e-05, |
| "loss": 0.8147, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.183627044142845e-05, |
| "loss": 0.815, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.174454314301754e-05, |
| "loss": 0.8131, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.165281584460662e-05, |
| "loss": 0.8129, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.15610885461957e-05, |
| "loss": 0.814, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.146936124778479e-05, |
| "loss": 0.8066, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.137763394937387e-05, |
| "loss": 0.8027, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.128590665096296e-05, |
| "loss": 0.8055, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.1194179352552035e-05, |
| "loss": 0.8104, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.110245205414112e-05, |
| "loss": 0.798, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.1010724755730205e-05, |
| "loss": 0.8099, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.091899745731929e-05, |
| "loss": 0.8088, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.0827270158908374e-05, |
| "loss": 0.8061, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.0735542860497456e-05, |
| "loss": 0.8075, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.064381556208654e-05, |
| "loss": 0.7942, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.0552088263675625e-05, |
| "loss": 0.8103, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.0460360965264707e-05, |
| "loss": 0.8129, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.0368633666853795e-05, |
| "loss": 0.8072, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.0276906368442876e-05, |
| "loss": 0.8036, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.018517907003196e-05, |
| "loss": 0.8049, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.0093451771621046e-05, |
| "loss": 0.8079, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.000172447321013e-05, |
| "loss": 0.8005, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.9909997174799215e-05, |
| "loss": 0.8023, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.981826987638829e-05, |
| "loss": 0.8062, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.972654257797738e-05, |
| "loss": 0.8024, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.963481527956646e-05, |
| "loss": 0.7952, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.954308798115555e-05, |
| "loss": 0.8003, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.945136068274463e-05, |
| "loss": 0.7995, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.935963338433371e-05, |
| "loss": 0.8014, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.926790608592279e-05, |
| "loss": 0.795, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.917617878751188e-05, |
| "loss": 0.8074, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.908445148910096e-05, |
| "loss": 0.7955, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.899272419069005e-05, |
| "loss": 0.7974, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 3.890099689227913e-05, |
| "loss": 0.8003, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 3.880926959386821e-05, |
| "loss": 0.7929, |
| "step": 61000 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.87175422954573e-05, |
| "loss": 0.7987, |
| "step": 61500 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.862581499704638e-05, |
| "loss": 0.7923, |
| "step": 62000 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 3.853408769863547e-05, |
| "loss": 0.7888, |
| "step": 62500 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 3.844236040022455e-05, |
| "loss": 0.7967, |
| "step": 63000 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 3.835063310181363e-05, |
| "loss": 0.7941, |
| "step": 63500 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 3.8258905803402715e-05, |
| "loss": 0.7964, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.81671785049918e-05, |
| "loss": 0.7906, |
| "step": 64500 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.8075451206580884e-05, |
| "loss": 0.8005, |
| "step": 65000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.798372390816997e-05, |
| "loss": 0.8007, |
| "step": 65500 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.7891996609759054e-05, |
| "loss": 0.7897, |
| "step": 66000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.7800269311348135e-05, |
| "loss": 0.797, |
| "step": 66500 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.7708542012937224e-05, |
| "loss": 0.7811, |
| "step": 67000 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.7616814714526305e-05, |
| "loss": 0.7853, |
| "step": 67500 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.752508741611539e-05, |
| "loss": 0.7939, |
| "step": 68000 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.743336011770447e-05, |
| "loss": 0.7885, |
| "step": 68500 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.7341632819293556e-05, |
| "loss": 0.7942, |
| "step": 69000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.724990552088264e-05, |
| "loss": 0.7861, |
| "step": 69500 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.7158178222471726e-05, |
| "loss": 0.7837, |
| "step": 70000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.706645092406081e-05, |
| "loss": 0.7832, |
| "step": 70500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.697472362564989e-05, |
| "loss": 0.7834, |
| "step": 71000 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.688299632723897e-05, |
| "loss": 0.7818, |
| "step": 71500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.679126902882806e-05, |
| "loss": 0.795, |
| "step": 72000 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.669954173041714e-05, |
| "loss": 0.7933, |
| "step": 72500 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.660781443200623e-05, |
| "loss": 0.7812, |
| "step": 73000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 3.651608713359531e-05, |
| "loss": 0.7944, |
| "step": 73500 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 3.642435983518439e-05, |
| "loss": 0.7979, |
| "step": 74000 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 3.633263253677348e-05, |
| "loss": 0.7912, |
| "step": 74500 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 3.624090523836256e-05, |
| "loss": 0.791, |
| "step": 75000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 3.614917793995165e-05, |
| "loss": 0.7825, |
| "step": 75500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.605745064154072e-05, |
| "loss": 0.7833, |
| "step": 76000 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.596572334312981e-05, |
| "loss": 0.7707, |
| "step": 76500 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.587399604471889e-05, |
| "loss": 0.7888, |
| "step": 77000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.578226874630798e-05, |
| "loss": 0.7792, |
| "step": 77500 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.569054144789706e-05, |
| "loss": 0.7874, |
| "step": 78000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.5598814149486144e-05, |
| "loss": 0.7805, |
| "step": 78500 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 3.5507086851075225e-05, |
| "loss": 0.782, |
| "step": 79000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.541535955266431e-05, |
| "loss": 0.7809, |
| "step": 79500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.5323632254253395e-05, |
| "loss": 0.7795, |
| "step": 80000 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.523190495584248e-05, |
| "loss": 0.7777, |
| "step": 80500 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.5140177657431564e-05, |
| "loss": 0.7728, |
| "step": 81000 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.5048450359020646e-05, |
| "loss": 0.7776, |
| "step": 81500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.4956723060609734e-05, |
| "loss": 0.7854, |
| "step": 82000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.4864995762198815e-05, |
| "loss": 0.7795, |
| "step": 82500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.47732684637879e-05, |
| "loss": 0.7808, |
| "step": 83000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.4681541165376985e-05, |
| "loss": 0.7779, |
| "step": 83500 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.4589813866966066e-05, |
| "loss": 0.7738, |
| "step": 84000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.449808656855515e-05, |
| "loss": 0.7725, |
| "step": 84500 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.4406359270144236e-05, |
| "loss": 0.7697, |
| "step": 85000 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.431463197173332e-05, |
| "loss": 0.7743, |
| "step": 85500 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.4222904673322405e-05, |
| "loss": 0.7807, |
| "step": 86000 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.413117737491148e-05, |
| "loss": 0.7714, |
| "step": 86500 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.403945007650057e-05, |
| "loss": 0.7727, |
| "step": 87000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.394772277808965e-05, |
| "loss": 0.772, |
| "step": 87500 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.385599547967874e-05, |
| "loss": 0.7763, |
| "step": 88000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.376426818126782e-05, |
| "loss": 0.7669, |
| "step": 88500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.36725408828569e-05, |
| "loss": 0.7712, |
| "step": 89000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.358081358444599e-05, |
| "loss": 0.7717, |
| "step": 89500 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.348908628603507e-05, |
| "loss": 0.7715, |
| "step": 90000 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.339735898762416e-05, |
| "loss": 0.765, |
| "step": 90500 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.330563168921324e-05, |
| "loss": 0.7689, |
| "step": 91000 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.321390439080232e-05, |
| "loss": 0.7605, |
| "step": 91500 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.31221770923914e-05, |
| "loss": 0.7668, |
| "step": 92000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 3.303044979398049e-05, |
| "loss": 0.7567, |
| "step": 92500 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 3.293872249556957e-05, |
| "loss": 0.7614, |
| "step": 93000 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.284699519715866e-05, |
| "loss": 0.7624, |
| "step": 93500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.2755267898747735e-05, |
| "loss": 0.7622, |
| "step": 94000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.266354060033682e-05, |
| "loss": 0.7649, |
| "step": 94500 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 3.2571813301925905e-05, |
| "loss": 0.7574, |
| "step": 95000 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 3.248008600351499e-05, |
| "loss": 0.7709, |
| "step": 95500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.2388358705104074e-05, |
| "loss": 0.7627, |
| "step": 96000 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.2296631406693156e-05, |
| "loss": 0.7617, |
| "step": 96500 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 3.2204904108282244e-05, |
| "loss": 0.7571, |
| "step": 97000 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 3.2113176809871325e-05, |
| "loss": 0.7492, |
| "step": 97500 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.2021449511460414e-05, |
| "loss": 0.764, |
| "step": 98000 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.1929722213049495e-05, |
| "loss": 0.772, |
| "step": 98500 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 3.183799491463858e-05, |
| "loss": 0.7645, |
| "step": 99000 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.174626761622766e-05, |
| "loss": 0.7609, |
| "step": 99500 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.1654540317816746e-05, |
| "loss": 0.7576, |
| "step": 100000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.156281301940583e-05, |
| "loss": 0.7515, |
| "step": 100500 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.1471085720994916e-05, |
| "loss": 0.7522, |
| "step": 101000 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.1379358422584e-05, |
| "loss": 0.7556, |
| "step": 101500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.128763112417308e-05, |
| "loss": 0.7622, |
| "step": 102000 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.119590382576216e-05, |
| "loss": 0.7582, |
| "step": 102500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.110417652735125e-05, |
| "loss": 0.7507, |
| "step": 103000 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 3.101244922894033e-05, |
| "loss": 0.7548, |
| "step": 103500 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 3.092072193052942e-05, |
| "loss": 0.7588, |
| "step": 104000 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 3.08289946321185e-05, |
| "loss": 0.7466, |
| "step": 104500 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.073726733370758e-05, |
| "loss": 0.7546, |
| "step": 105000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.064554003529667e-05, |
| "loss": 0.7601, |
| "step": 105500 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.055381273688575e-05, |
| "loss": 0.7544, |
| "step": 106000 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.0462085438474835e-05, |
| "loss": 0.7531, |
| "step": 106500 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 3.0370358140063916e-05, |
| "loss": 0.7519, |
| "step": 107000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 3.0278630841653e-05, |
| "loss": 0.7508, |
| "step": 107500 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.0186903543242086e-05, |
| "loss": 0.7528, |
| "step": 108000 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.009517624483117e-05, |
| "loss": 0.746, |
| "step": 108500 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 3.0003448946420255e-05, |
| "loss": 0.7567, |
| "step": 109000 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 2.9911721648009333e-05, |
| "loss": 0.7482, |
| "step": 109500 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 2.9819994349598418e-05, |
| "loss": 0.7518, |
| "step": 110000 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2.9728267051187503e-05, |
| "loss": 0.7525, |
| "step": 110500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2.9636539752776588e-05, |
| "loss": 0.7455, |
| "step": 111000 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2.9544812454365673e-05, |
| "loss": 0.7542, |
| "step": 111500 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2.945308515595475e-05, |
| "loss": 0.7445, |
| "step": 112000 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 2.9361357857543836e-05, |
| "loss": 0.7586, |
| "step": 112500 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 2.926963055913292e-05, |
| "loss": 0.7586, |
| "step": 113000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.9177903260722005e-05, |
| "loss": 0.7467, |
| "step": 113500 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.908617596231109e-05, |
| "loss": 0.7477, |
| "step": 114000 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 2.899444866390017e-05, |
| "loss": 0.7552, |
| "step": 114500 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 2.8902721365489256e-05, |
| "loss": 0.7335, |
| "step": 115000 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 2.881099406707834e-05, |
| "loss": 0.76, |
| "step": 115500 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.8719266768667426e-05, |
| "loss": 0.7453, |
| "step": 116000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.862753947025651e-05, |
| "loss": 0.7414, |
| "step": 116500 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2.8535812171845595e-05, |
| "loss": 0.7515, |
| "step": 117000 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2.8444084873434673e-05, |
| "loss": 0.7482, |
| "step": 117500 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 2.8352357575023758e-05, |
| "loss": 0.7425, |
| "step": 118000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 2.8260630276612843e-05, |
| "loss": 0.7414, |
| "step": 118500 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 2.8168902978201928e-05, |
| "loss": 0.7389, |
| "step": 119000 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 2.8077175679791013e-05, |
| "loss": 0.7531, |
| "step": 119500 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 2.798544838138009e-05, |
| "loss": 0.7423, |
| "step": 120000 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2.7893721082969175e-05, |
| "loss": 0.7497, |
| "step": 120500 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2.780199378455826e-05, |
| "loss": 0.7427, |
| "step": 121000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2.7710266486147345e-05, |
| "loss": 0.7469, |
| "step": 121500 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2.761853918773643e-05, |
| "loss": 0.7423, |
| "step": 122000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2.752681188932551e-05, |
| "loss": 0.7419, |
| "step": 122500 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2.7435084590914596e-05, |
| "loss": 0.7465, |
| "step": 123000 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2.734335729250368e-05, |
| "loss": 0.7327, |
| "step": 123500 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2.7251629994092766e-05, |
| "loss": 0.745, |
| "step": 124000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 2.715990269568185e-05, |
| "loss": 0.7372, |
| "step": 124500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2.706817539727093e-05, |
| "loss": 0.7294, |
| "step": 125000 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2.6976448098860013e-05, |
| "loss": 0.7336, |
| "step": 125500 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2.6884720800449098e-05, |
| "loss": 0.7389, |
| "step": 126000 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2.6792993502038183e-05, |
| "loss": 0.7399, |
| "step": 126500 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.6701266203627268e-05, |
| "loss": 0.7394, |
| "step": 127000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.6609538905216346e-05, |
| "loss": 0.7394, |
| "step": 127500 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.651781160680543e-05, |
| "loss": 0.7357, |
| "step": 128000 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.6426084308394515e-05, |
| "loss": 0.7528, |
| "step": 128500 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2.63343570099836e-05, |
| "loss": 0.7406, |
| "step": 129000 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.6242629711572685e-05, |
| "loss": 0.7347, |
| "step": 129500 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.6150902413161766e-05, |
| "loss": 0.7312, |
| "step": 130000 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.605917511475085e-05, |
| "loss": 0.7334, |
| "step": 130500 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.5967447816339936e-05, |
| "loss": 0.7403, |
| "step": 131000 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.587572051792902e-05, |
| "loss": 0.7366, |
| "step": 131500 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.5783993219518106e-05, |
| "loss": 0.7271, |
| "step": 132000 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.5692265921107184e-05, |
| "loss": 0.721, |
| "step": 132500 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.560053862269627e-05, |
| "loss": 0.7259, |
| "step": 133000 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.5508811324285353e-05, |
| "loss": 0.7371, |
| "step": 133500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.5417084025874438e-05, |
| "loss": 0.7276, |
| "step": 134000 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2.5325356727463523e-05, |
| "loss": 0.732, |
| "step": 134500 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.5233629429052608e-05, |
| "loss": 0.7427, |
| "step": 135000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.5141902130641686e-05, |
| "loss": 0.7362, |
| "step": 135500 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.505017483223077e-05, |
| "loss": 0.7344, |
| "step": 136000 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.4958447533819855e-05, |
| "loss": 0.7248, |
| "step": 136500 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2.486672023540894e-05, |
| "loss": 0.7405, |
| "step": 137000 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2.4774992936998025e-05, |
| "loss": 0.7319, |
| "step": 137500 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.468326563858711e-05, |
| "loss": 0.7354, |
| "step": 138000 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.459153834017619e-05, |
| "loss": 0.733, |
| "step": 138500 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.4499811041765276e-05, |
| "loss": 0.7373, |
| "step": 139000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.440808374335436e-05, |
| "loss": 0.7303, |
| "step": 139500 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.4316356444943442e-05, |
| "loss": 0.721, |
| "step": 140000 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2.4224629146532527e-05, |
| "loss": 0.7325, |
| "step": 140500 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2.4132901848121608e-05, |
| "loss": 0.7225, |
| "step": 141000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.4041174549710693e-05, |
| "loss": 0.7313, |
| "step": 141500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.3949447251299778e-05, |
| "loss": 0.7372, |
| "step": 142000 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.385771995288886e-05, |
| "loss": 0.7266, |
| "step": 142500 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.3765992654477944e-05, |
| "loss": 0.7259, |
| "step": 143000 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2.3674265356067025e-05, |
| "loss": 0.725, |
| "step": 143500 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2.358253805765611e-05, |
| "loss": 0.7242, |
| "step": 144000 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2.3490810759245195e-05, |
| "loss": 0.729, |
| "step": 144500 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.339908346083428e-05, |
| "loss": 0.7183, |
| "step": 145000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.3307356162423365e-05, |
| "loss": 0.7232, |
| "step": 145500 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 2.3215628864012446e-05, |
| "loss": 0.722, |
| "step": 146000 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 2.312390156560153e-05, |
| "loss": 0.7255, |
| "step": 146500 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.3032174267190616e-05, |
| "loss": 0.7248, |
| "step": 147000 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.2940446968779697e-05, |
| "loss": 0.7202, |
| "step": 147500 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.2848719670368782e-05, |
| "loss": 0.7292, |
| "step": 148000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.2756992371957867e-05, |
| "loss": 0.7305, |
| "step": 148500 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2.2665265073546948e-05, |
| "loss": 0.7222, |
| "step": 149000 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2.2573537775136033e-05, |
| "loss": 0.7262, |
| "step": 149500 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2.2481810476725114e-05, |
| "loss": 0.7189, |
| "step": 150000 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.23900831783142e-05, |
| "loss": 0.7277, |
| "step": 150500 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.2298355879903284e-05, |
| "loss": 0.7338, |
| "step": 151000 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.2206628581492365e-05, |
| "loss": 0.7227, |
| "step": 151500 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.211490128308145e-05, |
| "loss": 0.7247, |
| "step": 152000 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2.2023173984670535e-05, |
| "loss": 0.7201, |
| "step": 152500 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2.193144668625962e-05, |
| "loss": 0.7294, |
| "step": 153000 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2.1839719387848705e-05, |
| "loss": 0.7258, |
| "step": 153500 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.1747992089437786e-05, |
| "loss": 0.7239, |
| "step": 154000 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.165626479102687e-05, |
| "loss": 0.7181, |
| "step": 154500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.1564537492615952e-05, |
| "loss": 0.7127, |
| "step": 155000 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.1472810194205037e-05, |
| "loss": 0.7214, |
| "step": 155500 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1381082895794122e-05, |
| "loss": 0.7173, |
| "step": 156000 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1289355597383203e-05, |
| "loss": 0.7225, |
| "step": 156500 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.1197628298972288e-05, |
| "loss": 0.7164, |
| "step": 157000 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.1105901000561373e-05, |
| "loss": 0.7113, |
| "step": 157500 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.1014173702150454e-05, |
| "loss": 0.7088, |
| "step": 158000 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.092244640373954e-05, |
| "loss": 0.7178, |
| "step": 158500 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.083071910532862e-05, |
| "loss": 0.7199, |
| "step": 159000 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.0738991806917705e-05, |
| "loss": 0.7073, |
| "step": 159500 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.064726450850679e-05, |
| "loss": 0.7113, |
| "step": 160000 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2.0555537210095875e-05, |
| "loss": 0.7121, |
| "step": 160500 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2.046380991168496e-05, |
| "loss": 0.714, |
| "step": 161000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.037208261327404e-05, |
| "loss": 0.7151, |
| "step": 161500 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.0280355314863126e-05, |
| "loss": 0.7062, |
| "step": 162000 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.018862801645221e-05, |
| "loss": 0.7053, |
| "step": 162500 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.0096900718041292e-05, |
| "loss": 0.7097, |
| "step": 163000 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 2.0005173419630377e-05, |
| "loss": 0.7139, |
| "step": 163500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.9913446121219462e-05, |
| "loss": 0.7163, |
| "step": 164000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.9821718822808543e-05, |
| "loss": 0.7031, |
| "step": 164500 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.9729991524397628e-05, |
| "loss": 0.7035, |
| "step": 165000 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.963826422598671e-05, |
| "loss": 0.7138, |
| "step": 165500 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.9546536927575794e-05, |
| "loss": 0.72, |
| "step": 166000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.945480962916488e-05, |
| "loss": 0.7167, |
| "step": 166500 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.9363082330753964e-05, |
| "loss": 0.7039, |
| "step": 167000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.927135503234305e-05, |
| "loss": 0.701, |
| "step": 167500 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.917962773393213e-05, |
| "loss": 0.7144, |
| "step": 168000 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.9087900435521215e-05, |
| "loss": 0.7003, |
| "step": 168500 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 1.89961731371103e-05, |
| "loss": 0.7058, |
| "step": 169000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.890444583869938e-05, |
| "loss": 0.7048, |
| "step": 169500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.8812718540288466e-05, |
| "loss": 0.7109, |
| "step": 170000 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.8720991241877547e-05, |
| "loss": 0.7145, |
| "step": 170500 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.8629263943466632e-05, |
| "loss": 0.7051, |
| "step": 171000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.8537536645055717e-05, |
| "loss": 0.7063, |
| "step": 171500 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.8445809346644798e-05, |
| "loss": 0.7145, |
| "step": 172000 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.8354082048233883e-05, |
| "loss": 0.7064, |
| "step": 172500 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.8262354749822968e-05, |
| "loss": 0.7164, |
| "step": 173000 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1.817062745141205e-05, |
| "loss": 0.7076, |
| "step": 173500 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.8078900153001134e-05, |
| "loss": 0.7097, |
| "step": 174000 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.798717285459022e-05, |
| "loss": 0.6915, |
| "step": 174500 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.7895445556179304e-05, |
| "loss": 0.7079, |
| "step": 175000 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.780371825776839e-05, |
| "loss": 0.7072, |
| "step": 175500 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.771199095935747e-05, |
| "loss": 0.7064, |
| "step": 176000 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.7620263660946555e-05, |
| "loss": 0.7004, |
| "step": 176500 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.7528536362535636e-05, |
| "loss": 0.6992, |
| "step": 177000 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.743680906412472e-05, |
| "loss": 0.7017, |
| "step": 177500 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.7345081765713806e-05, |
| "loss": 0.6989, |
| "step": 178000 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.7253354467302887e-05, |
| "loss": 0.6957, |
| "step": 178500 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7161627168891972e-05, |
| "loss": 0.6949, |
| "step": 179000 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.7069899870481053e-05, |
| "loss": 0.6979, |
| "step": 179500 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.6978172572070138e-05, |
| "loss": 0.7044, |
| "step": 180000 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.6886445273659223e-05, |
| "loss": 0.6996, |
| "step": 180500 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.6794717975248304e-05, |
| "loss": 0.6996, |
| "step": 181000 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.670299067683739e-05, |
| "loss": 0.7085, |
| "step": 181500 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.6611263378426474e-05, |
| "loss": 0.6935, |
| "step": 182000 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.651953608001556e-05, |
| "loss": 0.6947, |
| "step": 182500 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.6427808781604644e-05, |
| "loss": 0.6891, |
| "step": 183000 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.6336081483193725e-05, |
| "loss": 0.6922, |
| "step": 183500 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.624435418478281e-05, |
| "loss": 0.6943, |
| "step": 184000 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.6152626886371895e-05, |
| "loss": 0.6984, |
| "step": 184500 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.6060899587960976e-05, |
| "loss": 0.6979, |
| "step": 185000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.596917228955006e-05, |
| "loss": 0.6936, |
| "step": 185500 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.5877444991139142e-05, |
| "loss": 0.6989, |
| "step": 186000 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.5785717692728227e-05, |
| "loss": 0.6923, |
| "step": 186500 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.5693990394317312e-05, |
| "loss": 0.6899, |
| "step": 187000 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.5602263095906393e-05, |
| "loss": 0.6926, |
| "step": 187500 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.5510535797495478e-05, |
| "loss": 0.6898, |
| "step": 188000 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.541880849908456e-05, |
| "loss": 0.6887, |
| "step": 188500 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.5327081200673644e-05, |
| "loss": 0.6907, |
| "step": 189000 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.523535390226273e-05, |
| "loss": 0.6941, |
| "step": 189500 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.5143626603851812e-05, |
| "loss": 0.6956, |
| "step": 190000 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.5051899305440897e-05, |
| "loss": 0.6927, |
| "step": 190500 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.4960172007029982e-05, |
| "loss": 0.6887, |
| "step": 191000 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.4868444708619065e-05, |
| "loss": 0.6817, |
| "step": 191500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.477671741020815e-05, |
| "loss": 0.6842, |
| "step": 192000 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.4684990111797231e-05, |
| "loss": 0.6888, |
| "step": 192500 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.4593262813386316e-05, |
| "loss": 0.6862, |
| "step": 193000 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.45015355149754e-05, |
| "loss": 0.6882, |
| "step": 193500 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 1.4409808216564482e-05, |
| "loss": 0.6889, |
| "step": 194000 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 1.4318080918153567e-05, |
| "loss": 0.6904, |
| "step": 194500 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.422635361974265e-05, |
| "loss": 0.6932, |
| "step": 195000 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.4134626321331735e-05, |
| "loss": 0.687, |
| "step": 195500 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.404289902292082e-05, |
| "loss": 0.6928, |
| "step": 196000 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.3951171724509901e-05, |
| "loss": 0.6837, |
| "step": 196500 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.3859444426098986e-05, |
| "loss": 0.6757, |
| "step": 197000 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.3767717127688067e-05, |
| "loss": 0.6889, |
| "step": 197500 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.3675989829277152e-05, |
| "loss": 0.6837, |
| "step": 198000 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.3584262530866237e-05, |
| "loss": 0.6903, |
| "step": 198500 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.349253523245532e-05, |
| "loss": 0.6916, |
| "step": 199000 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.3400807934044405e-05, |
| "loss": 0.6816, |
| "step": 199500 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.330908063563349e-05, |
| "loss": 0.6796, |
| "step": 200000 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.3217353337222571e-05, |
| "loss": 0.6864, |
| "step": 200500 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.3125626038811656e-05, |
| "loss": 0.6848, |
| "step": 201000 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.3033898740400737e-05, |
| "loss": 0.6887, |
| "step": 201500 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.2942171441989822e-05, |
| "loss": 0.687, |
| "step": 202000 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.2850444143578907e-05, |
| "loss": 0.6797, |
| "step": 202500 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.275871684516799e-05, |
| "loss": 0.6891, |
| "step": 203000 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.2666989546757075e-05, |
| "loss": 0.6847, |
| "step": 203500 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.2575262248346156e-05, |
| "loss": 0.6854, |
| "step": 204000 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.2483534949935241e-05, |
| "loss": 0.6755, |
| "step": 204500 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.2391807651524324e-05, |
| "loss": 0.682, |
| "step": 205000 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.2300080353113407e-05, |
| "loss": 0.6939, |
| "step": 205500 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.2208353054702492e-05, |
| "loss": 0.6786, |
| "step": 206000 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.2116625756291577e-05, |
| "loss": 0.6778, |
| "step": 206500 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.202489845788066e-05, |
| "loss": 0.6784, |
| "step": 207000 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.1933171159469743e-05, |
| "loss": 0.6819, |
| "step": 207500 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.1841443861058828e-05, |
| "loss": 0.6806, |
| "step": 208000 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.174971656264791e-05, |
| "loss": 0.6775, |
| "step": 208500 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.1657989264236994e-05, |
| "loss": 0.6793, |
| "step": 209000 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 1.1566261965826079e-05, |
| "loss": 0.6723, |
| "step": 209500 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 1.1474534667415162e-05, |
| "loss": 0.6744, |
| "step": 210000 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.1382807369004247e-05, |
| "loss": 0.6802, |
| "step": 210500 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.129108007059333e-05, |
| "loss": 0.6739, |
| "step": 211000 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.1199352772182413e-05, |
| "loss": 0.6806, |
| "step": 211500 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.1107625473771496e-05, |
| "loss": 0.6803, |
| "step": 212000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.101589817536058e-05, |
| "loss": 0.6821, |
| "step": 212500 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.0924170876949664e-05, |
| "loss": 0.6778, |
| "step": 213000 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.0832443578538749e-05, |
| "loss": 0.6777, |
| "step": 213500 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.0740716280127832e-05, |
| "loss": 0.6766, |
| "step": 214000 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.0648988981716915e-05, |
| "loss": 0.6837, |
| "step": 214500 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.0557261683306e-05, |
| "loss": 0.6731, |
| "step": 215000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.0465534384895083e-05, |
| "loss": 0.6738, |
| "step": 215500 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.0373807086484166e-05, |
| "loss": 0.673, |
| "step": 216000 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.0282079788073249e-05, |
| "loss": 0.6807, |
| "step": 216500 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.0190352489662334e-05, |
| "loss": 0.6743, |
| "step": 217000 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.0098625191251419e-05, |
| "loss": 0.677, |
| "step": 217500 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.0006897892840502e-05, |
| "loss": 0.6549, |
| "step": 218000 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.915170594429585e-06, |
| "loss": 0.6653, |
| "step": 218500 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.823443296018668e-06, |
| "loss": 0.6823, |
| "step": 219000 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.731715997607753e-06, |
| "loss": 0.6695, |
| "step": 219500 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.639988699196836e-06, |
| "loss": 0.6694, |
| "step": 220000 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.548261400785919e-06, |
| "loss": 0.6707, |
| "step": 220500 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.456534102375004e-06, |
| "loss": 0.6751, |
| "step": 221000 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 9.364806803964089e-06, |
| "loss": 0.6709, |
| "step": 221500 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 9.273079505553172e-06, |
| "loss": 0.6726, |
| "step": 222000 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.181352207142255e-06, |
| "loss": 0.6773, |
| "step": 222500 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.089624908731338e-06, |
| "loss": 0.6724, |
| "step": 223000 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 8.997897610320421e-06, |
| "loss": 0.6686, |
| "step": 223500 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 8.906170311909506e-06, |
| "loss": 0.6654, |
| "step": 224000 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 8.814443013498589e-06, |
| "loss": 0.6686, |
| "step": 224500 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.722715715087674e-06, |
| "loss": 0.6622, |
| "step": 225000 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.630988416676757e-06, |
| "loss": 0.6732, |
| "step": 225500 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 8.539261118265842e-06, |
| "loss": 0.6694, |
| "step": 226000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 8.447533819854925e-06, |
| "loss": 0.6663, |
| "step": 226500 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.355806521444008e-06, |
| "loss": 0.6632, |
| "step": 227000 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.264079223033091e-06, |
| "loss": 0.6753, |
| "step": 227500 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 8.172351924622176e-06, |
| "loss": 0.6691, |
| "step": 228000 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 8.08062462621126e-06, |
| "loss": 0.6682, |
| "step": 228500 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 7.988897327800344e-06, |
| "loss": 0.6718, |
| "step": 229000 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 7.897170029389427e-06, |
| "loss": 0.6662, |
| "step": 229500 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 7.80544273097851e-06, |
| "loss": 0.6651, |
| "step": 230000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 7.713715432567595e-06, |
| "loss": 0.6678, |
| "step": 230500 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 7.621988134156678e-06, |
| "loss": 0.6716, |
| "step": 231000 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 7.530260835745762e-06, |
| "loss": 0.6742, |
| "step": 231500 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 7.438533537334845e-06, |
| "loss": 0.6729, |
| "step": 232000 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.346806238923929e-06, |
| "loss": 0.6708, |
| "step": 232500 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.255078940513013e-06, |
| "loss": 0.6605, |
| "step": 233000 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 7.163351642102097e-06, |
| "loss": 0.6619, |
| "step": 233500 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 7.07162434369118e-06, |
| "loss": 0.6688, |
| "step": 234000 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 6.979897045280264e-06, |
| "loss": 0.6693, |
| "step": 234500 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 6.8881697468693486e-06, |
| "loss": 0.6598, |
| "step": 235000 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 6.796442448458432e-06, |
| "loss": 0.6647, |
| "step": 235500 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 6.704715150047515e-06, |
| "loss": 0.6691, |
| "step": 236000 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 6.612987851636599e-06, |
| "loss": 0.661, |
| "step": 236500 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 6.521260553225682e-06, |
| "loss": 0.6545, |
| "step": 237000 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 6.429533254814767e-06, |
| "loss": 0.6663, |
| "step": 237500 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 6.33780595640385e-06, |
| "loss": 0.6671, |
| "step": 238000 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.246078657992934e-06, |
| "loss": 0.6586, |
| "step": 238500 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.154351359582018e-06, |
| "loss": 0.6642, |
| "step": 239000 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 6.062624061171101e-06, |
| "loss": 0.6614, |
| "step": 239500 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 5.970896762760185e-06, |
| "loss": 0.6537, |
| "step": 240000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 5.879169464349269e-06, |
| "loss": 0.6559, |
| "step": 240500 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 5.787442165938353e-06, |
| "loss": 0.6559, |
| "step": 241000 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 5.695714867527436e-06, |
| "loss": 0.6585, |
| "step": 241500 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 5.60398756911652e-06, |
| "loss": 0.6646, |
| "step": 242000 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 5.512260270705604e-06, |
| "loss": 0.6567, |
| "step": 242500 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 5.420532972294687e-06, |
| "loss": 0.6684, |
| "step": 243000 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.328805673883771e-06, |
| "loss": 0.6617, |
| "step": 243500 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.237078375472855e-06, |
| "loss": 0.6658, |
| "step": 244000 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.145351077061939e-06, |
| "loss": 0.6589, |
| "step": 244500 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.053623778651022e-06, |
| "loss": 0.6683, |
| "step": 245000 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 4.961896480240106e-06, |
| "loss": 0.6542, |
| "step": 245500 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.87016918182919e-06, |
| "loss": 0.6531, |
| "step": 246000 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.778441883418273e-06, |
| "loss": 0.662, |
| "step": 246500 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.686714585007357e-06, |
| "loss": 0.6601, |
| "step": 247000 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.59498728659644e-06, |
| "loss": 0.6576, |
| "step": 247500 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.503259988185525e-06, |
| "loss": 0.6587, |
| "step": 248000 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.411532689774608e-06, |
| "loss": 0.6617, |
| "step": 248500 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.319805391363692e-06, |
| "loss": 0.6674, |
| "step": 249000 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.228078092952776e-06, |
| "loss": 0.6614, |
| "step": 249500 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.13635079454186e-06, |
| "loss": 0.6621, |
| "step": 250000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 4.044623496130943e-06, |
| "loss": 0.6566, |
| "step": 250500 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 3.952896197720026e-06, |
| "loss": 0.6596, |
| "step": 251000 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.861168899309111e-06, |
| "loss": 0.6507, |
| "step": 251500 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.7694416008981937e-06, |
| "loss": 0.6612, |
| "step": 252000 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 3.6777143024872777e-06, |
| "loss": 0.6589, |
| "step": 252500 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 3.5859870040763612e-06, |
| "loss": 0.6532, |
| "step": 253000 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.494259705665445e-06, |
| "loss": 0.6619, |
| "step": 253500 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.4025324072545287e-06, |
| "loss": 0.6534, |
| "step": 254000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.3108051088436127e-06, |
| "loss": 0.651, |
| "step": 254500 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.219077810432696e-06, |
| "loss": 0.6567, |
| "step": 255000 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.1273505120217797e-06, |
| "loss": 0.6552, |
| "step": 255500 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 3.0356232136108637e-06, |
| "loss": 0.6577, |
| "step": 256000 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 2.943895915199947e-06, |
| "loss": 0.6568, |
| "step": 256500 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.852168616789031e-06, |
| "loss": 0.6576, |
| "step": 257000 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.7604413183781147e-06, |
| "loss": 0.6599, |
| "step": 257500 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2.6687140199671982e-06, |
| "loss": 0.6568, |
| "step": 258000 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.576986721556282e-06, |
| "loss": 0.6484, |
| "step": 258500 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.4852594231453657e-06, |
| "loss": 0.6508, |
| "step": 259000 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2.3935321247344497e-06, |
| "loss": 0.6482, |
| "step": 259500 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2.301804826323533e-06, |
| "loss": 0.6533, |
| "step": 260000 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2.210077527912617e-06, |
| "loss": 0.6555, |
| "step": 260500 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2.1183502295017007e-06, |
| "loss": 0.6548, |
| "step": 261000 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2.0266229310907846e-06, |
| "loss": 0.6529, |
| "step": 261500 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.934895632679868e-06, |
| "loss": 0.6423, |
| "step": 262000 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1.8431683342689517e-06, |
| "loss": 0.6485, |
| "step": 262500 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1.7514410358580354e-06, |
| "loss": 0.6594, |
| "step": 263000 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.6597137374471192e-06, |
| "loss": 0.6487, |
| "step": 263500 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.567986439036203e-06, |
| "loss": 0.6515, |
| "step": 264000 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.4762591406252867e-06, |
| "loss": 0.6538, |
| "step": 264500 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.3845318422143704e-06, |
| "loss": 0.6497, |
| "step": 265000 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.2928045438034542e-06, |
| "loss": 0.6484, |
| "step": 265500 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 1.201077245392538e-06, |
| "loss": 0.6508, |
| "step": 266000 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 1.1093499469816216e-06, |
| "loss": 0.6572, |
| "step": 266500 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.0176226485707054e-06, |
| "loss": 0.6607, |
| "step": 267000 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 9.25895350159789e-07, |
| "loss": 0.6484, |
| "step": 267500 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.341680517488727e-07, |
| "loss": 0.66, |
| "step": 268000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 7.424407533379564e-07, |
| "loss": 0.6494, |
| "step": 268500 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 6.507134549270401e-07, |
| "loss": 0.6547, |
| "step": 269000 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 5.589861565161238e-07, |
| "loss": 0.6514, |
| "step": 269500 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 4.672588581052076e-07, |
| "loss": 0.6421, |
| "step": 270000 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 3.7553155969429127e-07, |
| "loss": 0.6482, |
| "step": 270500 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 2.8380426128337496e-07, |
| "loss": 0.6474, |
| "step": 271000 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.920769628724587e-07, |
| "loss": 0.6526, |
| "step": 271500 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.0034966446154242e-07, |
| "loss": 0.655, |
| "step": 272000 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 8.62236605062613e-09, |
| "loss": 0.6447, |
| "step": 272500 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 272547, |
| "total_flos": 1.1358886111610301e+18, |
| "train_loss": 0.7392136628871132, |
| "train_runtime": 175400.2766, |
| "train_samples_per_second": 24.861, |
| "train_steps_per_second": 1.554 |
| } |
| ], |
| "max_steps": 272547, |
| "num_train_epochs": 3, |
| "total_flos": 1.1358886111610301e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |