| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9999550887638503, |
| "global_step": 9741, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 0.0, |
| "loss": 12.8749, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 3.412969283276451e-07, |
| "loss": 12.095, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 6.825938566552902e-07, |
| "loss": 8.3313, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.0238907849829352e-06, |
| "loss": 7.0655, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.3651877133105804e-06, |
| "loss": 6.626, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.7064846416382255e-06, |
| "loss": 6.3086, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2.0477815699658705e-06, |
| "loss": 6.0114, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2.3890784982935154e-06, |
| "loss": 5.923, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2.7303754266211608e-06, |
| "loss": 5.8138, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.0716723549488057e-06, |
| "loss": 5.7112, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.412969283276451e-06, |
| "loss": 5.6069, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.7542662116040956e-06, |
| "loss": 5.5401, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.095563139931741e-06, |
| "loss": 5.4575, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.436860068259386e-06, |
| "loss": 5.3794, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.778156996587031e-06, |
| "loss": 5.331, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.119453924914676e-06, |
| "loss": 5.2926, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.4607508532423215e-06, |
| "loss": 5.2757, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.802047781569966e-06, |
| "loss": 5.2307, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.143344709897611e-06, |
| "loss": 5.1463, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.484641638225257e-06, |
| "loss": 5.0758, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.825938566552902e-06, |
| "loss": 5.053, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.167235494880547e-06, |
| "loss": 5.0551, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.508532423208191e-06, |
| "loss": 5.0355, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.849829351535837e-06, |
| "loss": 4.9796, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.191126279863482e-06, |
| "loss": 4.951, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.532423208191128e-06, |
| "loss": 4.9085, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.873720136518773e-06, |
| "loss": 4.8731, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.215017064846417e-06, |
| "loss": 4.8692, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.556313993174062e-06, |
| "loss": 4.8161, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 9.897610921501706e-06, |
| "loss": 4.799, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.0238907849829352e-05, |
| "loss": 4.7847, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.0580204778156997e-05, |
| "loss": 4.7779, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.0921501706484643e-05, |
| "loss": 4.6951, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.126279863481229e-05, |
| "loss": 4.7078, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.1604095563139932e-05, |
| "loss": 4.6461, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.1945392491467578e-05, |
| "loss": 4.6271, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.2286689419795223e-05, |
| "loss": 4.6147, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.2627986348122867e-05, |
| "loss": 4.5607, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.2969283276450513e-05, |
| "loss": 4.5627, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.3310580204778158e-05, |
| "loss": 4.5616, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.3651877133105804e-05, |
| "loss": 4.5295, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.3993174061433447e-05, |
| "loss": 4.4991, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.4334470989761093e-05, |
| "loss": 4.4924, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.467576791808874e-05, |
| "loss": 4.4419, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.5017064846416382e-05, |
| "loss": 4.445, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.5358361774744027e-05, |
| "loss": 4.3713, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.5699658703071675e-05, |
| "loss": 4.4325, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.604095563139932e-05, |
| "loss": 4.3673, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.6382252559726964e-05, |
| "loss": 4.3136, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.6723549488054608e-05, |
| "loss": 4.3566, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.7064846416382256e-05, |
| "loss": 4.2961, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.7406143344709897e-05, |
| "loss": 4.3203, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.7747440273037545e-05, |
| "loss": 4.2783, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.808873720136519e-05, |
| "loss": 4.2523, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.8430034129692834e-05, |
| "loss": 4.2558, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.877133105802048e-05, |
| "loss": 4.2494, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9112627986348123e-05, |
| "loss": 4.2571, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.945392491467577e-05, |
| "loss": 4.2041, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9795221843003412e-05, |
| "loss": 4.2288, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999997788688342e-05, |
| "loss": 4.2369, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999972911443404e-05, |
| "loss": 4.1872, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999920392882944e-05, |
| "loss": 4.167, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.999984023315213e-05, |
| "loss": 4.1851, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999732432472544e-05, |
| "loss": 4.1391, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.999959699114215e-05, |
| "loss": 4.1194, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999433909535333e-05, |
| "loss": 4.1501, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.999924318810287e-05, |
| "loss": 4.1971, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999024827371946e-05, |
| "loss": 4.114, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9998778827946136e-05, |
| "loss": 4.0516, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9998505190505423e-05, |
| "loss": 4.1302, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.999820391580617e-05, |
| "loss": 4.0338, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9997875004681147e-05, |
| "loss": 4.0644, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.999751845803951e-05, |
| "loss": 4.0183, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.99971342768668e-05, |
| "loss": 4.0488, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.999672246222496e-05, |
| "loss": 4.0974, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9996283015252286e-05, |
| "loss": 4.0624, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9995815937163477e-05, |
| "loss": 4.0256, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9995321229249605e-05, |
| "loss": 4.0293, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9994798892878112e-05, |
| "loss": 4.0388, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9994248929492798e-05, |
| "loss": 4.0258, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.999367134061385e-05, |
| "loss": 4.014, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.99930661278378e-05, |
| "loss": 3.9845, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.999243329283754e-05, |
| "loss": 3.9661, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9991772837362315e-05, |
| "loss": 3.9915, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9991084763237715e-05, |
| "loss": 3.9972, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9990369072365666e-05, |
| "loss": 3.9606, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9989625766724453e-05, |
| "loss": 3.9885, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.998885484836866e-05, |
| "loss": 3.9738, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.998805631942922e-05, |
| "loss": 3.9257, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9987230182113374e-05, |
| "loss": 3.9501, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9986376438704686e-05, |
| "loss": 3.9264, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.998549509156302e-05, |
| "loss": 3.9535, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9984586143124543e-05, |
| "loss": 3.9542, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9983649595901706e-05, |
| "loss": 3.9401, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.998268545248327e-05, |
| "loss": 3.9526, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.998169371553425e-05, |
| "loss": 3.9065, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9980674387795948e-05, |
| "loss": 3.9531, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9979627472085927e-05, |
| "loss": 3.9276, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9978552971298014e-05, |
| "loss": 3.9176, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.997745088840227e-05, |
| "loss": 3.9248, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9976321226445007e-05, |
| "loss": 3.894, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9975163988548775e-05, |
| "loss": 3.9489, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.997397917791233e-05, |
| "loss": 3.8736, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.997276679781066e-05, |
| "loss": 3.8586, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9971526851594953e-05, |
| "loss": 3.8848, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.997025934269259e-05, |
| "loss": 3.9149, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.996896427460714e-05, |
| "loss": 3.8333, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9967641650918352e-05, |
| "loss": 3.8496, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9966291475282148e-05, |
| "loss": 3.8546, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9964913751430593e-05, |
| "loss": 3.9015, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9963508483171908e-05, |
| "loss": 3.8391, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9962075674390456e-05, |
| "loss": 3.8628, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9960615329046717e-05, |
| "loss": 3.8383, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9959127451177287e-05, |
| "loss": 3.8381, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9957612044894867e-05, |
| "loss": 3.8557, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.995606911438825e-05, |
| "loss": 3.8447, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9954498663922318e-05, |
| "loss": 3.8571, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9952900697838004e-05, |
| "loss": 3.8519, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9951275220552314e-05, |
| "loss": 3.7803, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9949622236558294e-05, |
| "loss": 3.8677, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9947941750425016e-05, |
| "loss": 3.8477, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.994623376679758e-05, |
| "loss": 3.8065, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9944498290397097e-05, |
| "loss": 3.8206, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9942735326020658e-05, |
| "loss": 3.8404, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.994094487854134e-05, |
| "loss": 3.808, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9939126952908198e-05, |
| "loss": 3.7804, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.993728155414622e-05, |
| "loss": 3.7677, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.993540868735635e-05, |
| "loss": 3.8159, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9933508357715454e-05, |
| "loss": 3.7865, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9931580570476306e-05, |
| "loss": 3.7753, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9929625330967575e-05, |
| "loss": 3.7645, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9927642644593818e-05, |
| "loss": 3.7898, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9925632516835457e-05, |
| "loss": 3.7857, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.992359495324876e-05, |
| "loss": 3.7847, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9921529959465842e-05, |
| "loss": 3.8126, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9919437541194628e-05, |
| "loss": 3.7679, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9917317704218852e-05, |
| "loss": 3.795, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9915170454398045e-05, |
| "loss": 3.7215, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9912995797667498e-05, |
| "loss": 3.7675, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9910793740038266e-05, |
| "loss": 3.7704, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9908564287597145e-05, |
| "loss": 3.7432, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9906307446506647e-05, |
| "loss": 3.7335, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9904023223005e-05, |
| "loss": 3.7434, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.990171162340611e-05, |
| "loss": 3.7501, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.989937265409956e-05, |
| "loss": 3.7638, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9897006321550592e-05, |
| "loss": 3.7249, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9894612632300077e-05, |
| "loss": 3.7536, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9892191592964498e-05, |
| "loss": 3.7492, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.988974321023595e-05, |
| "loss": 3.7529, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.98872674908821e-05, |
| "loss": 3.787, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9884764441746186e-05, |
| "loss": 3.7251, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.988223406974698e-05, |
| "loss": 3.7379, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9879676381878783e-05, |
| "loss": 3.706, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.98770913852114e-05, |
| "loss": 3.7333, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9874479086890117e-05, |
| "loss": 3.721, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9871839494135696e-05, |
| "loss": 3.7456, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9869172614244335e-05, |
| "loss": 3.7335, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.986647845458766e-05, |
| "loss": 3.7378, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.986375702261271e-05, |
| "loss": 3.7137, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9861008325841893e-05, |
| "loss": 3.6932, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9858232371872993e-05, |
| "loss": 3.6973, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9855429168379127e-05, |
| "loss": 3.7263, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.985259872310875e-05, |
| "loss": 3.7263, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9849741043885596e-05, |
| "loss": 3.6926, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9846856138608693e-05, |
| "loss": 3.7449, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9843944015252318e-05, |
| "loss": 3.7038, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9841004681865988e-05, |
| "loss": 3.7182, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9838038146574426e-05, |
| "loss": 3.6883, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.983504441757755e-05, |
| "loss": 3.7354, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.983202350315044e-05, |
| "loss": 3.7327, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.982897541164333e-05, |
| "loss": 3.6979, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9825900151481562e-05, |
| "loss": 3.665, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9822797731165587e-05, |
| "loss": 3.7058, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.981966815927092e-05, |
| "loss": 3.6884, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.981651144444814e-05, |
| "loss": 3.678, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9813327595422843e-05, |
| "loss": 3.7046, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.981011662099563e-05, |
| "loss": 3.6846, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9806878530042083e-05, |
| "loss": 3.6799, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.980361333151273e-05, |
| "loss": 3.6712, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9800321034433043e-05, |
| "loss": 3.7067, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.979700164790338e-05, |
| "loss": 3.6772, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9793655181098992e-05, |
| "loss": 3.6792, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9790281643269974e-05, |
| "loss": 3.7357, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9786881043741256e-05, |
| "loss": 3.6694, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.978345339191257e-05, |
| "loss": 3.6552, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.977999869725842e-05, |
| "loss": 3.671, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9776516969328066e-05, |
| "loss": 3.6895, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9773008217745483e-05, |
| "loss": 3.6742, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.976947245220935e-05, |
| "loss": 3.669, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.976590968249301e-05, |
| "loss": 3.6384, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9762319918444466e-05, |
| "loss": 3.6433, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.975870316998631e-05, |
| "loss": 3.6324, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9755059447115755e-05, |
| "loss": 3.6582, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.975138875990454e-05, |
| "loss": 3.6667, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9747691118498963e-05, |
| "loss": 3.6767, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9743966533119823e-05, |
| "loss": 3.6854, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9740215014062386e-05, |
| "loss": 3.6838, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.973643657169637e-05, |
| "loss": 3.7078, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9732631216465924e-05, |
| "loss": 3.627, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.972879895888957e-05, |
| "loss": 3.6671, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9724939809560208e-05, |
| "loss": 3.6825, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9721053779145057e-05, |
| "loss": 3.6226, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.971714087838565e-05, |
| "loss": 3.6629, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9713201118097784e-05, |
| "loss": 3.6617, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.970923450917151e-05, |
| "loss": 3.6485, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9705241062571084e-05, |
| "loss": 3.6363, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9701220789334945e-05, |
| "loss": 3.6343, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9697173700575694e-05, |
| "loss": 3.6646, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.969309980748004e-05, |
| "loss": 3.6167, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.968899912130879e-05, |
| "loss": 3.6257, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9684871653396817e-05, |
| "loss": 3.652, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.968071741515301e-05, |
| "loss": 3.6486, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9676536418060266e-05, |
| "loss": 3.6246, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9672328673675438e-05, |
| "loss": 3.6233, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9668094193629322e-05, |
| "loss": 3.6361, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.96638329896266e-05, |
| "loss": 3.6405, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9659545073445844e-05, |
| "loss": 3.618, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.965523045693944e-05, |
| "loss": 3.6378, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9650889152033597e-05, |
| "loss": 3.6453, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9646521170728283e-05, |
| "loss": 3.6312, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9642126525097202e-05, |
| "loss": 3.6043, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9637705227287763e-05, |
| "loss": 3.6043, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.963325728952106e-05, |
| "loss": 3.6487, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9628782724091795e-05, |
| "loss": 3.6493, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.96242815433683e-05, |
| "loss": 3.6173, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9619753759792466e-05, |
| "loss": 3.655, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9615199385879712e-05, |
| "loss": 3.6231, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.961061843421896e-05, |
| "loss": 3.6501, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.96060109174726e-05, |
| "loss": 3.6312, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9601376848376443e-05, |
| "loss": 3.6004, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9596716239739708e-05, |
| "loss": 3.6282, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9592029104444964e-05, |
| "loss": 3.5783, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9587315455448097e-05, |
| "loss": 3.6057, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9582575305778297e-05, |
| "loss": 3.6139, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9577808668537995e-05, |
| "loss": 3.5894, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9573015556902836e-05, |
| "loss": 3.5998, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9568195984121648e-05, |
| "loss": 3.5962, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9563349963516403e-05, |
| "loss": 3.6213, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9558477508482175e-05, |
| "loss": 3.5908, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9553578632487103e-05, |
| "loss": 3.6214, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9548653349072363e-05, |
| "loss": 3.5958, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9543701671852127e-05, |
| "loss": 3.5878, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.953872361451352e-05, |
| "loss": 3.6117, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9533719190816575e-05, |
| "loss": 3.5977, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9528688414594224e-05, |
| "loss": 3.6178, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.952363129975223e-05, |
| "loss": 3.5998, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9518547860269157e-05, |
| "loss": 3.6139, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9513438110196346e-05, |
| "loss": 3.6015, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9508302063657853e-05, |
| "loss": 3.6142, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9503139734850426e-05, |
| "loss": 3.642, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9497951138043454e-05, |
| "loss": 3.5928, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9492736287578947e-05, |
| "loss": 3.609, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9487495197871476e-05, |
| "loss": 3.6197, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9482227883408135e-05, |
| "loss": 3.5956, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9476934358748522e-05, |
| "loss": 3.5974, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.947161463852467e-05, |
| "loss": 3.5682, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.946626873744103e-05, |
| "loss": 3.6134, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9460896670274408e-05, |
| "loss": 3.5874, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9455498451873952e-05, |
| "loss": 3.5733, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9450074097161087e-05, |
| "loss": 3.6074, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.944462362112948e-05, |
| "loss": 3.5849, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9439147038845006e-05, |
| "loss": 3.5862, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.94336443654457e-05, |
| "loss": 3.5972, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.942811561614172e-05, |
| "loss": 3.591, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.942256080621529e-05, |
| "loss": 3.5864, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.941697995102069e-05, |
| "loss": 3.5939, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9411373065984166e-05, |
| "loss": 3.5934, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9405740166603936e-05, |
| "loss": 3.5437, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9400081268450107e-05, |
| "loss": 3.5745, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9394396387164677e-05, |
| "loss": 3.5901, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9388685538461435e-05, |
| "loss": 3.5659, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9382948738125966e-05, |
| "loss": 3.57, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.937718600201558e-05, |
| "loss": 3.595, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9371397346059286e-05, |
| "loss": 3.6066, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.936558278625773e-05, |
| "loss": 3.5567, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9359742338683165e-05, |
| "loss": 3.5779, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9353876019479402e-05, |
| "loss": 3.5831, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.934798384486176e-05, |
| "loss": 3.5762, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.934206583111703e-05, |
| "loss": 3.5685, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9336121994603424e-05, |
| "loss": 3.5789, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9330152351750535e-05, |
| "loss": 3.5516, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9324156919059286e-05, |
| "loss": 3.5779, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9318135713101883e-05, |
| "loss": 3.5857, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9312088750521778e-05, |
| "loss": 3.5835, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9306016048033617e-05, |
| "loss": 3.5684, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9299917622423196e-05, |
| "loss": 3.5699, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9293793490547404e-05, |
| "loss": 3.5745, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9287643669334202e-05, |
| "loss": 3.5781, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9281468175782546e-05, |
| "loss": 3.5244, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9275267026962358e-05, |
| "loss": 3.5479, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.926904024001448e-05, |
| "loss": 3.5617, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9262787832150615e-05, |
| "loss": 3.5508, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9256509820653284e-05, |
| "loss": 3.5705, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9250206222875785e-05, |
| "loss": 3.5852, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9243877056242145e-05, |
| "loss": 3.553, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9237522338247053e-05, |
| "loss": 3.5588, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9231142086455838e-05, |
| "loss": 3.5374, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.92247363185044e-05, |
| "loss": 3.5648, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.921830505209917e-05, |
| "loss": 3.5486, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9211848305017072e-05, |
| "loss": 3.5476, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9205366095105443e-05, |
| "loss": 3.5538, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9198858440282016e-05, |
| "loss": 3.5734, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9192325358534855e-05, |
| "loss": 3.586, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9185766867922303e-05, |
| "loss": 3.5631, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9179182986572943e-05, |
| "loss": 3.5143, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.917257373268554e-05, |
| "loss": 3.5349, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9165939124528984e-05, |
| "loss": 3.526, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9159279180442257e-05, |
| "loss": 3.5638, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9152593918834376e-05, |
| "loss": 3.5747, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.914588335818433e-05, |
| "loss": 3.5447, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.913914751704104e-05, |
| "loss": 3.541, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9132386414023306e-05, |
| "loss": 3.5198, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9125600067819765e-05, |
| "loss": 3.548, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9118788497188815e-05, |
| "loss": 3.4812, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.911195172095858e-05, |
| "loss": 3.5336, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9105089758026872e-05, |
| "loss": 3.5517, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.90982026273611e-05, |
| "loss": 3.5337, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9091290347998256e-05, |
| "loss": 3.5607, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.908435293904484e-05, |
| "loss": 3.5372, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9077390419676813e-05, |
| "loss": 3.5373, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.907040280913955e-05, |
| "loss": 3.5493, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9063390126747778e-05, |
| "loss": 3.5431, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9056352391885524e-05, |
| "loss": 3.5358, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.904928962400607e-05, |
| "loss": 3.5254, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.904220184263188e-05, |
| "loss": 3.5759, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9035089067354573e-05, |
| "loss": 3.5486, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9027951317834847e-05, |
| "loss": 3.5699, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9020788613802435e-05, |
| "loss": 3.5456, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9013600975056052e-05, |
| "loss": 3.5657, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9006388421463322e-05, |
| "loss": 3.5525, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.899915097296075e-05, |
| "loss": 3.5306, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.899188864955365e-05, |
| "loss": 3.5637, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.8984601471316092e-05, |
| "loss": 3.5142, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.897728945839085e-05, |
| "loss": 3.5379, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.896995263098935e-05, |
| "loss": 3.5179, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.8962591009391595e-05, |
| "loss": 3.5322, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.8955204613946135e-05, |
| "loss": 3.5419, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.894779346506999e-05, |
| "loss": 3.4724, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.8940357583248613e-05, |
| "loss": 3.5225, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.8932896989035814e-05, |
| "loss": 3.5276, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8925411703053708e-05, |
| "loss": 3.5402, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8917901745992667e-05, |
| "loss": 3.538, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8910367138611257e-05, |
| "loss": 3.5218, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8902807901736185e-05, |
| "loss": 3.488, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8895224056262226e-05, |
| "loss": 3.5459, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8887615623152188e-05, |
| "loss": 3.4957, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8879982623436835e-05, |
| "loss": 3.5491, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.887232507821484e-05, |
| "loss": 3.511, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8864643008652726e-05, |
| "loss": 3.5164, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.88569364359848e-05, |
| "loss": 3.5123, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8849205381513095e-05, |
| "loss": 3.4969, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.884144986660733e-05, |
| "loss": 3.5236, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.883366991270482e-05, |
| "loss": 3.5089, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8825865541310438e-05, |
| "loss": 3.5072, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8818036773996552e-05, |
| "loss": 3.4727, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8810183632402972e-05, |
| "loss": 3.5314, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8802306138236862e-05, |
| "loss": 3.5055, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.879440431327272e-05, |
| "loss": 3.5259, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8786478179352285e-05, |
| "loss": 3.5075, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8778527758384492e-05, |
| "loss": 3.5221, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8770553072345407e-05, |
| "loss": 3.4877, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.876255414327818e-05, |
| "loss": 3.5192, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8754530993292956e-05, |
| "loss": 3.4922, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8746483644566842e-05, |
| "loss": 3.5452, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.873841211934382e-05, |
| "loss": 3.509, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8730316439934723e-05, |
| "loss": 3.5443, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8722196628717118e-05, |
| "loss": 3.5118, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8714052708135305e-05, |
| "loss": 3.5334, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8705884700700206e-05, |
| "loss": 3.5009, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8697692628989327e-05, |
| "loss": 3.5415, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.86894765156467e-05, |
| "loss": 3.5182, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8681236383382804e-05, |
| "loss": 3.5114, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8672972254974507e-05, |
| "loss": 3.5119, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.866468415326501e-05, |
| "loss": 3.4983, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8656372101163774e-05, |
| "loss": 3.5203, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8648036121646474e-05, |
| "loss": 3.5214, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8639676237754916e-05, |
| "loss": 3.5087, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8631292472596978e-05, |
| "loss": 3.5203, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.862288484934655e-05, |
| "loss": 3.5087, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8614453391243482e-05, |
| "loss": 3.5026, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8605998121593486e-05, |
| "loss": 3.51, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8597519063768104e-05, |
| "loss": 3.5118, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8589016241204637e-05, |
| "loss": 3.4831, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8580489677406064e-05, |
| "loss": 3.5349, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8571939395940995e-05, |
| "loss": 3.4857, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8563365420443594e-05, |
| "loss": 3.4766, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8554767774613528e-05, |
| "loss": 3.5126, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8546146482215875e-05, |
| "loss": 3.555, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8537501567081097e-05, |
| "loss": 3.5108, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.852883305310493e-05, |
| "loss": 3.5117, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.852014096424836e-05, |
| "loss": 3.525, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.851142532453753e-05, |
| "loss": 3.4482, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8502686158063676e-05, |
| "loss": 3.5066, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8493923488983066e-05, |
| "loss": 3.5157, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8485137341516947e-05, |
| "loss": 3.5266, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.847632773995144e-05, |
| "loss": 3.4949, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8467494708637517e-05, |
| "loss": 3.495, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.84586382719909e-05, |
| "loss": 3.4731, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8449758454492014e-05, |
| "loss": 3.488, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8440855280685907e-05, |
| "loss": 3.499, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8431928775182194e-05, |
| "loss": 3.5122, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.842297896265497e-05, |
| "loss": 3.4933, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8414005867842765e-05, |
| "loss": 3.5144, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.840500951554846e-05, |
| "loss": 3.4728, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8395989930639224e-05, |
| "loss": 3.4829, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.838694713804645e-05, |
| "loss": 3.4995, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8377881162765662e-05, |
| "loss": 3.492, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8368792029856482e-05, |
| "loss": 3.5294, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8359679764442538e-05, |
| "loss": 3.4922, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8350544391711396e-05, |
| "loss": 3.5267, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8341385936914503e-05, |
| "loss": 3.5039, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8332204425367096e-05, |
| "loss": 3.4839, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8322999882448148e-05, |
| "loss": 3.4741, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.83137723336003e-05, |
| "loss": 3.4617, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8304521804329773e-05, |
| "loss": 3.5136, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8295248320206323e-05, |
| "loss": 3.4694, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.828595190686315e-05, |
| "loss": 3.4442, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.827663258999683e-05, |
| "loss": 3.4919, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.826729039536725e-05, |
| "loss": 3.4623, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8257925348797534e-05, |
| "loss": 3.5064, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8248537476173975e-05, |
| "loss": 3.4907, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.823912680344596e-05, |
| "loss": 3.4883, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8229693356625892e-05, |
| "loss": 3.5198, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8220237161789134e-05, |
| "loss": 3.4987, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8210758245073923e-05, |
| "loss": 3.4645, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.82012566326813e-05, |
| "loss": 3.4729, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8191732350875045e-05, |
| "loss": 3.4733, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8182185425981593e-05, |
| "loss": 3.4836, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.817261588438998e-05, |
| "loss": 3.476, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.816302375255174e-05, |
| "loss": 3.4627, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8153409056980868e-05, |
| "loss": 3.4819, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8143771824253712e-05, |
| "loss": 3.476, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8134112081008926e-05, |
| "loss": 3.474, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8124429853947387e-05, |
| "loss": 3.4573, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.811472516983211e-05, |
| "loss": 3.4789, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8104998055488198e-05, |
| "loss": 3.4821, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8095248537802743e-05, |
| "loss": 3.4564, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8085476643724768e-05, |
| "loss": 3.4867, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.8075682400265146e-05, |
| "loss": 3.468, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.8065865834496535e-05, |
| "loss": 3.4569, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.805602697355328e-05, |
| "loss": 3.4577, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.804616584463136e-05, |
| "loss": 3.4858, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.8036282474988307e-05, |
| "loss": 3.4827, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.8026376891943137e-05, |
| "loss": 3.5044, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.8016449122876247e-05, |
| "loss": 3.4965, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.800649919522938e-05, |
| "loss": 3.4853, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7996527136505516e-05, |
| "loss": 3.4952, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7986532974268814e-05, |
| "loss": 3.4914, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7976516736144524e-05, |
| "loss": 3.4836, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7966478449818925e-05, |
| "loss": 3.4622, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7956418143039232e-05, |
| "loss": 3.4965, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7946335843613533e-05, |
| "loss": 3.4663, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7936231579410707e-05, |
| "loss": 3.496, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.792610537836035e-05, |
| "loss": 3.4688, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7915957268452678e-05, |
| "loss": 3.4422, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7905787277738483e-05, |
| "loss": 3.4605, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7895595434329037e-05, |
| "loss": 3.4842, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7885381766396008e-05, |
| "loss": 3.4694, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7875146302171398e-05, |
| "loss": 3.4923, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7864889069947448e-05, |
| "loss": 3.4477, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7854610098076577e-05, |
| "loss": 3.4722, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7844309414971296e-05, |
| "loss": 3.4378, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.783398704910412e-05, |
| "loss": 3.467, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.78236430290075e-05, |
| "loss": 3.4667, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.781327738327376e-05, |
| "loss": 3.485, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.780289014055497e-05, |
| "loss": 3.4708, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7792481329562923e-05, |
| "loss": 3.4718, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.778205097906902e-05, |
| "loss": 3.4674, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7771599117904193e-05, |
| "loss": 3.4815, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7761125774958846e-05, |
| "loss": 3.4772, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.775063097918275e-05, |
| "loss": 3.4501, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7740114759584983e-05, |
| "loss": 3.4611, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7729577145233835e-05, |
| "loss": 3.4335, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7719018165256745e-05, |
| "loss": 3.4715, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7708437848840193e-05, |
| "loss": 3.4592, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.769783622522965e-05, |
| "loss": 3.4746, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.768721332372947e-05, |
| "loss": 3.4731, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7676569173702844e-05, |
| "loss": 3.497, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7665903804571668e-05, |
| "loss": 3.4437, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7655217245816513e-05, |
| "loss": 3.4544, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7644509526976515e-05, |
| "loss": 3.4832, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.763378067764929e-05, |
| "loss": 3.4588, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7623030727490875e-05, |
| "loss": 3.4574, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7612259706215626e-05, |
| "loss": 3.4369, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7601467643596142e-05, |
| "loss": 3.4789, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7590654569463186e-05, |
| "loss": 3.4798, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7579820513705596e-05, |
| "loss": 3.4592, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7568965506270212e-05, |
| "loss": 3.4747, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7558089577161783e-05, |
| "loss": 3.4782, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7547192756442887e-05, |
| "loss": 3.4625, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7536275074233854e-05, |
| "loss": 3.4597, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7525336560712675e-05, |
| "loss": 3.4495, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.751437724611492e-05, |
| "loss": 3.4815, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.750339716073366e-05, |
| "loss": 3.4729, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.749239633491938e-05, |
| "loss": 3.4658, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.748137479907989e-05, |
| "loss": 3.4374, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.747033258368024e-05, |
| "loss": 3.476, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7459269719242665e-05, |
| "loss": 3.4594, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.744818623634645e-05, |
| "loss": 3.493, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.743708216562788e-05, |
| "loss": 3.4566, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.742595753778016e-05, |
| "loss": 3.4497, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7414812383553297e-05, |
| "loss": 3.4345, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7403646733754057e-05, |
| "loss": 3.4414, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7392460619245842e-05, |
| "loss": 3.4439, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7381254070948635e-05, |
| "loss": 3.4548, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7370027119838884e-05, |
| "loss": 3.4665, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7358779796949447e-05, |
| "loss": 3.4637, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7347512133369494e-05, |
| "loss": 3.4169, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7336224160244404e-05, |
| "loss": 3.45, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7324915908775708e-05, |
| "loss": 3.4598, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7313587410220988e-05, |
| "loss": 3.4528, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7302238695893788e-05, |
| "loss": 3.4456, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7290869797163533e-05, |
| "loss": 3.4426, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7279480745455433e-05, |
| "loss": 3.4472, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.726807157225042e-05, |
| "loss": 3.4715, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.725664230908503e-05, |
| "loss": 3.4379, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7245192987551336e-05, |
| "loss": 3.4433, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7233723639296857e-05, |
| "loss": 3.4492, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.722223429602446e-05, |
| "loss": 3.48, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7210724989492298e-05, |
| "loss": 3.4469, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7199195751513685e-05, |
| "loss": 3.4716, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.718764661395704e-05, |
| "loss": 3.4365, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7176077608745788e-05, |
| "loss": 3.4319, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7164488767858262e-05, |
| "loss": 3.4571, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7152880123327636e-05, |
| "loss": 3.468, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.714125170724182e-05, |
| "loss": 3.4563, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.712960355174336e-05, |
| "loss": 3.4546, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7117935689029386e-05, |
| "loss": 3.4337, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7106248151351493e-05, |
| "loss": 3.4386, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7094540971015663e-05, |
| "loss": 3.4529, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7082814180382165e-05, |
| "loss": 3.434, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7071067811865477e-05, |
| "loss": 3.4692, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.70593018979342e-05, |
| "loss": 3.456, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7047516471110953e-05, |
| "loss": 3.456, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7035711563972297e-05, |
| "loss": 3.4733, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.7023887209148636e-05, |
| "loss": 3.4323, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.7012043439324128e-05, |
| "loss": 3.4326, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.70001802872366e-05, |
| "loss": 3.4657, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6988297785677458e-05, |
| "loss": 3.464, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6976395967491585e-05, |
| "loss": 3.451, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.696447486557726e-05, |
| "loss": 3.4533, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.695253451288607e-05, |
| "loss": 3.4042, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6940574942422807e-05, |
| "loss": 3.4533, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.692859618724539e-05, |
| "loss": 3.4485, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.691659828046476e-05, |
| "loss": 3.4082, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6904581255244802e-05, |
| "loss": 3.4368, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6892545144802245e-05, |
| "loss": 3.435, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6880489982406568e-05, |
| "loss": 3.453, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6868415801379918e-05, |
| "loss": 3.4251, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6856322635097013e-05, |
| "loss": 3.4618, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6844210516985043e-05, |
| "loss": 3.4494, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.683207948052359e-05, |
| "loss": 3.4315, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6819929559244515e-05, |
| "loss": 3.442, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6807760786731905e-05, |
| "loss": 3.4317, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.679557319662193e-05, |
| "loss": 3.4345, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.678336682260278e-05, |
| "loss": 3.4628, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.677114169841458e-05, |
| "loss": 3.4578, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6758897857849268e-05, |
| "loss": 3.4315, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.674663533475052e-05, |
| "loss": 3.4531, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.673435416301366e-05, |
| "loss": 3.431, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6722054376585547e-05, |
| "loss": 3.4282, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6709736009464504e-05, |
| "loss": 3.4283, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6697399095700216e-05, |
| "loss": 3.4503, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6685043669393622e-05, |
| "loss": 3.446, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6672669764696838e-05, |
| "loss": 3.4529, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.666027741581306e-05, |
| "loss": 3.4525, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.664786665699646e-05, |
| "loss": 3.4641, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6635437522552106e-05, |
| "loss": 3.4218, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6622990046835846e-05, |
| "loss": 3.4395, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.661052426425424e-05, |
| "loss": 3.4622, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6598040209264445e-05, |
| "loss": 3.4136, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.658553791637412e-05, |
| "loss": 3.4263, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6573017420141344e-05, |
| "loss": 3.4418, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6560478755174506e-05, |
| "loss": 3.4332, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6547921956132226e-05, |
| "loss": 3.4528, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6535347057723235e-05, |
| "loss": 3.4053, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6522754094706304e-05, |
| "loss": 3.4493, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6510143101890136e-05, |
| "loss": 3.4355, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6497514114133266e-05, |
| "loss": 3.413, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.648486716634397e-05, |
| "loss": 3.4323, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6472202293480172e-05, |
| "loss": 3.473, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6459519530549345e-05, |
| "loss": 3.4457, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.64468189126084e-05, |
| "loss": 3.4162, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6434100474763623e-05, |
| "loss": 3.432, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6421364252170534e-05, |
| "loss": 3.438, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.640861028003383e-05, |
| "loss": 3.4596, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6395838593607263e-05, |
| "loss": 3.3828, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6383049228193545e-05, |
| "loss": 3.438, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6370242219144262e-05, |
| "loss": 3.4488, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6357417601859772e-05, |
| "loss": 3.4436, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6344575411789097e-05, |
| "loss": 3.4508, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6331715684429834e-05, |
| "loss": 3.41, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6318838455328057e-05, |
| "loss": 3.4267, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6305943760078226e-05, |
| "loss": 3.4105, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6293031634323065e-05, |
| "loss": 3.44, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.628010211375348e-05, |
| "loss": 3.4041, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6267155234108474e-05, |
| "loss": 3.4008, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.625419103117502e-05, |
| "loss": 3.4276, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6241209540787973e-05, |
| "loss": 3.4329, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6228210798829978e-05, |
| "loss": 3.4045, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6215194841231365e-05, |
| "loss": 3.4173, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6202161703970057e-05, |
| "loss": 3.4396, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.618911142307145e-05, |
| "loss": 3.4001, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.617604403460834e-05, |
| "loss": 3.4269, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6162959574700798e-05, |
| "loss": 3.4284, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6149858079516097e-05, |
| "loss": 3.4375, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6136739585268593e-05, |
| "loss": 3.4387, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.612360412821962e-05, |
| "loss": 3.4332, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6110451744677415e-05, |
| "loss": 3.3909, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6097282470996997e-05, |
| "loss": 3.3885, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6084096343580056e-05, |
| "loss": 3.425, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.60708933988749e-05, |
| "loss": 3.4276, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.605767367337629e-05, |
| "loss": 3.4234, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.604443720362539e-05, |
| "loss": 3.4213, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.6031184026209642e-05, |
| "loss": 3.4176, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.601791417776267e-05, |
| "loss": 3.4254, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.6004627694964187e-05, |
| "loss": 3.3915, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.599132461453987e-05, |
| "loss": 3.4298, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5978004973261286e-05, |
| "loss": 3.4137, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5964668807945777e-05, |
| "loss": 3.4123, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5951316155456358e-05, |
| "loss": 3.413, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5937947052701615e-05, |
| "loss": 3.4029, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.592456153663561e-05, |
| "loss": 3.4254, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5911159644257765e-05, |
| "loss": 3.4178, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5897741412612782e-05, |
| "loss": 3.4116, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5884306878790512e-05, |
| "loss": 3.44, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5870856079925877e-05, |
| "loss": 3.4486, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5857389053198753e-05, |
| "loss": 3.4174, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.584390583583388e-05, |
| "loss": 3.3858, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.583040646510074e-05, |
| "loss": 3.4183, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5816890978313476e-05, |
| "loss": 3.4317, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5803359412830763e-05, |
| "loss": 3.4108, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.578981180605574e-05, |
| "loss": 3.4329, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.577624819543587e-05, |
| "loss": 3.4029, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.576266861846286e-05, |
| "loss": 3.4199, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.574907311267255e-05, |
| "loss": 3.3975, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.573546171564481e-05, |
| "loss": 3.3935, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5721834465003425e-05, |
| "loss": 3.3933, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5708191398416023e-05, |
| "loss": 3.425, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5694532553593925e-05, |
| "loss": 3.4013, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5680857968292087e-05, |
| "loss": 3.4289, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.566716768030896e-05, |
| "loss": 3.4144, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.56534617274864e-05, |
| "loss": 3.416, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.563974014770957e-05, |
| "loss": 3.4148, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5626002978906827e-05, |
| "loss": 3.421, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.561225025904961e-05, |
| "loss": 3.4306, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5598482026152353e-05, |
| "loss": 3.4607, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5584698318272367e-05, |
| "loss": 3.417, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.557089917350973e-05, |
| "loss": 3.3939, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5557084630007206e-05, |
| "loss": 3.3773, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5543254725950104e-05, |
| "loss": 3.4396, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.552940949956621e-05, |
| "loss": 3.4436, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5515548989125654e-05, |
| "loss": 3.3934, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5501673232940807e-05, |
| "loss": 3.4003, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.54877822693662e-05, |
| "loss": 3.4258, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5473876136798374e-05, |
| "loss": 3.4066, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5459954873675825e-05, |
| "loss": 3.4054, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.544601851847885e-05, |
| "loss": 3.4156, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.543206710972948e-05, |
| "loss": 3.415, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5418100685991344e-05, |
| "loss": 3.4014, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5404119285869584e-05, |
| "loss": 3.3938, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.539012294801073e-05, |
| "loss": 3.4192, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5376111711102604e-05, |
| "loss": 3.3927, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.536208561387422e-05, |
| "loss": 3.4204, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5348044695095653e-05, |
| "loss": 3.4058, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5333988993577958e-05, |
| "loss": 3.3966, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5319918548173053e-05, |
| "loss": 3.4233, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5305833397773596e-05, |
| "loss": 3.4064, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.529173358131291e-05, |
| "loss": 3.4137, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5277619137764843e-05, |
| "loss": 3.3975, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5263490106143684e-05, |
| "loss": 3.4172, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5249346525504032e-05, |
| "loss": 3.4074, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5235188434940717e-05, |
| "loss": 3.4161, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5221015873588672e-05, |
| "loss": 3.4484, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5206828880622821e-05, |
| "loss": 3.3988, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5192627495257992e-05, |
| "loss": 3.4127, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5178411756748781e-05, |
| "loss": 3.4079, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5164181704389471e-05, |
| "loss": 3.3974, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5149937377513904e-05, |
| "loss": 3.4187, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5135678815495381e-05, |
| "loss": 3.4162, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5121406057746546e-05, |
| "loss": 3.4463, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.510711914371929e-05, |
| "loss": 3.399, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5092818112904628e-05, |
| "loss": 3.3848, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5078503004832599e-05, |
| "loss": 3.4137, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.506417385907215e-05, |
| "loss": 3.4264, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5049830715231038e-05, |
| "loss": 3.3953, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5035473612955697e-05, |
| "loss": 3.3849, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.502110259193116e-05, |
| "loss": 3.4107, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.500671769188093e-05, |
| "loss": 3.3866, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.4992318952566862e-05, |
| "loss": 3.4107, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.497790641378908e-05, |
| "loss": 3.4024, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4963480115385847e-05, |
| "loss": 3.3854, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4949040097233453e-05, |
| "loss": 3.3878, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4934586399246116e-05, |
| "loss": 3.4319, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4920119061375868e-05, |
| "loss": 3.3934, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4905638123612443e-05, |
| "loss": 3.4121, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4891143625983169e-05, |
| "loss": 3.4061, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4876635608552845e-05, |
| "loss": 3.4153, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4862114111423658e-05, |
| "loss": 3.3985, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4847579174735036e-05, |
| "loss": 3.4101, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.483303083866357e-05, |
| "loss": 3.4199, |
| "step": 3505 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4818469143422882e-05, |
| "loss": 3.3797, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4803894129263527e-05, |
| "loss": 3.4023, |
| "step": 3515 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4789305836472865e-05, |
| "loss": 3.3979, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4774704305374968e-05, |
| "loss": 3.3845, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4760089576330493e-05, |
| "loss": 3.4168, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4745461689736592e-05, |
| "loss": 3.4011, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4730820686026773e-05, |
| "loss": 3.3818, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4716166605670806e-05, |
| "loss": 3.3813, |
| "step": 3545 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4701499489174604e-05, |
| "loss": 3.4347, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4686819377080123e-05, |
| "loss": 3.3986, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4672126309965226e-05, |
| "loss": 3.3861, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.46574203284436e-05, |
| "loss": 3.3919, |
| "step": 3565 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4642701473164618e-05, |
| "loss": 3.4078, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4627969784813247e-05, |
| "loss": 3.4109, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4613225304109917e-05, |
| "loss": 3.3927, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4598468071810425e-05, |
| "loss": 3.378, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4583698128705815e-05, |
| "loss": 3.4147, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.456891551562226e-05, |
| "loss": 3.4059, |
| "step": 3595 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.455412027342096e-05, |
| "loss": 3.3561, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4539312442998019e-05, |
| "loss": 3.4308, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4524492065284344e-05, |
| "loss": 3.3608, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4509659181245512e-05, |
| "loss": 3.4004, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4494813831881687e-05, |
| "loss": 3.3856, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4479956058227474e-05, |
| "loss": 3.3773, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4465085901351819e-05, |
| "loss": 3.3811, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.445020340235791e-05, |
| "loss": 3.3973, |
| "step": 3635 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4435308602383043e-05, |
| "loss": 3.3748, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4420401542598514e-05, |
| "loss": 3.4191, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4405482264209512e-05, |
| "loss": 3.4051, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4390550808454993e-05, |
| "loss": 3.4239, |
| "step": 3655 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.437560721660758e-05, |
| "loss": 3.4045, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4360651529973435e-05, |
| "loss": 3.3648, |
| "step": 3665 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.434568378989216e-05, |
| "loss": 3.3625, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4330704037736665e-05, |
| "loss": 3.4145, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.431571231491307e-05, |
| "loss": 3.3903, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4300708662860585e-05, |
| "loss": 3.4044, |
| "step": 3685 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4285693123051385e-05, |
| "loss": 3.4025, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4270665736990509e-05, |
| "loss": 3.3918, |
| "step": 3695 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4255626546215746e-05, |
| "loss": 3.3852, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4240575592297508e-05, |
| "loss": 3.3773, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4225512916838726e-05, |
| "loss": 3.4109, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4210438561474726e-05, |
| "loss": 3.398, |
| "step": 3715 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4195352567873124e-05, |
| "loss": 3.3736, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4180254977733703e-05, |
| "loss": 3.3859, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4165145832788305e-05, |
| "loss": 3.4007, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4150025174800704e-05, |
| "loss": 3.3933, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.41348930455665e-05, |
| "loss": 3.4343, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4119749486913006e-05, |
| "loss": 3.3927, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4104594540699122e-05, |
| "loss": 3.3558, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.4089428248815224e-05, |
| "loss": 3.3754, |
| "step": 3755 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.4074250653183055e-05, |
| "loss": 3.3841, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.4059061795755598e-05, |
| "loss": 3.4171, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.4043861718516964e-05, |
| "loss": 3.3985, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.4028650463482287e-05, |
| "loss": 3.3983, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.4013428072697584e-05, |
| "loss": 3.3941, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3998194588239662e-05, |
| "loss": 3.3649, |
| "step": 3785 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.398295005221599e-05, |
| "loss": 3.39, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3967694506764586e-05, |
| "loss": 3.392, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.39524279940539e-05, |
| "loss": 3.4054, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3937150556282692e-05, |
| "loss": 3.383, |
| "step": 3805 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3921862235679929e-05, |
| "loss": 3.3944, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.390656307450465e-05, |
| "loss": 3.4016, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3891253115045867e-05, |
| "loss": 3.3936, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3875932399622434e-05, |
| "loss": 3.4001, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.386060097058294e-05, |
| "loss": 3.4204, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3845258870305587e-05, |
| "loss": 3.4111, |
| "step": 3835 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3829906141198076e-05, |
| "loss": 3.3971, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3814542825697476e-05, |
| "loss": 3.3963, |
| "step": 3845 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3799168966270139e-05, |
| "loss": 3.3946, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3783784605411539e-05, |
| "loss": 3.4239, |
| "step": 3855 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3768389785646196e-05, |
| "loss": 3.3657, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3752984549527529e-05, |
| "loss": 3.3875, |
| "step": 3865 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3737568939637753e-05, |
| "loss": 3.4093, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3722142998587757e-05, |
| "loss": 3.3958, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3706706769016991e-05, |
| "loss": 3.3936, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3691260293593332e-05, |
| "loss": 3.4068, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3675803615012993e-05, |
| "loss": 3.4127, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3660336776000379e-05, |
| "loss": 3.3843, |
| "step": 3895 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.364485981930798e-05, |
| "loss": 3.3413, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3629372787716264e-05, |
| "loss": 3.3736, |
| "step": 3905 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3613875724033536e-05, |
| "loss": 3.4248, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3598368671095835e-05, |
| "loss": 3.3881, |
| "step": 3915 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3582851671766808e-05, |
| "loss": 3.4194, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3567324768937603e-05, |
| "loss": 3.348, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3551788005526738e-05, |
| "loss": 3.4205, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3536241424479985e-05, |
| "loss": 3.3955, |
| "step": 3935 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.352068506877026e-05, |
| "loss": 3.3848, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3505118981397485e-05, |
| "loss": 3.4094, |
| "step": 3945 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3489543205388498e-05, |
| "loss": 3.3731, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3473957783796907e-05, |
| "loss": 3.3853, |
| "step": 3955 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.345836275970298e-05, |
| "loss": 3.3809, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3442758176213539e-05, |
| "loss": 3.4384, |
| "step": 3965 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3427144076461818e-05, |
| "loss": 3.398, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.341152050360736e-05, |
| "loss": 3.3624, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3395887500835894e-05, |
| "loss": 3.3804, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.338024511135921e-05, |
| "loss": 3.3796, |
| "step": 3985 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3364593378415054e-05, |
| "loss": 3.3582, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3348932345266987e-05, |
| "loss": 3.3812, |
| "step": 3995 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3333262055204284e-05, |
| "loss": 3.3918, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.33175825515418e-05, |
| "loss": 3.3735, |
| "step": 4005 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3301893877619874e-05, |
| "loss": 3.4029, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3286196076804174e-05, |
| "loss": 3.3907, |
| "step": 4015 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3270489192485606e-05, |
| "loss": 3.3601, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3254773268080182e-05, |
| "loss": 3.3961, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.32390483470289e-05, |
| "loss": 3.3803, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3223314472797632e-05, |
| "loss": 3.3675, |
| "step": 4035 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3207571688876994e-05, |
| "loss": 3.3571, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3191820038782228e-05, |
| "loss": 3.3868, |
| "step": 4045 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3176059566053083e-05, |
| "loss": 3.3573, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.31602903142537e-05, |
| "loss": 3.3634, |
| "step": 4055 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3144512326972485e-05, |
| "loss": 3.3925, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3128725647821984e-05, |
| "loss": 3.3932, |
| "step": 4065 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3112930320438774e-05, |
| "loss": 3.3967, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3097126388483342e-05, |
| "loss": 3.3996, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3081313895639945e-05, |
| "loss": 3.4227, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3065492885616518e-05, |
| "loss": 3.3741, |
| "step": 4085 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3049663402144528e-05, |
| "loss": 3.3489, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3033825488978868e-05, |
| "loss": 3.3507, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3017979189897738e-05, |
| "loss": 3.3763, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.300212454870251e-05, |
| "loss": 3.3914, |
| "step": 4105 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2986261609217612e-05, |
| "loss": 3.3686, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2970390415290416e-05, |
| "loss": 3.3706, |
| "step": 4115 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2954511010791111e-05, |
| "loss": 3.3742, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2938623439612581e-05, |
| "loss": 3.3872, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2922727745670276e-05, |
| "loss": 3.3631, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2906823972902105e-05, |
| "loss": 3.3563, |
| "step": 4135 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2890912165268315e-05, |
| "loss": 3.3495, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2874992366751342e-05, |
| "loss": 3.3806, |
| "step": 4145 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2859064621355735e-05, |
| "loss": 3.378, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2843128973107988e-05, |
| "loss": 3.332, |
| "step": 4155 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.282718546605645e-05, |
| "loss": 3.3848, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2811234144271193e-05, |
| "loss": 3.3649, |
| "step": 4165 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2795275051843893e-05, |
| "loss": 3.3807, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2779308232887692e-05, |
| "loss": 3.3834, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2763333731537102e-05, |
| "loss": 3.3647, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2747351591947862e-05, |
| "loss": 3.3481, |
| "step": 4185 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2731361858296833e-05, |
| "loss": 3.3643, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2715364574781864e-05, |
| "loss": 3.3734, |
| "step": 4195 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2699359785621663e-05, |
| "loss": 3.3656, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2683347535055694e-05, |
| "loss": 3.3747, |
| "step": 4205 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.266732786734405e-05, |
| "loss": 3.3535, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2651300826767317e-05, |
| "loss": 3.332, |
| "step": 4215 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2635266457626461e-05, |
| "loss": 3.3766, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.261922480424271e-05, |
| "loss": 3.372, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.260317591095742e-05, |
| "loss": 3.3389, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2587119822131975e-05, |
| "loss": 3.3996, |
| "step": 4235 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2571056582147625e-05, |
| "loss": 3.3387, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2554986235405402e-05, |
| "loss": 3.36, |
| "step": 4245 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.253890882632598e-05, |
| "loss": 3.3494, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2522824399349553e-05, |
| "loss": 3.3745, |
| "step": 4255 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2506732998935717e-05, |
| "loss": 3.3513, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2490634669563338e-05, |
| "loss": 3.376, |
| "step": 4265 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2474529455730429e-05, |
| "loss": 3.3596, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2458417401954048e-05, |
| "loss": 3.3764, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2442298552770151e-05, |
| "loss": 3.3739, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2426172952733482e-05, |
| "loss": 3.3721, |
| "step": 4285 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2410040646417431e-05, |
| "loss": 3.3646, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2393901678413944e-05, |
| "loss": 3.3435, |
| "step": 4295 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2377756093333371e-05, |
| "loss": 3.383, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2361603935804357e-05, |
| "loss": 3.3563, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2345445250473702e-05, |
| "loss": 3.3686, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2329280082006268e-05, |
| "loss": 3.3408, |
| "step": 4315 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2313108475084823e-05, |
| "loss": 3.3663, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2296930474409943e-05, |
| "loss": 3.3789, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2280746124699864e-05, |
| "loss": 3.3563, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2264555470690381e-05, |
| "loss": 3.3576, |
| "step": 4335 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2248358557134714e-05, |
| "loss": 3.3725, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2232155428803387e-05, |
| "loss": 3.3735, |
| "step": 4345 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2215946130484096e-05, |
| "loss": 3.386, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2199730706981594e-05, |
| "loss": 3.3951, |
| "step": 4355 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.218350920311757e-05, |
| "loss": 3.3483, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2167281663730512e-05, |
| "loss": 3.3281, |
| "step": 4365 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.21510481336756e-05, |
| "loss": 3.3764, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2134808657824564e-05, |
| "loss": 3.3487, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2118563281065574e-05, |
| "loss": 3.3574, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2102312048303111e-05, |
| "loss": 3.3336, |
| "step": 4385 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2086055004457844e-05, |
| "loss": 3.3471, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2069792194466499e-05, |
| "loss": 3.3387, |
| "step": 4395 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2053523663281745e-05, |
| "loss": 3.3496, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2037249455872065e-05, |
| "loss": 3.3761, |
| "step": 4405 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2020969617221627e-05, |
| "loss": 3.3492, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2004684192330176e-05, |
| "loss": 3.3398, |
| "step": 4415 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.1988393226212884e-05, |
| "loss": 3.3646, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.1972096763900252e-05, |
| "loss": 3.41, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.1955794850437962e-05, |
| "loss": 3.3711, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1939487530886776e-05, |
| "loss": 3.371, |
| "step": 4435 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1923174850322385e-05, |
| "loss": 3.347, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1906856853835312e-05, |
| "loss": 3.3773, |
| "step": 4445 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1890533586530766e-05, |
| "loss": 3.3762, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1874205093528525e-05, |
| "loss": 3.3312, |
| "step": 4455 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1857871419962823e-05, |
| "loss": 3.3581, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1841532610982194e-05, |
| "loss": 3.3483, |
| "step": 4465 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.182518871174938e-05, |
| "loss": 3.3597, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1808839767441196e-05, |
| "loss": 3.3726, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1792485823248396e-05, |
| "loss": 3.3494, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1776126924375553e-05, |
| "loss": 3.3468, |
| "step": 4485 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1759763116040936e-05, |
| "loss": 3.3237, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.174339444347639e-05, |
| "loss": 3.3899, |
| "step": 4495 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1727020951927206e-05, |
| "loss": 3.3624, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1710642686651981e-05, |
| "loss": 3.3505, |
| "step": 4505 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1694259692922525e-05, |
| "loss": 3.403, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1677872016023707e-05, |
| "loss": 3.3984, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1661479701253348e-05, |
| "loss": 3.3749, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1645082793922085e-05, |
| "loss": 3.3832, |
| "step": 4525 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1628681339353244e-05, |
| "loss": 3.3484, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.161227538288273e-05, |
| "loss": 3.3754, |
| "step": 4535 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1595864969858888e-05, |
| "loss": 3.3544, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1579450145642382e-05, |
| "loss": 3.3428, |
| "step": 4545 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1563030955606067e-05, |
| "loss": 3.3489, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1546607445134865e-05, |
| "loss": 3.3652, |
| "step": 4555 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1530179659625647e-05, |
| "loss": 3.3745, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1513747644487091e-05, |
| "loss": 3.375, |
| "step": 4565 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.149731144513958e-05, |
| "loss": 3.3633, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1480871107015047e-05, |
| "loss": 3.3774, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1464426675556873e-05, |
| "loss": 3.3696, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1447978196219754e-05, |
| "loss": 3.332, |
| "step": 4585 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1431525714469576e-05, |
| "loss": 3.3364, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.141506927578328e-05, |
| "loss": 3.3759, |
| "step": 4595 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.139860892564876e-05, |
| "loss": 3.347, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1382144709564703e-05, |
| "loss": 3.3402, |
| "step": 4605 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1365676673040502e-05, |
| "loss": 3.327, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.134920486159609e-05, |
| "loss": 3.3341, |
| "step": 4615 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1332729320761846e-05, |
| "loss": 3.3715, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1316250096078458e-05, |
| "loss": 3.3556, |
| "step": 4625 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1299767233096794e-05, |
| "loss": 3.3632, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.128328077737778e-05, |
| "loss": 3.3481, |
| "step": 4635 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.126679077449227e-05, |
| "loss": 3.3603, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1250297270020922e-05, |
| "loss": 3.3334, |
| "step": 4645 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1233800309554083e-05, |
| "loss": 3.3521, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1217299938691639e-05, |
| "loss": 3.356, |
| "step": 4655 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1200796203042912e-05, |
| "loss": 3.3798, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1184289148226521e-05, |
| "loss": 3.354, |
| "step": 4665 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.116777881987026e-05, |
| "loss": 3.3578, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1151265263610975e-05, |
| "loss": 3.3504, |
| "step": 4675 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.113474852509443e-05, |
| "loss": 3.3549, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1118228649975185e-05, |
| "loss": 3.3461, |
| "step": 4685 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1101705683916473e-05, |
| "loss": 3.3459, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1085179672590072e-05, |
| "loss": 3.3561, |
| "step": 4695 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1068650661676173e-05, |
| "loss": 3.3829, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1052118696863258e-05, |
| "loss": 3.3625, |
| "step": 4705 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.103558382384798e-05, |
| "loss": 3.3398, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1019046088335023e-05, |
| "loss": 3.3633, |
| "step": 4715 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1002505536036997e-05, |
| "loss": 3.3527, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0985962212674275e-05, |
| "loss": 3.3526, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.096941616397491e-05, |
| "loss": 3.3715, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.095286743567448e-05, |
| "loss": 3.3388, |
| "step": 4735 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0936316073515973e-05, |
| "loss": 3.353, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0919762123249656e-05, |
| "loss": 3.3712, |
| "step": 4745 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0903205630632942e-05, |
| "loss": 3.3622, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0886646641430288e-05, |
| "loss": 3.3788, |
| "step": 4755 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0870085201413034e-05, |
| "loss": 3.3842, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0853521356359312e-05, |
| "loss": 3.3665, |
| "step": 4765 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0836955152053883e-05, |
| "loss": 3.3418, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0820386634288045e-05, |
| "loss": 3.3411, |
| "step": 4775 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0803815848859485e-05, |
| "loss": 3.3331, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0787242841572154e-05, |
| "loss": 3.374, |
| "step": 4785 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0770667658236156e-05, |
| "loss": 3.3842, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0754090344667591e-05, |
| "loss": 3.3619, |
| "step": 4795 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0737510946688468e-05, |
| "loss": 3.3549, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0720929510126543e-05, |
| "loss": 3.3514, |
| "step": 4805 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0704346080815218e-05, |
| "loss": 3.3746, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.068776070459339e-05, |
| "loss": 3.3746, |
| "step": 4815 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.067117342730535e-05, |
| "loss": 3.3469, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0654584294800636e-05, |
| "loss": 3.3524, |
| "step": 4825 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0637993352933917e-05, |
| "loss": 3.3609, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.062140064756487e-05, |
| "loss": 3.3318, |
| "step": 4835 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0604806224558028e-05, |
| "loss": 3.3615, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.058821012978269e-05, |
| "loss": 3.3568, |
| "step": 4845 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.057161240911277e-05, |
| "loss": 3.3374, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0555013108426675e-05, |
| "loss": 3.3645, |
| "step": 4855 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.053841227360718e-05, |
| "loss": 3.363, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0521809950541298e-05, |
| "loss": 3.3724, |
| "step": 4865 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.050520618512016e-05, |
| "loss": 3.3864, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0488601023238885e-05, |
| "loss": 3.3368, |
| "step": 4875 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0471994510796444e-05, |
| "loss": 3.3616, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.045538669369555e-05, |
| "loss": 3.3979, |
| "step": 4885 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.043877761784252e-05, |
| "loss": 3.3696, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0422167329147145e-05, |
| "loss": 3.3609, |
| "step": 4895 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0405555873522576e-05, |
| "loss": 3.3782, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0388943296885181e-05, |
| "loss": 3.3268, |
| "step": 4905 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.037232964515444e-05, |
| "loss": 3.3384, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0355714964252786e-05, |
| "loss": 3.3798, |
| "step": 4915 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0339099300105513e-05, |
| "loss": 3.3563, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0322482698640631e-05, |
| "loss": 3.3752, |
| "step": 4925 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0305865205788728e-05, |
| "loss": 3.3422, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0289246867482868e-05, |
| "loss": 3.3152, |
| "step": 4935 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.027262772965845e-05, |
| "loss": 3.35, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0256007838253084e-05, |
| "loss": 3.3226, |
| "step": 4945 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0239387239206455e-05, |
| "loss": 3.3701, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0222765978460211e-05, |
| "loss": 3.3537, |
| "step": 4955 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0206144101957831e-05, |
| "loss": 3.3488, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0189521655644495e-05, |
| "loss": 3.3563, |
| "step": 4965 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0172898685466947e-05, |
| "loss": 3.3255, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0156275237373394e-05, |
| "loss": 3.3641, |
| "step": 4975 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0139651357313354e-05, |
| "loss": 3.3227, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0123027091237549e-05, |
| "loss": 3.3251, |
| "step": 4985 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0106402485097757e-05, |
| "loss": 3.3351, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.00897775848467e-05, |
| "loss": 3.3688, |
| "step": 4995 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0073152436437918e-05, |
| "loss": 3.3765, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0056527085825629e-05, |
| "loss": 3.3487, |
| "step": 5005 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0039901578964619e-05, |
| "loss": 3.3531, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0023275961810095e-05, |
| "loss": 3.3649, |
| "step": 5015 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 1.0006650280317573e-05, |
| "loss": 3.3219, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.990024580442754e-06, |
| "loss": 3.3799, |
| "step": 5025 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.973398908141383e-06, |
| "loss": 3.3305, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.956773309369128e-06, |
| "loss": 3.3466, |
| "step": 5035 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.940147830081455e-06, |
| "loss": 3.3418, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.9235225162335e-06, |
| "loss": 3.3444, |
| "step": 5045 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.906897413779949e-06, |
| "loss": 3.3251, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.890272568674886e-06, |
| "loss": 3.3345, |
| "step": 5055 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.873648026871701e-06, |
| "loss": 3.3607, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.857023834322937e-06, |
| "loss": 3.3777, |
| "step": 5065 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.840400036980176e-06, |
| "loss": 3.348, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.823776680793904e-06, |
| "loss": 3.3405, |
| "step": 5075 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.807153811713386e-06, |
| "loss": 3.3557, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.790531475686546e-06, |
| "loss": 3.3467, |
| "step": 5085 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.773909718659831e-06, |
| "loss": 3.3388, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.757288586578093e-06, |
| "loss": 3.3561, |
| "step": 5095 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.74066812538445e-06, |
| "loss": 3.3206, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.724048381020162e-06, |
| "loss": 3.3642, |
| "step": 5105 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.707429399424514e-06, |
| "loss": 3.3225, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.690811226534688e-06, |
| "loss": 3.3208, |
| "step": 5115 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.67419390828562e-06, |
| "loss": 3.347, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.657577490609893e-06, |
| "loss": 3.3575, |
| "step": 5125 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.64096201943759e-06, |
| "loss": 3.3727, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.624347540696184e-06, |
| "loss": 3.3379, |
| "step": 5135 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.607734100310408e-06, |
| "loss": 3.3449, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.59112174420212e-06, |
| "loss": 3.3356, |
| "step": 5145 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.57451051829018e-06, |
| "loss": 3.3262, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.557900468490327e-06, |
| "loss": 3.3648, |
| "step": 5155 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.541291640715047e-06, |
| "loss": 3.359, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.524684080873456e-06, |
| "loss": 3.3426, |
| "step": 5165 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.50807783487115e-06, |
| "loss": 3.3423, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.491472948610105e-06, |
| "loss": 3.3503, |
| "step": 5175 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.474869467988534e-06, |
| "loss": 3.316, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.45826743890077e-06, |
| "loss": 3.3335, |
| "step": 5185 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.441666907237127e-06, |
| "loss": 3.3285, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.425067918883781e-06, |
| "loss": 3.3326, |
| "step": 5195 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.408470519722646e-06, |
| "loss": 3.3575, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.391874755631241e-06, |
| "loss": 3.3475, |
| "step": 5205 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.375280672482567e-06, |
| "loss": 3.3359, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.358688316144972e-06, |
| "loss": 3.3548, |
| "step": 5215 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.342097732482041e-06, |
| "loss": 3.3333, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.32550896735245e-06, |
| "loss": 3.3762, |
| "step": 5225 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.308922066609858e-06, |
| "loss": 3.3474, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.292337076102758e-06, |
| "loss": 3.3459, |
| "step": 5235 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.275754041674373e-06, |
| "loss": 3.3069, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.259173009162515e-06, |
| "loss": 3.3358, |
| "step": 5245 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.242594024399467e-06, |
| "loss": 3.3431, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.226017133211843e-06, |
| "loss": 3.3378, |
| "step": 5255 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.209442381420476e-06, |
| "loss": 3.3214, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.192869814840288e-06, |
| "loss": 3.3363, |
| "step": 5265 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.176299479280155e-06, |
| "loss": 3.3669, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.159731420542786e-06, |
| "loss": 3.3256, |
| "step": 5275 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.143165684424604e-06, |
| "loss": 3.3208, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.126602316715601e-06, |
| "loss": 3.3535, |
| "step": 5285 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.110041363199233e-06, |
| "loss": 3.328, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.093482869652279e-06, |
| "loss": 3.3766, |
| "step": 5295 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.076926881844713e-06, |
| "loss": 3.3399, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.06037344553959e-06, |
| "loss": 3.3526, |
| "step": 5305 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.043822606492907e-06, |
| "loss": 3.3426, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.027274410453489e-06, |
| "loss": 3.3251, |
| "step": 5315 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.010728903162846e-06, |
| "loss": 3.3554, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.994186130355063e-06, |
| "loss": 3.3117, |
| "step": 5325 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.977646137756662e-06, |
| "loss": 3.3107, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.961108971086489e-06, |
| "loss": 3.3492, |
| "step": 5335 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.944574676055564e-06, |
| "loss": 3.3525, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.928043298366979e-06, |
| "loss": 3.3634, |
| "step": 5345 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.911514883715763e-06, |
| "loss": 3.3353, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.894989477788753e-06, |
| "loss": 3.3184, |
| "step": 5355 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.878467126264467e-06, |
| "loss": 3.343, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.86194787481298e-06, |
| "loss": 3.3254, |
| "step": 5365 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.8454317690958e-06, |
| "loss": 3.3286, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.828918854765742e-06, |
| "loss": 3.3469, |
| "step": 5375 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.812409177466796e-06, |
| "loss": 3.3302, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.795902782834006e-06, |
| "loss": 3.3324, |
| "step": 5385 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.779399716493342e-06, |
| "loss": 3.3155, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.762900024061572e-06, |
| "loss": 3.3336, |
| "step": 5395 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.746403751146142e-06, |
| "loss": 3.3134, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.729910943345044e-06, |
| "loss": 3.3502, |
| "step": 5405 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.713421646246692e-06, |
| "loss": 3.3115, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.696935905429793e-06, |
| "loss": 3.3137, |
| "step": 5415 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.68045376646323e-06, |
| "loss": 3.3459, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.663975274905926e-06, |
| "loss": 3.3688, |
| "step": 5425 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.647500476306724e-06, |
| "loss": 3.3532, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.631029416204255e-06, |
| "loss": 3.3263, |
| "step": 5435 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.61456214012682e-06, |
| "loss": 3.3302, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.598098693592263e-06, |
| "loss": 3.3703, |
| "step": 5445 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.581639122107837e-06, |
| "loss": 3.3089, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.565183471170084e-06, |
| "loss": 3.3272, |
| "step": 5455 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.548731786264713e-06, |
| "loss": 3.3306, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.532284112866469e-06, |
| "loss": 3.3481, |
| "step": 5465 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.515840496439009e-06, |
| "loss": 3.3229, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.499400982434773e-06, |
| "loss": 3.3144, |
| "step": 5475 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.482965616294863e-06, |
| "loss": 3.3318, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.466534443448918e-06, |
| "loss": 3.29, |
| "step": 5485 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.450107509314983e-06, |
| "loss": 3.3388, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.433684859299394e-06, |
| "loss": 3.3479, |
| "step": 5495 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.41726653879663e-06, |
| "loss": 3.2932, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.400852593189214e-06, |
| "loss": 3.3142, |
| "step": 5505 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.384443067847578e-06, |
| "loss": 3.3353, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.36803800812993e-06, |
| "loss": 3.3384, |
| "step": 5515 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.351637459382133e-06, |
| "loss": 3.3627, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.335241466937585e-06, |
| "loss": 3.3282, |
| "step": 5525 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.31885007611709e-06, |
| "loss": 3.3118, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.302463332228734e-06, |
| "loss": 3.3339, |
| "step": 5535 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.286081280567751e-06, |
| "loss": 3.3178, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.269703966416412e-06, |
| "loss": 3.3302, |
| "step": 5545 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.253331435043888e-06, |
| "loss": 3.3292, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.236963731706137e-06, |
| "loss": 3.3259, |
| "step": 5555 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.22060090164576e-06, |
| "loss": 3.2972, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.204242990091898e-06, |
| "loss": 3.3338, |
| "step": 5565 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.187890042260094e-06, |
| "loss": 3.3246, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.171542103352166e-06, |
| "loss": 3.3236, |
| "step": 5575 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.155199218556098e-06, |
| "loss": 3.3122, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.138861433045887e-06, |
| "loss": 3.3686, |
| "step": 5585 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.122528791981447e-06, |
| "loss": 3.3435, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.106201340508468e-06, |
| "loss": 3.3379, |
| "step": 5595 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.089879123758301e-06, |
| "loss": 3.3528, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 8.073562186847816e-06, |
| "loss": 3.3556, |
| "step": 5605 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 8.057250574879296e-06, |
| "loss": 3.3303, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 8.040944332940313e-06, |
| "loss": 3.3402, |
| "step": 5615 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 8.024643506103574e-06, |
| "loss": 3.3234, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 8.008348139426838e-06, |
| "loss": 3.3514, |
| "step": 5625 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.992058277952765e-06, |
| "loss": 3.3101, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.975773966708794e-06, |
| "loss": 3.3163, |
| "step": 5635 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.959495250707026e-06, |
| "loss": 3.3123, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.943222174944097e-06, |
| "loss": 3.3163, |
| "step": 5645 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.92695478440105e-06, |
| "loss": 3.3455, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.910693124043214e-06, |
| "loss": 3.3545, |
| "step": 5655 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.89443723882008e-06, |
| "loss": 3.3437, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.878187173665174e-06, |
| "loss": 3.3227, |
| "step": 5665 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.861942973495939e-06, |
| "loss": 3.3427, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.845704683213598e-06, |
| "loss": 3.3086, |
| "step": 5675 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.829472347703046e-06, |
| "loss": 3.3765, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.813246011832712e-06, |
| "loss": 3.3161, |
| "step": 5685 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.79702572045445e-06, |
| "loss": 3.3682, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.780811518403397e-06, |
| "loss": 3.3335, |
| "step": 5695 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.764603450497861e-06, |
| "loss": 3.3127, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.748401561539196e-06, |
| "loss": 3.2919, |
| "step": 5705 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.732205896311678e-06, |
| "loss": 3.3443, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.716016499582376e-06, |
| "loss": 3.303, |
| "step": 5715 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.699833416101033e-06, |
| "loss": 3.3185, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.683656690599942e-06, |
| "loss": 3.3215, |
| "step": 5725 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.667486367793822e-06, |
| "loss": 3.3261, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.651322492379694e-06, |
| "loss": 3.3445, |
| "step": 5735 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.635165109036756e-06, |
| "loss": 3.3505, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.619014262426262e-06, |
| "loss": 3.306, |
| "step": 5745 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.602869997191398e-06, |
| "loss": 3.3131, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.586732357957158e-06, |
| "loss": 3.3371, |
| "step": 5755 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.570601389330222e-06, |
| "loss": 3.3456, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.554477135898828e-06, |
| "loss": 3.2972, |
| "step": 5765 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.538359642232654e-06, |
| "loss": 3.3107, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.522248952882695e-06, |
| "loss": 3.3091, |
| "step": 5775 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.506145112381138e-06, |
| "loss": 3.3101, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.490048165241233e-06, |
| "loss": 3.3269, |
| "step": 5785 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.473958155957182e-06, |
| "loss": 3.3064, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.457875129004008e-06, |
| "loss": 3.2897, |
| "step": 5795 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.441799128837437e-06, |
| "loss": 3.2875, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.425730199893761e-06, |
| "loss": 3.3285, |
| "step": 5805 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.40966838658974e-06, |
| "loss": 3.3292, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.3936137333224565e-06, |
| "loss": 3.3438, |
| "step": 5815 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.3775662844692075e-06, |
| "loss": 3.3151, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.361526084387369e-06, |
| "loss": 3.2861, |
| "step": 5825 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.345493177414284e-06, |
| "loss": 3.3326, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.3294676078671405e-06, |
| "loss": 3.342, |
| "step": 5835 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.313449420042837e-06, |
| "loss": 3.3066, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.297438658217878e-06, |
| "loss": 3.3273, |
| "step": 5845 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.2814353666482276e-06, |
| "loss": 3.3008, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.265439589569212e-06, |
| "loss": 3.3439, |
| "step": 5855 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.249451371195384e-06, |
| "loss": 3.3129, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.233470755720402e-06, |
| "loss": 3.35, |
| "step": 5865 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.217497787316909e-06, |
| "loss": 3.3494, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.201532510136411e-06, |
| "loss": 3.3338, |
| "step": 5875 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.18557496830915e-06, |
| "loss": 3.3099, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.169625205943995e-06, |
| "loss": 3.3496, |
| "step": 5885 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.153683267128304e-06, |
| "loss": 3.3073, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.137749195927815e-06, |
| "loss": 3.3288, |
| "step": 5895 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.121823036386514e-06, |
| "loss": 3.3173, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.10590483252652e-06, |
| "loss": 3.3351, |
| "step": 5905 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.089994628347965e-06, |
| "loss": 3.3412, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.074092467828864e-06, |
| "loss": 3.3308, |
| "step": 5915 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.058198394924997e-06, |
| "loss": 3.2986, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.042312453569793e-06, |
| "loss": 3.3275, |
| "step": 5925 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.026434687674204e-06, |
| "loss": 3.3116, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.010565141126584e-06, |
| "loss": 3.3234, |
| "step": 5935 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.994703857792562e-06, |
| "loss": 3.3192, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.978850881514934e-06, |
| "loss": 3.3206, |
| "step": 5945 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.963006256113527e-06, |
| "loss": 3.3144, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.94717002538509e-06, |
| "loss": 3.3564, |
| "step": 5955 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.931342233103171e-06, |
| "loss": 3.2916, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.915522923017983e-06, |
| "loss": 3.3106, |
| "step": 5965 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.8997121388563e-06, |
| "loss": 3.3251, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.883909924321328e-06, |
| "loss": 3.3333, |
| "step": 5975 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.868116323092589e-06, |
| "loss": 3.3074, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.852331378825785e-06, |
| "loss": 3.327, |
| "step": 5985 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.8365551351527e-06, |
| "loss": 3.3249, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.820787635681068e-06, |
| "loss": 3.3421, |
| "step": 5995 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.80502892399445e-06, |
| "loss": 3.3011, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.78927904365211e-06, |
| "loss": 3.3154, |
| "step": 6005 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.773538038188912e-06, |
| "loss": 3.3476, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.757805951115182e-06, |
| "loss": 3.3043, |
| "step": 6015 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.742082825916599e-06, |
| "loss": 3.3314, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.726368706054063e-06, |
| "loss": 3.2959, |
| "step": 6025 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.710663634963588e-06, |
| "loss": 3.3122, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.694967656056175e-06, |
| "loss": 3.3096, |
| "step": 6035 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.67928081271769e-06, |
| "loss": 3.3262, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.663603148308754e-06, |
| "loss": 3.3224, |
| "step": 6045 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.6479347061646046e-06, |
| "loss": 3.3478, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.632275529594997e-06, |
| "loss": 3.3353, |
| "step": 6055 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.616625661884073e-06, |
| "loss": 3.3296, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.600985146290246e-06, |
| "loss": 3.3236, |
| "step": 6065 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.585354026046069e-06, |
| "loss": 3.2962, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.569732344358137e-06, |
| "loss": 3.2868, |
| "step": 6075 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.554120144406948e-06, |
| "loss": 3.2703, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.5385174693467955e-06, |
| "loss": 3.3199, |
| "step": 6085 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.522924362305639e-06, |
| "loss": 3.3387, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.507340866384997e-06, |
| "loss": 3.3014, |
| "step": 6095 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.491767024659818e-06, |
| "loss": 3.2966, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.476202880178369e-06, |
| "loss": 3.3131, |
| "step": 6105 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.460648475962104e-06, |
| "loss": 3.3123, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.445103855005563e-06, |
| "loss": 3.3045, |
| "step": 6115 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.429569060276237e-06, |
| "loss": 3.3193, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.414044134714461e-06, |
| "loss": 3.2906, |
| "step": 6125 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.398529121233291e-06, |
| "loss": 3.2899, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.3830240627183745e-06, |
| "loss": 3.3199, |
| "step": 6135 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.36752900202785e-06, |
| "loss": 3.301, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.352043981992222e-06, |
| "loss": 3.3046, |
| "step": 6145 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.336569045414238e-06, |
| "loss": 3.3354, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.321104235068775e-06, |
| "loss": 3.3019, |
| "step": 6155 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.305649593702721e-06, |
| "loss": 3.2985, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.290205164034849e-06, |
| "loss": 3.318, |
| "step": 6165 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.274770988755712e-06, |
| "loss": 3.3463, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.259347110527516e-06, |
| "loss": 3.3031, |
| "step": 6175 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.243933571984009e-06, |
| "loss": 3.2965, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.228530415730349e-06, |
| "loss": 3.2875, |
| "step": 6185 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.213137684343002e-06, |
| "loss": 3.337, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.197755420369622e-06, |
| "loss": 3.3082, |
| "step": 6195 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.182383666328925e-06, |
| "loss": 3.3336, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.1670224647105714e-06, |
| "loss": 3.3176, |
| "step": 6205 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.151671857975061e-06, |
| "loss": 3.3146, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.136331888553606e-06, |
| "loss": 3.3012, |
| "step": 6215 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.121002598848017e-06, |
| "loss": 3.3134, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.105684031230577e-06, |
| "loss": 3.3179, |
| "step": 6225 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.090376228043938e-06, |
| "loss": 3.3075, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.075079231600999e-06, |
| "loss": 3.3314, |
| "step": 6235 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.059793084184782e-06, |
| "loss": 3.3294, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.0445178280483285e-06, |
| "loss": 3.2846, |
| "step": 6245 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.029253505414565e-06, |
| "loss": 3.2943, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.014000158476204e-06, |
| "loss": 3.3094, |
| "step": 6255 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.998757829395617e-06, |
| "loss": 3.2884, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.983526560304723e-06, |
| "loss": 3.3439, |
| "step": 6265 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.968306393304863e-06, |
| "loss": 3.3057, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.9530973704666984e-06, |
| "loss": 3.3483, |
| "step": 6275 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.9378995338300815e-06, |
| "loss": 3.2989, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.9227129254039486e-06, |
| "loss": 3.2861, |
| "step": 6285 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.907537587166191e-06, |
| "loss": 3.3116, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.892373561063558e-06, |
| "loss": 3.3052, |
| "step": 6295 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.877220889011526e-06, |
| "loss": 3.3233, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.862079612894187e-06, |
| "loss": 3.3429, |
| "step": 6305 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.846949774564133e-06, |
| "loss": 3.2817, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.8318314158423395e-06, |
| "loss": 3.2818, |
| "step": 6315 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.8167245785180535e-06, |
| "loss": 3.3262, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.801629304348675e-06, |
| "loss": 3.3519, |
| "step": 6325 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.789561438525277e-06, |
| "loss": 3.335, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.774487083161278e-06, |
| "loss": 3.3075, |
| "step": 6335 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.759424407702493e-06, |
| "loss": 3.3132, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.744373453784256e-06, |
| "loss": 3.3155, |
| "step": 6345 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.729334263009495e-06, |
| "loss": 3.3061, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.714306876948621e-06, |
| "loss": 3.3282, |
| "step": 6355 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.699291337139419e-06, |
| "loss": 3.317, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.684287685086931e-06, |
| "loss": 3.3401, |
| "step": 6365 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.669295962263337e-06, |
| "loss": 3.3114, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.654316210107843e-06, |
| "loss": 3.2913, |
| "step": 6375 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.6393484700265666e-06, |
| "loss": 3.2988, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.624392783392422e-06, |
| "loss": 3.3231, |
| "step": 6385 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.609449191545009e-06, |
| "loss": 3.3397, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.5945177357904935e-06, |
| "loss": 3.3236, |
| "step": 6395 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.579598457401489e-06, |
| "loss": 3.3173, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.564691397616961e-06, |
| "loss": 3.3354, |
| "step": 6405 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.549796597642093e-06, |
| "loss": 3.3391, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.534914098648185e-06, |
| "loss": 3.3339, |
| "step": 6415 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.52004394177253e-06, |
| "loss": 3.3056, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.505186168118314e-06, |
| "loss": 3.3162, |
| "step": 6425 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.490340818754485e-06, |
| "loss": 3.2815, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.47550793471566e-06, |
| "loss": 3.302, |
| "step": 6435 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.460687557001983e-06, |
| "loss": 3.2952, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.445879726579042e-06, |
| "loss": 3.3209, |
| "step": 6445 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.431084484377742e-06, |
| "loss": 3.356, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.416301871294186e-06, |
| "loss": 3.3438, |
| "step": 6455 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.401531928189574e-06, |
| "loss": 3.3069, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.386774695890083e-06, |
| "loss": 3.344, |
| "step": 6465 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.372030215186753e-06, |
| "loss": 3.3163, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.357298526835381e-06, |
| "loss": 3.3111, |
| "step": 6475 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.342579671556402e-06, |
| "loss": 3.3036, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.327873690034775e-06, |
| "loss": 3.3204, |
| "step": 6485 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.313180622919883e-06, |
| "loss": 3.3088, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.298500510825399e-06, |
| "loss": 3.2783, |
| "step": 6495 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.2838333943291984e-06, |
| "loss": 3.3212, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.269179313973232e-06, |
| "loss": 3.3111, |
| "step": 6505 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.254538310263411e-06, |
| "loss": 3.2709, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.239910423669509e-06, |
| "loss": 3.3042, |
| "step": 6515 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.225295694625036e-06, |
| "loss": 3.3092, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.210694163527138e-06, |
| "loss": 3.2934, |
| "step": 6525 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.196105870736479e-06, |
| "loss": 3.3031, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.181530856577121e-06, |
| "loss": 3.3218, |
| "step": 6535 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.166969161336435e-06, |
| "loss": 3.2857, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.152420825264968e-06, |
| "loss": 3.303, |
| "step": 6545 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.1378858885763475e-06, |
| "loss": 3.2928, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.123364391447156e-06, |
| "loss": 3.3229, |
| "step": 6555 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.1088563740168355e-06, |
| "loss": 3.3101, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.094361876387557e-06, |
| "loss": 3.3175, |
| "step": 6565 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.079880938624133e-06, |
| "loss": 3.3263, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.065413600753888e-06, |
| "loss": 3.2984, |
| "step": 6575 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.050959902766552e-06, |
| "loss": 3.3193, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.036519884614157e-06, |
| "loss": 3.324, |
| "step": 6585 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.022093586210921e-06, |
| "loss": 3.3079, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.0076810474331395e-06, |
| "loss": 3.3138, |
| "step": 6595 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.993282308119074e-06, |
| "loss": 3.3108, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.9788974080688416e-06, |
| "loss": 3.2662, |
| "step": 6605 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.964526387044304e-06, |
| "loss": 3.3211, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.950169284768968e-06, |
| "loss": 3.3244, |
| "step": 6615 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.9358261409278515e-06, |
| "loss": 3.3103, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.921496995167404e-06, |
| "loss": 3.3248, |
| "step": 6625 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.9071818870953745e-06, |
| "loss": 3.3114, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.892880856280713e-06, |
| "loss": 3.3497, |
| "step": 6635 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.878593942253456e-06, |
| "loss": 3.292, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.864321184504622e-06, |
| "loss": 3.3124, |
| "step": 6645 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.850062622486098e-06, |
| "loss": 3.301, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.835818295610531e-06, |
| "loss": 3.2937, |
| "step": 6655 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.821588243251223e-06, |
| "loss": 3.3096, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.807372504742013e-06, |
| "loss": 3.2841, |
| "step": 6665 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.7931711193771805e-06, |
| "loss": 3.2898, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.77898412641133e-06, |
| "loss": 3.3198, |
| "step": 6675 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.764811565059283e-06, |
| "loss": 3.3456, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.750653474495969e-06, |
| "loss": 3.3202, |
| "step": 6685 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.7365098938563195e-06, |
| "loss": 3.3289, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.722380862235156e-06, |
| "loss": 3.3172, |
| "step": 6695 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.708266418687092e-06, |
| "loss": 3.2886, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.694166602226404e-06, |
| "loss": 3.315, |
| "step": 6705 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.680081451826949e-06, |
| "loss": 3.3038, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.666011006422041e-06, |
| "loss": 3.297, |
| "step": 6715 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.651955304904348e-06, |
| "loss": 3.3293, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.637914386125781e-06, |
| "loss": 3.3173, |
| "step": 6725 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.623888288897395e-06, |
| "loss": 3.2995, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.60987705198927e-06, |
| "loss": 3.317, |
| "step": 6735 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.595880714130415e-06, |
| "loss": 3.3505, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.581899314008657e-06, |
| "loss": 3.2903, |
| "step": 6745 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.5679328902705224e-06, |
| "loss": 3.3395, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.553981481521156e-06, |
| "loss": 3.2834, |
| "step": 6755 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.540045126324182e-06, |
| "loss": 3.3247, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.52612386320163e-06, |
| "loss": 3.3246, |
| "step": 6765 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.512217730633806e-06, |
| "loss": 3.2869, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.498326767059196e-06, |
| "loss": 3.2991, |
| "step": 6775 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.484451010874351e-06, |
| "loss": 3.2981, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.4705905004337925e-06, |
| "loss": 3.311, |
| "step": 6785 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.456745274049898e-06, |
| "loss": 3.3263, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.442915369992802e-06, |
| "loss": 3.3202, |
| "step": 6795 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.4291008264902744e-06, |
| "loss": 3.3266, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.415301681727638e-06, |
| "loss": 3.3141, |
| "step": 6805 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.40151797384765e-06, |
| "loss": 3.3069, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.387749740950392e-06, |
| "loss": 3.3015, |
| "step": 6815 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.373997021093176e-06, |
| "loss": 3.3116, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.360259852290431e-06, |
| "loss": 3.2859, |
| "step": 6825 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.3465382725136015e-06, |
| "loss": 3.317, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.332832319691044e-06, |
| "loss": 3.3087, |
| "step": 6835 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.319142031707918e-06, |
| "loss": 3.3001, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.305467446406077e-06, |
| "loss": 3.3174, |
| "step": 6845 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.291808601583982e-06, |
| "loss": 3.3165, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.278165534996577e-06, |
| "loss": 3.3251, |
| "step": 6855 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.264538284355194e-06, |
| "loss": 3.308, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.250926887327451e-06, |
| "loss": 3.2723, |
| "step": 6865 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.2373313815371395e-06, |
| "loss": 3.3212, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.22375180456413e-06, |
| "loss": 3.2993, |
| "step": 6875 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.2101881939442645e-06, |
| "loss": 3.3228, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.1966405871692394e-06, |
| "loss": 3.2958, |
| "step": 6885 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.18310902168653e-06, |
| "loss": 3.2818, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.169593534899262e-06, |
| "loss": 3.3064, |
| "step": 6895 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.156094164166122e-06, |
| "loss": 3.2605, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.142610946801248e-06, |
| "loss": 3.3101, |
| "step": 6905 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.129143920074126e-06, |
| "loss": 3.3303, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.11569312120949e-06, |
| "loss": 3.304, |
| "step": 6915 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.10225858738722e-06, |
| "loss": 3.3225, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.088840355742238e-06, |
| "loss": 3.2888, |
| "step": 6925 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.075438463364394e-06, |
| "loss": 3.3037, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.062052947298387e-06, |
| "loss": 3.2861, |
| "step": 6935 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.0486838445436445e-06, |
| "loss": 3.3013, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.035331192054225e-06, |
| "loss": 3.3137, |
| "step": 6945 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.021995026738715e-06, |
| "loss": 3.3227, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.008675385460131e-06, |
| "loss": 3.3012, |
| "step": 6955 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.995372305035815e-06, |
| "loss": 3.3244, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.982085822237332e-06, |
| "loss": 3.3113, |
| "step": 6965 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.968815973790361e-06, |
| "loss": 3.2902, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.955562796374614e-06, |
| "loss": 3.3019, |
| "step": 6975 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.942326326623713e-06, |
| "loss": 3.2806, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.9291066011251024e-06, |
| "loss": 3.306, |
| "step": 6985 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.915903656419942e-06, |
| "loss": 3.3425, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.902717529003005e-06, |
| "loss": 3.2898, |
| "step": 6995 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.8895482553225874e-06, |
| "loss": 3.304, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.876395871780381e-06, |
| "loss": 3.2967, |
| "step": 7005 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.863260414731411e-06, |
| "loss": 3.2621, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.8501419204839085e-06, |
| "loss": 3.3084, |
| "step": 7015 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.837040425299209e-06, |
| "loss": 3.2824, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.8239559653916684e-06, |
| "loss": 3.3124, |
| "step": 7025 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.8108885769285555e-06, |
| "loss": 3.2996, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.7978382960299476e-06, |
| "loss": 3.2839, |
| "step": 7035 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.7848051587686363e-06, |
| "loss": 3.3113, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.771789201170025e-06, |
| "loss": 3.2749, |
| "step": 7045 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.7587904592120307e-06, |
| "loss": 3.3367, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.7458089688249823e-06, |
| "loss": 3.311, |
| "step": 7055 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.7328447658915277e-06, |
| "loss": 3.2968, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.719897886246521e-06, |
| "loss": 3.2959, |
| "step": 7065 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.7069683656769396e-06, |
| "loss": 3.2592, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.694056239921776e-06, |
| "loss": 3.317, |
| "step": 7075 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.6811615446719418e-06, |
| "loss": 3.3112, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.6682843155701684e-06, |
| "loss": 3.3045, |
| "step": 7085 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.655424588210906e-06, |
| "loss": 3.3146, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.6425823981402297e-06, |
| "loss": 3.304, |
| "step": 7095 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.6297577808557406e-06, |
| "loss": 3.2753, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.616950771806459e-06, |
| "loss": 3.2887, |
| "step": 7105 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.604161406392742e-06, |
| "loss": 3.2948, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.5913897199661716e-06, |
| "loss": 3.3032, |
| "step": 7115 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.5786357478294677e-06, |
| "loss": 3.3039, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.5658995252363805e-06, |
| "loss": 3.2876, |
| "step": 7125 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.5531810873916005e-06, |
| "loss": 3.2972, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.540480469450659e-06, |
| "loss": 3.2791, |
| "step": 7135 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.52779770651983e-06, |
| "loss": 3.2985, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.5151328336560363e-06, |
| "loss": 3.3382, |
| "step": 7145 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.50248588586674e-06, |
| "loss": 3.3049, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.4898568981098678e-06, |
| "loss": 3.2783, |
| "step": 7155 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.477245905293698e-06, |
| "loss": 3.2496, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.464652942276767e-06, |
| "loss": 3.2848, |
| "step": 7165 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.452078043867777e-06, |
| "loss": 3.2948, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.4395212448254944e-06, |
| "loss": 3.3074, |
| "step": 7175 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.4269825798586576e-06, |
| "loss": 3.2856, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.4144620836258835e-06, |
| "loss": 3.2771, |
| "step": 7185 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.4019597907355586e-06, |
| "loss": 3.3349, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.389475735745761e-06, |
| "loss": 3.3313, |
| "step": 7195 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.377009953164154e-06, |
| "loss": 3.2901, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.3645624774478967e-06, |
| "loss": 3.2823, |
| "step": 7205 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.3521333430035397e-06, |
| "loss": 3.284, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.3397225841869408e-06, |
| "loss": 3.2731, |
| "step": 7215 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.327330235303161e-06, |
| "loss": 3.2848, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.314956330606378e-06, |
| "loss": 3.31, |
| "step": 7225 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.3026009042997864e-06, |
| "loss": 3.3137, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.2902639905354948e-06, |
| "loss": 3.3119, |
| "step": 7235 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.2779456234144545e-06, |
| "loss": 3.3162, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.265645836986343e-06, |
| "loss": 3.2823, |
| "step": 7245 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.253364665249481e-06, |
| "loss": 3.3369, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.241102142150734e-06, |
| "loss": 3.3076, |
| "step": 7255 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.2288583015854234e-06, |
| "loss": 3.3118, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.2166331773972227e-06, |
| "loss": 3.2874, |
| "step": 7265 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.204426803378076e-06, |
| "loss": 3.28, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.192239213268099e-06, |
| "loss": 3.2863, |
| "step": 7275 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1800704407554884e-06, |
| "loss": 3.2691, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1679205194764173e-06, |
| "loss": 3.2996, |
| "step": 7285 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1557894830149616e-06, |
| "loss": 3.302, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1436773649029906e-06, |
| "loss": 3.2936, |
| "step": 7295 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1315841986200847e-06, |
| "loss": 3.2785, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1195100175934357e-06, |
| "loss": 3.3168, |
| "step": 7305 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.107454855197759e-06, |
| "loss": 3.3032, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.0954187447551996e-06, |
| "loss": 3.2976, |
| "step": 7315 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.0834017195352405e-06, |
| "loss": 3.2897, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.0714038127546142e-06, |
| "loss": 3.3018, |
| "step": 7325 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.0594250575771954e-06, |
| "loss": 3.299, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.047465487113933e-06, |
| "loss": 3.2533, |
| "step": 7335 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.035525134422743e-06, |
| "loss": 3.3027, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.023604032508419e-06, |
| "loss": 3.2931, |
| "step": 7345 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.011702214322545e-06, |
| "loss": 3.3184, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.999819712763402e-06, |
| "loss": 3.2858, |
| "step": 7355 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.9879565606758755e-06, |
| "loss": 3.3183, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.9784799926630415e-06, |
| "loss": 3.2936, |
| "step": 7365 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.9666517522227576e-06, |
| "loss": 3.2514, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.9548429529349452e-06, |
| "loss": 3.2958, |
| "step": 7375 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.943053627440771e-06, |
| "loss": 3.2819, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.931283808327562e-06, |
| "loss": 3.2744, |
| "step": 7385 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.9195335281287395e-06, |
| "loss": 3.3165, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.9078028193237107e-06, |
| "loss": 3.2909, |
| "step": 7395 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.8960917143377865e-06, |
| "loss": 3.2847, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.8844002455420894e-06, |
| "loss": 3.2775, |
| "step": 7405 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.8727284452534634e-06, |
| "loss": 3.2883, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.8610763457343895e-06, |
| "loss": 3.2857, |
| "step": 7415 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.849443979192892e-06, |
| "loss": 3.2906, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.837831377782443e-06, |
| "loss": 3.3134, |
| "step": 7425 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.8262385736018925e-06, |
| "loss": 3.3238, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.814665598695362e-06, |
| "loss": 3.282, |
| "step": 7435 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.803112485052163e-06, |
| "loss": 3.2921, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.7915792646067088e-06, |
| "loss": 3.3177, |
| "step": 7445 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.7800659692384237e-06, |
| "loss": 3.3009, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.7685726307716564e-06, |
| "loss": 3.3029, |
| "step": 7455 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.7570992809755937e-06, |
| "loss": 3.2818, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.7479350142952733e-06, |
| "loss": 3.2972, |
| "step": 7465 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.736497723988406e-06, |
| "loss": 3.2862, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.725080511011745e-06, |
| "loss": 3.2853, |
| "step": 7475 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.713683406924057e-06, |
| "loss": 3.2694, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.702306443228516e-06, |
| "loss": 3.2785, |
| "step": 7485 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.6909496513726354e-06, |
| "loss": 3.2912, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.6796130627481663e-06, |
| "loss": 3.298, |
| "step": 7495 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.668296708691015e-06, |
| "loss": 3.2919, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.6570006204811595e-06, |
| "loss": 3.2967, |
| "step": 7505 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.6457248293425576e-06, |
| "loss": 3.2963, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.634469366443063e-06, |
| "loss": 3.323, |
| "step": 7515 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.623234262894343e-06, |
| "loss": 3.2898, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.6120195497517818e-06, |
| "loss": 3.3104, |
| "step": 7525 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.600825258014407e-06, |
| "loss": 3.2927, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.589651418624798e-06, |
| "loss": 3.3129, |
| "step": 7535 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.578498062468999e-06, |
| "loss": 3.3192, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.567365220376441e-06, |
| "loss": 3.3048, |
| "step": 7545 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.556252923119843e-06, |
| "loss": 3.3125, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.5451612014151427e-06, |
| "loss": 3.2848, |
| "step": 7555 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.5340900859214003e-06, |
| "loss": 3.2921, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.5230396072407204e-06, |
| "loss": 3.2902, |
| "step": 7565 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.5120097959181578e-06, |
| "loss": 3.2713, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.501000682441647e-06, |
| "loss": 3.2878, |
| "step": 7575 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4900122972419083e-06, |
| "loss": 3.3087, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4790446706923664e-06, |
| "loss": 3.2623, |
| "step": 7585 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4680978331090655e-06, |
| "loss": 3.2834, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4571718147505872e-06, |
| "loss": 3.2755, |
| "step": 7595 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4462666458179664e-06, |
| "loss": 3.3104, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4353823564546064e-06, |
| "loss": 3.3256, |
| "step": 7605 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.424518976746194e-06, |
| "loss": 3.2682, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4136765367206216e-06, |
| "loss": 3.255, |
| "step": 7615 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.405017681413605e-06, |
| "loss": 3.2806, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.394213008302627e-06, |
| "loss": 3.2818, |
| "step": 7625 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.3834293586444e-06, |
| "loss": 3.2964, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.372666762246433e-06, |
| "loss": 3.3073, |
| "step": 7635 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.3619252488580345e-06, |
| "loss": 3.3, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.3512048481702454e-06, |
| "loss": 3.2655, |
| "step": 7645 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.3405055898157416e-06, |
| "loss": 3.3318, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.3298275033687613e-06, |
| "loss": 3.298, |
| "step": 7655 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.3191706183450225e-06, |
| "loss": 3.3327, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.3085349642016317e-06, |
| "loss": 3.2962, |
| "step": 7665 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.297920570337019e-06, |
| "loss": 3.3318, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.287327466090845e-06, |
| "loss": 3.31, |
| "step": 7675 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.2767556807439216e-06, |
| "loss": 3.2856, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.2662052435181335e-06, |
| "loss": 3.3154, |
| "step": 7685 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.2556761835763576e-06, |
| "loss": 3.2669, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.245168530022378e-06, |
| "loss": 3.2833, |
| "step": 7695 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.234682311900812e-06, |
| "loss": 3.2926, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.2242175581970247e-06, |
| "loss": 3.2746, |
| "step": 7705 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.213774297837047e-06, |
| "loss": 3.304, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.2033525596875027e-06, |
| "loss": 3.3048, |
| "step": 7715 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.192952372555528e-06, |
| "loss": 3.2855, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.182573765188686e-06, |
| "loss": 3.2732, |
| "step": 7725 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.1722167662748874e-06, |
| "loss": 3.2926, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.161881404442321e-06, |
| "loss": 3.285, |
| "step": 7735 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.151567708259361e-06, |
| "loss": 3.3023, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.1412757062345022e-06, |
| "loss": 3.2894, |
| "step": 7745 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.1310054268162628e-06, |
| "loss": 3.2888, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.120756898393126e-06, |
| "loss": 3.2795, |
| "step": 7755 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.1105301492934503e-06, |
| "loss": 3.2553, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.1003252077853906e-06, |
| "loss": 3.248, |
| "step": 7765 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.090142102076825e-06, |
| "loss": 3.3011, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0799808603152737e-06, |
| "loss": 3.3006, |
| "step": 7775 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.069841510587821e-06, |
| "loss": 3.2864, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0597240809210404e-06, |
| "loss": 3.2518, |
| "step": 7785 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0496285992809163e-06, |
| "loss": 3.2973, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.039555093572757e-06, |
| "loss": 3.313, |
| "step": 7795 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0295035916411377e-06, |
| "loss": 3.2947, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0194741212698066e-06, |
| "loss": 3.2519, |
| "step": 7805 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0094667101816133e-06, |
| "loss": 3.3003, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.9994813860384342e-06, |
| "loss": 3.3083, |
| "step": 7815 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.989518176441094e-06, |
| "loss": 3.2958, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.9795771089292913e-06, |
| "loss": 3.2794, |
| "step": 7825 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.9696582109815145e-06, |
| "loss": 3.2749, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.959761510014979e-06, |
| "loss": 3.276, |
| "step": 7835 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.9498870333855436e-06, |
| "loss": 3.3054, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.9400348083876308e-06, |
| "loss": 3.2713, |
| "step": 7845 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.9302048622541635e-06, |
| "loss": 3.2601, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.9203972221564772e-06, |
| "loss": 3.3, |
| "step": 7855 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.9106119152042545e-06, |
| "loss": 3.2794, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.9008489684454456e-06, |
| "loss": 3.2949, |
| "step": 7865 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8911084088661903e-06, |
| "loss": 3.262, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8813902633907499e-06, |
| "loss": 3.3085, |
| "step": 7875 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8716945588814339e-06, |
| "loss": 3.3054, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8620213221385108e-06, |
| "loss": 3.282, |
| "step": 7885 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8523705799001556e-06, |
| "loss": 3.2407, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.842742358842362e-06, |
| "loss": 3.3074, |
| "step": 7895 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8331366855788702e-06, |
| "loss": 3.2899, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8235535866610975e-06, |
| "loss": 3.2802, |
| "step": 7905 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8139930885780621e-06, |
| "loss": 3.2734, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8044552177563101e-06, |
| "loss": 3.2506, |
| "step": 7915 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7949400005598416e-06, |
| "loss": 3.2716, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7854474632900431e-06, |
| "loss": 3.2922, |
| "step": 7925 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7759776321856014e-06, |
| "loss": 3.2956, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7665305334224514e-06, |
| "loss": 3.2613, |
| "step": 7935 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7571061931136845e-06, |
| "loss": 3.3031, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7477046373094908e-06, |
| "loss": 3.3053, |
| "step": 7945 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7383258919970746e-06, |
| "loss": 3.2591, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7289699831005946e-06, |
| "loss": 3.3021, |
| "step": 7955 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7196369364810816e-06, |
| "loss": 3.3034, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7103267779363786e-06, |
| "loss": 3.3135, |
| "step": 7965 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.701039533201052e-06, |
| "loss": 3.2869, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6917752279463406e-06, |
| "loss": 3.307, |
| "step": 7975 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6825338877800712e-06, |
| "loss": 3.2935, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.673315538246595e-06, |
| "loss": 3.2563, |
| "step": 7985 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6641202048267102e-06, |
| "loss": 3.3076, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6549479129375966e-06, |
| "loss": 3.3103, |
| "step": 7995 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6457986879327459e-06, |
| "loss": 3.2997, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6366725551018868e-06, |
| "loss": 3.2573, |
| "step": 8005 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6275695396709223e-06, |
| "loss": 3.2803, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.618489666801848e-06, |
| "loss": 3.2775, |
| "step": 8015 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6094329615926974e-06, |
| "loss": 3.269, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6003994490774622e-06, |
| "loss": 3.2442, |
| "step": 8025 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.5913891542260284e-06, |
| "loss": 3.3082, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.5824021019441016e-06, |
| "loss": 3.244, |
| "step": 8035 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.573438317073146e-06, |
| "loss": 3.3122, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.5644978243903087e-06, |
| "loss": 3.299, |
| "step": 8045 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.5555806486083559e-06, |
| "loss": 3.2703, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.5466868143755975e-06, |
| "loss": 3.3111, |
| "step": 8055 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.53781634627583e-06, |
| "loss": 3.2756, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.528969268828261e-06, |
| "loss": 3.2836, |
| "step": 8065 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.520145606487442e-06, |
| "loss": 3.2704, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.5113453836432034e-06, |
| "loss": 3.2674, |
| "step": 8075 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.502568624620584e-06, |
| "loss": 3.298, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4938153536797684e-06, |
| "loss": 3.2892, |
| "step": 8085 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4850855950160103e-06, |
| "loss": 3.2547, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4763793727595788e-06, |
| "loss": 3.2729, |
| "step": 8095 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4676967109756823e-06, |
| "loss": 3.2964, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4590376336644086e-06, |
| "loss": 3.2885, |
| "step": 8105 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4504021647606448e-06, |
| "loss": 3.2628, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4417903281340306e-06, |
| "loss": 3.293, |
| "step": 8115 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4332021475888801e-06, |
| "loss": 3.2924, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4246376468641198e-06, |
| "loss": 3.2881, |
| "step": 8125 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4160968496332183e-06, |
| "loss": 3.3112, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.4075797795041279e-06, |
| "loss": 3.2929, |
| "step": 8135 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3990864600192133e-06, |
| "loss": 3.2537, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.390616914655195e-06, |
| "loss": 3.2784, |
| "step": 8145 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3821711668230675e-06, |
| "loss": 3.2776, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3737492398680551e-06, |
| "loss": 3.2766, |
| "step": 8155 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3653511570695355e-06, |
| "loss": 3.2719, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.356976941640976e-06, |
| "loss": 3.289, |
| "step": 8165 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3486266167298733e-06, |
| "loss": 3.2855, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.340300205417686e-06, |
| "loss": 3.3072, |
| "step": 8175 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.331997730719773e-06, |
| "loss": 3.2851, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3237192155853284e-06, |
| "loss": 3.2476, |
| "step": 8185 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3154646828973217e-06, |
| "loss": 3.2732, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3072341554724232e-06, |
| "loss": 3.3164, |
| "step": 8195 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2990276560609594e-06, |
| "loss": 3.2812, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2908452073468348e-06, |
| "loss": 3.2912, |
| "step": 8205 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.282686831947474e-06, |
| "loss": 3.2888, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2745525524137626e-06, |
| "loss": 3.2631, |
| "step": 8215 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2664423912299807e-06, |
| "loss": 3.307, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2583563708137393e-06, |
| "loss": 3.2872, |
| "step": 8225 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2502945135159272e-06, |
| "loss": 3.2584, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2422568416206337e-06, |
| "loss": 3.2873, |
| "step": 8235 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2342433773451036e-06, |
| "loss": 3.2726, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2262541428396668e-06, |
| "loss": 3.2613, |
| "step": 8245 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2182891601876778e-06, |
| "loss": 3.2725, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2103484514054564e-06, |
| "loss": 3.2914, |
| "step": 8255 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.202432038442226e-06, |
| "loss": 3.3065, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.194539943180052e-06, |
| "loss": 3.315, |
| "step": 8265 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1866721874337827e-06, |
| "loss": 3.2763, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1788287929509924e-06, |
| "loss": 3.2773, |
| "step": 8275 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1710097814119093e-06, |
| "loss": 3.2957, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1632151744293707e-06, |
| "loss": 3.273, |
| "step": 8285 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1554449935487533e-06, |
| "loss": 3.2953, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1476992602479175e-06, |
| "loss": 3.2936, |
| "step": 8295 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.139977995937147e-06, |
| "loss": 3.2597, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1322812219590917e-06, |
| "loss": 3.2854, |
| "step": 8305 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1246089595887023e-06, |
| "loss": 3.2469, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1169612300331834e-06, |
| "loss": 3.2948, |
| "step": 8315 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1093380544319166e-06, |
| "loss": 3.2602, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.101739453856423e-06, |
| "loss": 3.2798, |
| "step": 8325 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0941654493102893e-06, |
| "loss": 3.2818, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0866160617291188e-06, |
| "loss": 3.2841, |
| "step": 8335 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0790913119804691e-06, |
| "loss": 3.2983, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0715912208637925e-06, |
| "loss": 3.2592, |
| "step": 8345 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0641158091103832e-06, |
| "loss": 3.3059, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0566650973833204e-06, |
| "loss": 3.2818, |
| "step": 8355 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0492391062774076e-06, |
| "loss": 3.248, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0418378563191157e-06, |
| "loss": 3.2681, |
| "step": 8365 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0344613679665306e-06, |
| "loss": 3.2611, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.027109661609288e-06, |
| "loss": 3.2629, |
| "step": 8375 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.019782757568528e-06, |
| "loss": 3.2744, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0124806760968341e-06, |
| "loss": 3.3047, |
| "step": 8385 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0052034373781716e-06, |
| "loss": 3.277, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.979510615278432e-07, |
| "loss": 3.292, |
| "step": 8395 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.90723568592422e-07, |
| "loss": 3.2525, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.835209785497045e-07, |
| "loss": 3.2571, |
| "step": 8405 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.763433113086528e-07, |
| "loss": 3.2751, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.69190586709332e-07, |
| "loss": 3.2489, |
| "step": 8415 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.620628245228714e-07, |
| "loss": 3.2893, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.549600444513952e-07, |
| "loss": 3.3105, |
| "step": 8425 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.478822661279763e-07, |
| "loss": 3.2897, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.408295091165765e-07, |
| "loss": 3.2818, |
| "step": 8435 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.33801792911998e-07, |
| "loss": 3.2626, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.267991369398255e-07, |
| "loss": 3.3027, |
| "step": 8445 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.198215605563732e-07, |
| "loss": 3.3029, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.128690830486341e-07, |
| "loss": 3.2613, |
| "step": 8455 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.059417236342194e-07, |
| "loss": 3.2515, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.990395014613163e-07, |
| "loss": 3.2854, |
| "step": 8465 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.921624356086256e-07, |
| "loss": 3.2896, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.853105450853128e-07, |
| "loss": 3.2407, |
| "step": 8475 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.784838488309577e-07, |
| "loss": 3.2668, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.716823657154971e-07, |
| "loss": 3.2732, |
| "step": 8485 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.649061145391758e-07, |
| "loss": 3.253, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.581551140324962e-07, |
| "loss": 3.2737, |
| "step": 8495 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.514293828561593e-07, |
| "loss": 3.2807, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.447289396010228e-07, |
| "loss": 3.2653, |
| "step": 8505 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.380538027880425e-07, |
| "loss": 3.2816, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.314039908682247e-07, |
| "loss": 3.2654, |
| "step": 8515 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.247795222225763e-07, |
| "loss": 3.326, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 8.181804151620465e-07, |
| "loss": 3.2938, |
| "step": 8525 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 8.116066879274875e-07, |
| "loss": 3.2424, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 8.050583586895944e-07, |
| "loss": 3.2997, |
| "step": 8535 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.985354455488615e-07, |
| "loss": 3.3034, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.920379665355237e-07, |
| "loss": 3.283, |
| "step": 8545 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.855659396095183e-07, |
| "loss": 3.2677, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.791193826604282e-07, |
| "loss": 3.2842, |
| "step": 8555 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.726983135074328e-07, |
| "loss": 3.3262, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.663027498992592e-07, |
| "loss": 3.2995, |
| "step": 8565 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.599327095141363e-07, |
| "loss": 3.2576, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.535882099597391e-07, |
| "loss": 3.2752, |
| "step": 8575 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.472692687731498e-07, |
| "loss": 3.2643, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.409759034207975e-07, |
| "loss": 3.2734, |
| "step": 8585 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.347081312984194e-07, |
| "loss": 3.2759, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.284659697310104e-07, |
| "loss": 3.2535, |
| "step": 8595 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.222494359727716e-07, |
| "loss": 3.2791, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.160585472070669e-07, |
| "loss": 3.2831, |
| "step": 8605 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.098933205463742e-07, |
| "loss": 3.2818, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.037537730322363e-07, |
| "loss": 3.2748, |
| "step": 8615 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 6.97639921635217e-07, |
| "loss": 3.2676, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.915517832548524e-07, |
| "loss": 3.3018, |
| "step": 8625 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.854893747196034e-07, |
| "loss": 3.2746, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.794527127868078e-07, |
| "loss": 3.3115, |
| "step": 8635 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.734418141426391e-07, |
| "loss": 3.2836, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.674566954020589e-07, |
| "loss": 3.2557, |
| "step": 8645 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.61497373108766e-07, |
| "loss": 3.2776, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.55563863735157e-07, |
| "loss": 3.2634, |
| "step": 8655 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.496561836822745e-07, |
| "loss": 3.2757, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.43774349279771e-07, |
| "loss": 3.2799, |
| "step": 8665 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.37918376785851e-07, |
| "loss": 3.2686, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.320882823872409e-07, |
| "loss": 3.2869, |
| "step": 8675 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.262840821991278e-07, |
| "loss": 3.2576, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.205057922651303e-07, |
| "loss": 3.3013, |
| "step": 8685 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.147534285572443e-07, |
| "loss": 3.2858, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.090270069758042e-07, |
| "loss": 3.3093, |
| "step": 8695 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.03326543349434e-07, |
| "loss": 3.2905, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.976520534350094e-07, |
| "loss": 3.298, |
| "step": 8705 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.920035529176082e-07, |
| "loss": 3.2839, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.863810574104723e-07, |
| "loss": 3.2714, |
| "step": 8715 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.807845824549596e-07, |
| "loss": 3.2802, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.75214143520505e-07, |
| "loss": 3.2793, |
| "step": 8725 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.696697560045772e-07, |
| "loss": 3.2689, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.641514352326338e-07, |
| "loss": 3.2587, |
| "step": 8735 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.586591964580812e-07, |
| "loss": 3.274, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.531930548622311e-07, |
| "loss": 3.284, |
| "step": 8745 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.477530255542573e-07, |
| "loss": 3.2707, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.423391235711584e-07, |
| "loss": 3.2782, |
| "step": 8755 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.369513638777147e-07, |
| "loss": 3.2873, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.315897613664378e-07, |
| "loss": 3.2874, |
| "step": 8765 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.262543308575451e-07, |
| "loss": 3.2797, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.209450870989086e-07, |
| "loss": 3.3091, |
| "step": 8775 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.156620447660165e-07, |
| "loss": 3.2891, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.104052184619324e-07, |
| "loss": 3.295, |
| "step": 8785 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.051746227172538e-07, |
| "loss": 3.2895, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.999702719900767e-07, |
| "loss": 3.2923, |
| "step": 8795 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.947921806659495e-07, |
| "loss": 3.3033, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.89640363057834e-07, |
| "loss": 3.2588, |
| "step": 8805 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.845148334060734e-07, |
| "loss": 3.2688, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.794156058783428e-07, |
| "loss": 3.3199, |
| "step": 8815 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.7434269456961725e-07, |
| "loss": 3.2976, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.692961135021268e-07, |
| "loss": 3.2956, |
| "step": 8825 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.6427587662532636e-07, |
| "loss": 3.2688, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.5928199781584584e-07, |
| "loss": 3.3109, |
| "step": 8835 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.5431449087746216e-07, |
| "loss": 3.2951, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.4937336954105516e-07, |
| "loss": 3.243, |
| "step": 8845 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.444586474645707e-07, |
| "loss": 3.2626, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.395703382329852e-07, |
| "loss": 3.2786, |
| "step": 8855 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.3470845535826255e-07, |
| "loss": 3.2755, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.2987301227932377e-07, |
| "loss": 3.2818, |
| "step": 8865 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.2506402236200616e-07, |
| "loss": 3.2512, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.202814988990278e-07, |
| "loss": 3.2715, |
| "step": 8875 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.1552545510994746e-07, |
| "loss": 3.2619, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.107959041411314e-07, |
| "loss": 3.2924, |
| "step": 8885 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.0609285906571536e-07, |
| "loss": 3.2535, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.01416332883573e-07, |
| "loss": 3.2827, |
| "step": 8895 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.9676633852126834e-07, |
| "loss": 3.2735, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.9214288883203444e-07, |
| "loss": 3.2674, |
| "step": 8905 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.875459965957307e-07, |
| "loss": 3.2839, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.829756745188029e-07, |
| "loss": 3.2896, |
| "step": 8915 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.7843193523426026e-07, |
| "loss": 3.3003, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.7391479130162833e-07, |
| "loss": 3.2831, |
| "step": 8925 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.6942425520692047e-07, |
| "loss": 3.2677, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.6496033936260334e-07, |
| "loss": 3.2695, |
| "step": 8935 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.6052305610756235e-07, |
| "loss": 3.2717, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.5611241770706293e-07, |
| "loss": 3.2862, |
| "step": 8945 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.5172843635272403e-07, |
| "loss": 3.2815, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.473711241624789e-07, |
| "loss": 3.3103, |
| "step": 8955 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.430404931805464e-07, |
| "loss": 3.2718, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.3873655537739026e-07, |
| "loss": 3.263, |
| "step": 8965 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.3445932264969504e-07, |
| "loss": 3.2483, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.302088068203246e-07, |
| "loss": 3.2871, |
| "step": 8975 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.259850196382985e-07, |
| "loss": 3.294, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.217879727787487e-07, |
| "loss": 3.2767, |
| "step": 8985 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.176176778428974e-07, |
| "loss": 3.2855, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.134741463580204e-07, |
| "loss": 3.2995, |
| "step": 8995 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.093573897774149e-07, |
| "loss": 3.2776, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.052674194803662e-07, |
| "loss": 3.2495, |
| "step": 9005 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.0120424677212434e-07, |
| "loss": 3.2901, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.971678828838609e-07, |
| "loss": 3.2924, |
| "step": 9015 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.931583389726478e-07, |
| "loss": 3.2595, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.8917562612142293e-07, |
| "loss": 3.2705, |
| "step": 9025 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.852197553389568e-07, |
| "loss": 3.2435, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.812907375598273e-07, |
| "loss": 3.3048, |
| "step": 9035 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.7738858364438457e-07, |
| "loss": 3.2694, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.7351330437872525e-07, |
| "loss": 3.2592, |
| "step": 9045 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.696649104746607e-07, |
| "loss": 3.2565, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.6584341256968624e-07, |
| "loss": 3.275, |
| "step": 9055 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.6204882122695343e-07, |
| "loss": 3.268, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.58281146935242e-07, |
| "loss": 3.2635, |
| "step": 9065 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.5454040010892354e-07, |
| "loss": 3.2905, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.5082659108794595e-07, |
| "loss": 3.3049, |
| "step": 9075 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.4713973013779204e-07, |
| "loss": 3.2745, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.434798274494587e-07, |
| "loss": 3.2704, |
| "step": 9085 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.398468931394249e-07, |
| "loss": 3.2951, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.3624093724962883e-07, |
| "loss": 3.2698, |
| "step": 9095 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.3266196974743084e-07, |
| "loss": 3.2305, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.291100005255964e-07, |
| "loss": 3.2776, |
| "step": 9105 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.2558503940226296e-07, |
| "loss": 3.2877, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.2208709612091096e-07, |
| "loss": 3.2611, |
| "step": 9115 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.1861618035034394e-07, |
| "loss": 3.2604, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.1517230168465408e-07, |
| "loss": 3.2839, |
| "step": 9125 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.1175546964320226e-07, |
| "loss": 3.3282, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.083656936705858e-07, |
| "loss": 3.2492, |
| "step": 9135 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.050029831366185e-07, |
| "loss": 3.2816, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.0166734733629843e-07, |
| "loss": 3.2894, |
| "step": 9145 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.983587954897881e-07, |
| "loss": 3.2791, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.9507733674238305e-07, |
| "loss": 3.265, |
| "step": 9155 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.918229801644944e-07, |
| "loss": 3.2694, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.885957347516132e-07, |
| "loss": 3.2654, |
| "step": 9165 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.8539560942429592e-07, |
| "loss": 3.301, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.822226130281335e-07, |
| "loss": 3.2797, |
| "step": 9175 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.7907675433372907e-07, |
| "loss": 3.2806, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.7595804203667355e-07, |
| "loss": 3.3021, |
| "step": 9185 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.7286648475752122e-07, |
| "loss": 3.2882, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6980209104176747e-07, |
| "loss": 3.2911, |
| "step": 9195 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6676486935982116e-07, |
| "loss": 3.2956, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6375482810698673e-07, |
| "loss": 3.2737, |
| "step": 9205 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.6077197560343537e-07, |
| "loss": 3.2746, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.5781632009418513e-07, |
| "loss": 3.2838, |
| "step": 9215 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.5488786974908188e-07, |
| "loss": 3.3016, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.5198663266276724e-07, |
| "loss": 3.286, |
| "step": 9225 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.4911261685466416e-07, |
| "loss": 3.2825, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.4626583026895235e-07, |
| "loss": 3.2414, |
| "step": 9235 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.4344628077454626e-07, |
| "loss": 3.2936, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.406539761650727e-07, |
| "loss": 3.2469, |
| "step": 9245 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.3788892415884881e-07, |
| "loss": 3.2852, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.3515113239886302e-07, |
| "loss": 3.283, |
| "step": 9255 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.3244060845275298e-07, |
| "loss": 3.259, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.2975735981278327e-07, |
| "loss": 3.2594, |
| "step": 9265 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.2710139389582654e-07, |
| "loss": 3.25, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.2447271804334137e-07, |
| "loss": 3.2664, |
| "step": 9275 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.2187133952135445e-07, |
| "loss": 3.2906, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.1929726552043607e-07, |
| "loss": 3.2723, |
| "step": 9285 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.1675050315568703e-07, |
| "loss": 3.2456, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.1423105946671064e-07, |
| "loss": 3.2726, |
| "step": 9295 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.1173894141759955e-07, |
| "loss": 3.2828, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.092741558969146e-07, |
| "loss": 3.2799, |
| "step": 9305 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.068367097176659e-07, |
| "loss": 3.2998, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.0442660961729523e-07, |
| "loss": 3.296, |
| "step": 9315 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.020438622576514e-07, |
| "loss": 3.2948, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.968847422498152e-08, |
| "loss": 3.2566, |
| "step": 9325 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.736045202990651e-08, |
| "loss": 3.2575, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.505980210740007e-08, |
| "loss": 3.273, |
| "step": 9335 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.278653081678079e-08, |
| "loss": 3.2977, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.054064444168564e-08, |
| "loss": 3.2649, |
| "step": 9345 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 8.832214919005877e-08, |
| "loss": 3.2967, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 8.613105119412712e-08, |
| "loss": 3.3035, |
| "step": 9355 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 8.396735651039046e-08, |
| "loss": 3.2843, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 8.183107111960353e-08, |
| "loss": 3.2812, |
| "step": 9365 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 7.97222009267551e-08, |
| "loss": 3.2732, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 7.764075176105445e-08, |
| "loss": 3.2867, |
| "step": 9375 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 7.558672937591937e-08, |
| "loss": 3.2719, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 7.356013944895046e-08, |
| "loss": 3.279, |
| "step": 9385 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 7.156098758192453e-08, |
| "loss": 3.2891, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 6.958927930077685e-08, |
| "loss": 3.2454, |
| "step": 9395 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 6.764502005558115e-08, |
| "loss": 3.2557, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.572821522054295e-08, |
| "loss": 3.2519, |
| "step": 9405 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.383887009397515e-08, |
| "loss": 3.2945, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.197698989829026e-08, |
| "loss": 3.2533, |
| "step": 9415 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.014257977998594e-08, |
| "loss": 3.2706, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.833564480962617e-08, |
| "loss": 3.2836, |
| "step": 9425 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.655618998182899e-08, |
| "loss": 3.2431, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.480422021525655e-08, |
| "loss": 3.2896, |
| "step": 9435 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.307974035259511e-08, |
| "loss": 3.2848, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.1382755160549466e-08, |
| "loss": 3.2868, |
| "step": 9445 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.971326932981968e-08, |
| "loss": 3.2602, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.807128747509882e-08, |
| "loss": 3.2547, |
| "step": 9455 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.645681413505299e-08, |
| "loss": 3.289, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.4869853772310235e-08, |
| "loss": 3.2911, |
| "step": 9465 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.331041077344944e-08, |
| "loss": 3.2862, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.177848944898699e-08, |
| "loss": 3.2793, |
| "step": 9475 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.027409403336901e-08, |
| "loss": 3.2741, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.8797228684952504e-08, |
| "loss": 3.2785, |
| "step": 9485 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.734789748599754e-08, |
| "loss": 3.2867, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.5926104442658426e-08, |
| "loss": 3.2456, |
| "step": 9495 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.453185348496702e-08, |
| "loss": 3.2583, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.316514846682939e-08, |
| "loss": 3.2788, |
| "step": 9505 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.182599316600699e-08, |
| "loss": 3.2592, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.0514391284111043e-08, |
| "loss": 3.2706, |
| "step": 9515 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.9230346446591506e-08, |
| "loss": 3.259, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.797386220272702e-08, |
| "loss": 3.2798, |
| "step": 9525 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.674494202561384e-08, |
| "loss": 3.259, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.5543589312160276e-08, |
| "loss": 3.2784, |
| "step": 9535 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.4369807383071154e-08, |
| "loss": 3.2779, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.3223599482842252e-08, |
| "loss": 3.2632, |
| "step": 9545 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.2104968779752546e-08, |
| "loss": 3.2426, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.1013918365851982e-08, |
| "loss": 3.263, |
| "step": 9555 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.9950451256957048e-08, |
| "loss": 3.2811, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.8914570392636332e-08, |
| "loss": 3.2586, |
| "step": 9565 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.7906278636210527e-08, |
| "loss": 3.2595, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.6925578774737994e-08, |
| "loss": 3.2839, |
| "step": 9575 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5972473519009212e-08, |
| "loss": 3.2604, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5046965503540124e-08, |
| "loss": 3.2561, |
| "step": 9585 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.4149057286562128e-08, |
| "loss": 3.2574, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.327875135001988e-08, |
| "loss": 3.2554, |
| "step": 9595 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.243605009955906e-08, |
| "loss": 3.2622, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.1620955864523053e-08, |
| "loss": 3.2632, |
| "step": 9605 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.0833470897947396e-08, |
| "loss": 3.2642, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.007359737654756e-08, |
| "loss": 3.284, |
| "step": 9615 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.341337400721185e-09, |
| "loss": 3.2716, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 8.636692994535845e-09, |
| "loss": 3.3286, |
| "step": 9625 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 7.959666105727959e-09, |
| "loss": 3.2922, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 7.310258605691678e-09, |
| "loss": 3.2543, |
| "step": 9635 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 6.6884722894822174e-09, |
| "loss": 3.2923, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 6.094308875801425e-09, |
| "loss": 3.2795, |
| "step": 9645 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 5.52777000700111e-09, |
| "loss": 3.2784, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 4.988857249071943e-09, |
| "loss": 3.242, |
| "step": 9655 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 4.4775720916445665e-09, |
| "loss": 3.2848, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.993915947982929e-09, |
| "loss": 3.2318, |
| "step": 9665 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.53789015497763e-09, |
| "loss": 3.2863, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.109495973150356e-09, |
| "loss": 3.2526, |
| "step": 9675 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.7087345866394497e-09, |
| "loss": 3.2524, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.335607103207682e-09, |
| "loss": 3.2777, |
| "step": 9685 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.990114554228928e-09, |
| "loss": 3.278, |
| "step": 9690 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.6722578946937184e-09, |
| "loss": 3.2891, |
| "step": 9695 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.3820380032025794e-09, |
| "loss": 3.2557, |
| "step": 9700 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.1194556819627e-09, |
| "loss": 3.2817, |
| "step": 9705 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 8.845116567879342e-10, |
| "loss": 3.2412, |
| "step": 9710 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 6.772065770976888e-10, |
| "loss": 3.2796, |
| "step": 9715 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.975410159102634e-10, |
| "loss": 3.2618, |
| "step": 9720 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.4551546984729067e-10, |
| "loss": 3.2715, |
| "step": 9725 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2.211303591292957e-10, |
| "loss": 3.3119, |
| "step": 9730 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.2438602757125495e-10, |
| "loss": 3.2758, |
| "step": 9735 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 5.528274259147814e-11, |
| "loss": 3.2601, |
| "step": 9740 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 9741, |
| "total_flos": 2.4313337791056445e+18, |
| "train_loss": 3.456989575382033, |
| "train_runtime": 78610.9062, |
| "train_samples_per_second": 15.862, |
| "train_steps_per_second": 0.124 |
| } |
| ], |
| "max_steps": 9741, |
| "num_train_epochs": 1, |
| "total_flos": 2.4313337791056445e+18, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|