diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 19.169274992015332, + "global_step": 30000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 6.389776357827476e-07, + "loss": 11.1023, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 1.2779552715654952e-06, + "loss": 11.0812, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 1.916932907348243e-06, + "loss": 11.1009, + "step": 30 + }, + { + "epoch": 0.03, + "learning_rate": 2.5559105431309904e-06, + "loss": 11.1185, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 3.194888178913738e-06, + "loss": 11.0872, + "step": 50 + }, + { + "epoch": 0.04, + "learning_rate": 3.833865814696486e-06, + "loss": 11.0512, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.472843450479234e-06, + "loss": 11.0023, + "step": 70 + }, + { + "epoch": 0.05, + "learning_rate": 5.111821086261981e-06, + "loss": 10.9751, + "step": 80 + }, + { + "epoch": 0.06, + "learning_rate": 5.750798722044729e-06, + "loss": 10.9535, + "step": 90 + }, + { + "epoch": 0.06, + "learning_rate": 6.389776357827476e-06, + "loss": 10.9472, + "step": 100 + }, + { + "epoch": 0.07, + "learning_rate": 7.028753993610224e-06, + "loss": 10.922, + "step": 110 + }, + { + "epoch": 0.08, + "learning_rate": 7.667731629392972e-06, + "loss": 10.8773, + "step": 120 + }, + { + "epoch": 0.08, + "learning_rate": 8.306709265175718e-06, + "loss": 10.8485, + "step": 130 + }, + { + "epoch": 0.09, + "learning_rate": 8.945686900958468e-06, + "loss": 10.7794, + "step": 140 + }, + { + "epoch": 0.1, + "learning_rate": 9.584664536741214e-06, + "loss": 10.7876, + "step": 150 + }, + { + "epoch": 0.1, + "learning_rate": 1.0223642172523962e-05, + "loss": 10.7846, + "step": 160 + }, + { + "epoch": 0.11, + "learning_rate": 1.0862619808306708e-05, + "loss": 10.7546, + "step": 170 + }, + { + "epoch": 0.11, + "learning_rate": 1.1501597444089457e-05, + "loss": 10.6649, + "step": 180 + }, + { + "epoch": 0.12, + "learning_rate": 1.2140575079872205e-05, + "loss": 10.6765, + "step": 190 + }, + { + "epoch": 0.13, + "learning_rate": 1.2779552715654951e-05, + "loss": 10.6211, + "step": 200 + }, + { + "epoch": 0.13, + "learning_rate": 1.3418530351437701e-05, + "loss": 10.6008, + "step": 210 + }, + { + "epoch": 0.14, + "learning_rate": 1.4057507987220449e-05, + "loss": 10.5362, + "step": 220 + }, + { + "epoch": 0.15, + "learning_rate": 1.4696485623003195e-05, + "loss": 10.4825, + "step": 230 + }, + { + "epoch": 0.15, + "learning_rate": 1.5335463258785944e-05, + "loss": 10.449, + "step": 240 + }, + { + "epoch": 0.16, + "learning_rate": 1.597444089456869e-05, + "loss": 10.3695, + "step": 250 + }, + { + "epoch": 0.17, + "learning_rate": 1.6613418530351437e-05, + "loss": 10.3128, + "step": 260 + }, + { + "epoch": 0.17, + "learning_rate": 1.7252396166134186e-05, + "loss": 10.221, + "step": 270 + }, + { + "epoch": 0.18, + "learning_rate": 1.7891373801916936e-05, + "loss": 10.1125, + "step": 280 + }, + { + "epoch": 0.19, + "learning_rate": 1.853035143769968e-05, + "loss": 9.9933, + "step": 290 + }, + { + "epoch": 0.19, + "learning_rate": 1.9169329073482428e-05, + "loss": 9.7903, + "step": 300 + }, + { + "epoch": 0.2, + "learning_rate": 1.9808306709265177e-05, + "loss": 9.5898, + "step": 310 + }, + { + "epoch": 0.2, + "learning_rate": 2.0447284345047924e-05, + "loss": 9.4338, + "step": 320 + }, + { + "epoch": 0.21, + "learning_rate": 2.1086261980830673e-05, + "loss": 9.2167, + "step": 330 + }, + { + "epoch": 0.22, + "learning_rate": 2.1725239616613416e-05, + "loss": 8.9299, + "step": 340 + }, + { + "epoch": 0.22, + "learning_rate": 2.2364217252396165e-05, + "loss": 8.5451, + "step": 350 + }, + { + "epoch": 0.23, + "learning_rate": 2.3003194888178915e-05, + "loss": 8.377, + "step": 360 + }, + { + "epoch": 0.24, + "learning_rate": 2.364217252396166e-05, + "loss": 8.1605, + "step": 370 + }, + { + "epoch": 0.24, + "learning_rate": 2.428115015974441e-05, + "loss": 7.8136, + "step": 380 + }, + { + "epoch": 0.25, + "learning_rate": 2.4920127795527157e-05, + "loss": 7.4985, + "step": 390 + }, + { + "epoch": 0.26, + "learning_rate": 2.5559105431309903e-05, + "loss": 7.1695, + "step": 400 + }, + { + "epoch": 0.26, + "learning_rate": 2.6198083067092652e-05, + "loss": 6.9816, + "step": 410 + }, + { + "epoch": 0.27, + "learning_rate": 2.6837060702875402e-05, + "loss": 6.4929, + "step": 420 + }, + { + "epoch": 0.27, + "learning_rate": 2.7476038338658148e-05, + "loss": 6.3582, + "step": 430 + }, + { + "epoch": 0.28, + "learning_rate": 2.8115015974440897e-05, + "loss": 6.1947, + "step": 440 + }, + { + "epoch": 0.29, + "learning_rate": 2.875399361022364e-05, + "loss": 5.8765, + "step": 450 + }, + { + "epoch": 0.29, + "learning_rate": 2.939297124600639e-05, + "loss": 5.6917, + "step": 460 + }, + { + "epoch": 0.3, + "learning_rate": 3.003194888178914e-05, + "loss": 5.5349, + "step": 470 + }, + { + "epoch": 0.31, + "learning_rate": 3.067092651757189e-05, + "loss": 5.4405, + "step": 480 + }, + { + "epoch": 0.31, + "learning_rate": 3.130990415335463e-05, + "loss": 5.2719, + "step": 490 + }, + { + "epoch": 0.32, + "learning_rate": 3.194888178913738e-05, + "loss": 4.8337, + "step": 500 + }, + { + "epoch": 0.33, + "learning_rate": 3.258785942492013e-05, + "loss": 4.8143, + "step": 510 + }, + { + "epoch": 0.33, + "learning_rate": 3.322683706070287e-05, + "loss": 4.5746, + "step": 520 + }, + { + "epoch": 0.34, + "learning_rate": 3.386581469648562e-05, + "loss": 4.348, + "step": 530 + }, + { + "epoch": 0.34, + "learning_rate": 3.450479233226837e-05, + "loss": 4.1607, + "step": 540 + }, + { + "epoch": 0.35, + "learning_rate": 3.5143769968051115e-05, + "loss": 4.0622, + "step": 550 + }, + { + "epoch": 0.36, + "learning_rate": 3.578274760383387e-05, + "loss": 3.9419, + "step": 560 + }, + { + "epoch": 0.36, + "learning_rate": 3.6421725239616614e-05, + "loss": 3.8112, + "step": 570 + }, + { + "epoch": 0.37, + "learning_rate": 3.706070287539936e-05, + "loss": 3.6399, + "step": 580 + }, + { + "epoch": 0.38, + "learning_rate": 3.769968051118211e-05, + "loss": 3.5703, + "step": 590 + }, + { + "epoch": 0.38, + "learning_rate": 3.8338658146964856e-05, + "loss": 3.4017, + "step": 600 + }, + { + "epoch": 0.39, + "learning_rate": 3.8977635782747605e-05, + "loss": 3.2633, + "step": 610 + }, + { + "epoch": 0.4, + "learning_rate": 3.9616613418530355e-05, + "loss": 3.0287, + "step": 620 + }, + { + "epoch": 0.4, + "learning_rate": 4.02555910543131e-05, + "loss": 3.0033, + "step": 630 + }, + { + "epoch": 0.41, + "learning_rate": 4.089456869009585e-05, + "loss": 3.0306, + "step": 640 + }, + { + "epoch": 0.42, + "learning_rate": 4.15335463258786e-05, + "loss": 2.8719, + "step": 650 + }, + { + "epoch": 0.42, + "learning_rate": 4.2172523961661346e-05, + "loss": 2.6886, + "step": 660 + }, + { + "epoch": 0.43, + "learning_rate": 4.281150159744409e-05, + "loss": 2.5607, + "step": 670 + }, + { + "epoch": 0.43, + "learning_rate": 4.345047923322683e-05, + "loss": 2.4779, + "step": 680 + }, + { + "epoch": 0.44, + "learning_rate": 4.408945686900959e-05, + "loss": 2.4884, + "step": 690 + }, + { + "epoch": 0.45, + "learning_rate": 4.472843450479233e-05, + "loss": 2.3248, + "step": 700 + }, + { + "epoch": 0.45, + "learning_rate": 4.536741214057508e-05, + "loss": 2.3108, + "step": 710 + }, + { + "epoch": 0.46, + "learning_rate": 4.600638977635783e-05, + "loss": 2.2866, + "step": 720 + }, + { + "epoch": 0.47, + "learning_rate": 4.664536741214057e-05, + "loss": 2.1556, + "step": 730 + }, + { + "epoch": 0.47, + "learning_rate": 4.728434504792332e-05, + "loss": 2.0662, + "step": 740 + }, + { + "epoch": 0.48, + "learning_rate": 4.792332268370607e-05, + "loss": 2.1048, + "step": 750 + }, + { + "epoch": 0.49, + "learning_rate": 4.856230031948882e-05, + "loss": 1.9392, + "step": 760 + }, + { + "epoch": 0.49, + "learning_rate": 4.9201277955271564e-05, + "loss": 1.9819, + "step": 770 + }, + { + "epoch": 0.5, + "learning_rate": 4.984025559105431e-05, + "loss": 1.9099, + "step": 780 + }, + { + "epoch": 0.5, + "learning_rate": 5.047923322683706e-05, + "loss": 1.8879, + "step": 790 + }, + { + "epoch": 0.51, + "learning_rate": 5.1118210862619806e-05, + "loss": 1.8877, + "step": 800 + }, + { + "epoch": 0.52, + "learning_rate": 5.175718849840256e-05, + "loss": 1.8067, + "step": 810 + }, + { + "epoch": 0.52, + "learning_rate": 5.2396166134185305e-05, + "loss": 1.74, + "step": 820 + }, + { + "epoch": 0.53, + "learning_rate": 5.3035143769968054e-05, + "loss": 1.705, + "step": 830 + }, + { + "epoch": 0.54, + "learning_rate": 5.3674121405750804e-05, + "loss": 1.5959, + "step": 840 + }, + { + "epoch": 0.54, + "learning_rate": 5.4313099041533546e-05, + "loss": 1.6158, + "step": 850 + }, + { + "epoch": 0.55, + "learning_rate": 5.4952076677316296e-05, + "loss": 1.6574, + "step": 860 + }, + { + "epoch": 0.56, + "learning_rate": 5.559105431309904e-05, + "loss": 1.5606, + "step": 870 + }, + { + "epoch": 0.56, + "learning_rate": 5.6230031948881795e-05, + "loss": 1.5595, + "step": 880 + }, + { + "epoch": 0.57, + "learning_rate": 5.686900958466454e-05, + "loss": 1.6025, + "step": 890 + }, + { + "epoch": 0.57, + "learning_rate": 5.750798722044728e-05, + "loss": 1.6317, + "step": 900 + }, + { + "epoch": 0.58, + "learning_rate": 5.814696485623004e-05, + "loss": 1.4737, + "step": 910 + }, + { + "epoch": 0.59, + "learning_rate": 5.878594249201278e-05, + "loss": 1.5243, + "step": 920 + }, + { + "epoch": 0.59, + "learning_rate": 5.942492012779553e-05, + "loss": 1.4923, + "step": 930 + }, + { + "epoch": 0.6, + "learning_rate": 6.006389776357828e-05, + "loss": 1.3972, + "step": 940 + }, + { + "epoch": 0.61, + "learning_rate": 6.070287539936102e-05, + "loss": 1.4433, + "step": 950 + }, + { + "epoch": 0.61, + "learning_rate": 6.134185303514378e-05, + "loss": 1.4677, + "step": 960 + }, + { + "epoch": 0.62, + "learning_rate": 6.198083067092652e-05, + "loss": 1.3918, + "step": 970 + }, + { + "epoch": 0.63, + "learning_rate": 6.261980830670926e-05, + "loss": 1.2902, + "step": 980 + }, + { + "epoch": 0.63, + "learning_rate": 6.325878594249202e-05, + "loss": 1.3005, + "step": 990 + }, + { + "epoch": 0.64, + "learning_rate": 6.389776357827476e-05, + "loss": 1.2544, + "step": 1000 + }, + { + "epoch": 0.65, + "learning_rate": 6.45367412140575e-05, + "loss": 1.2713, + "step": 1010 + }, + { + "epoch": 0.65, + "learning_rate": 6.517571884984026e-05, + "loss": 1.3027, + "step": 1020 + }, + { + "epoch": 0.66, + "learning_rate": 6.5814696485623e-05, + "loss": 1.2434, + "step": 1030 + }, + { + "epoch": 0.66, + "learning_rate": 6.645367412140575e-05, + "loss": 1.2283, + "step": 1040 + }, + { + "epoch": 0.67, + "learning_rate": 6.70926517571885e-05, + "loss": 1.2587, + "step": 1050 + }, + { + "epoch": 0.68, + "learning_rate": 6.773162939297125e-05, + "loss": 1.1828, + "step": 1060 + }, + { + "epoch": 0.68, + "learning_rate": 6.8370607028754e-05, + "loss": 1.1369, + "step": 1070 + }, + { + "epoch": 0.69, + "learning_rate": 6.900958466453674e-05, + "loss": 1.1307, + "step": 1080 + }, + { + "epoch": 0.7, + "learning_rate": 6.964856230031949e-05, + "loss": 1.1675, + "step": 1090 + }, + { + "epoch": 0.7, + "learning_rate": 7.028753993610223e-05, + "loss": 1.1872, + "step": 1100 + }, + { + "epoch": 0.71, + "learning_rate": 7.092651757188499e-05, + "loss": 1.1707, + "step": 1110 + }, + { + "epoch": 0.72, + "learning_rate": 7.156549520766774e-05, + "loss": 1.19, + "step": 1120 + }, + { + "epoch": 0.72, + "learning_rate": 7.220447284345049e-05, + "loss": 1.1268, + "step": 1130 + }, + { + "epoch": 0.73, + "learning_rate": 7.284345047923323e-05, + "loss": 1.0189, + "step": 1140 + }, + { + "epoch": 0.73, + "learning_rate": 7.348242811501597e-05, + "loss": 1.0764, + "step": 1150 + }, + { + "epoch": 0.74, + "learning_rate": 7.412140575079871e-05, + "loss": 1.073, + "step": 1160 + }, + { + "epoch": 0.75, + "learning_rate": 7.476038338658147e-05, + "loss": 0.9837, + "step": 1170 + }, + { + "epoch": 0.75, + "learning_rate": 7.539936102236423e-05, + "loss": 1.014, + "step": 1180 + }, + { + "epoch": 0.76, + "learning_rate": 7.603833865814697e-05, + "loss": 1.0643, + "step": 1190 + }, + { + "epoch": 0.77, + "learning_rate": 7.667731629392971e-05, + "loss": 1.0439, + "step": 1200 + }, + { + "epoch": 0.77, + "learning_rate": 7.731629392971245e-05, + "loss": 1.0419, + "step": 1210 + }, + { + "epoch": 0.78, + "learning_rate": 7.795527156549521e-05, + "loss": 1.01, + "step": 1220 + }, + { + "epoch": 0.79, + "learning_rate": 7.859424920127795e-05, + "loss": 0.9829, + "step": 1230 + }, + { + "epoch": 0.79, + "learning_rate": 7.923322683706071e-05, + "loss": 1.0113, + "step": 1240 + }, + { + "epoch": 0.8, + "learning_rate": 7.987220447284345e-05, + "loss": 0.9818, + "step": 1250 + }, + { + "epoch": 0.8, + "learning_rate": 8.05111821086262e-05, + "loss": 0.9299, + "step": 1260 + }, + { + "epoch": 0.81, + "learning_rate": 8.115015974440895e-05, + "loss": 0.9891, + "step": 1270 + }, + { + "epoch": 0.82, + "learning_rate": 8.17891373801917e-05, + "loss": 0.9429, + "step": 1280 + }, + { + "epoch": 0.82, + "learning_rate": 8.242811501597444e-05, + "loss": 0.8778, + "step": 1290 + }, + { + "epoch": 0.83, + "learning_rate": 8.30670926517572e-05, + "loss": 0.8918, + "step": 1300 + }, + { + "epoch": 0.84, + "learning_rate": 8.370607028753994e-05, + "loss": 0.8873, + "step": 1310 + }, + { + "epoch": 0.84, + "learning_rate": 8.434504792332269e-05, + "loss": 0.9026, + "step": 1320 + }, + { + "epoch": 0.85, + "learning_rate": 8.498402555910544e-05, + "loss": 0.9084, + "step": 1330 + }, + { + "epoch": 0.86, + "learning_rate": 8.562300319488818e-05, + "loss": 0.9513, + "step": 1340 + }, + { + "epoch": 0.86, + "learning_rate": 8.626198083067092e-05, + "loss": 0.8507, + "step": 1350 + }, + { + "epoch": 0.87, + "learning_rate": 8.690095846645366e-05, + "loss": 0.9136, + "step": 1360 + }, + { + "epoch": 0.88, + "learning_rate": 8.753993610223643e-05, + "loss": 0.8661, + "step": 1370 + }, + { + "epoch": 0.88, + "learning_rate": 8.817891373801918e-05, + "loss": 0.8559, + "step": 1380 + }, + { + "epoch": 0.89, + "learning_rate": 8.881789137380192e-05, + "loss": 0.8322, + "step": 1390 + }, + { + "epoch": 0.89, + "learning_rate": 8.945686900958466e-05, + "loss": 0.829, + "step": 1400 + }, + { + "epoch": 0.9, + "learning_rate": 9.00958466453674e-05, + "loss": 0.9206, + "step": 1410 + }, + { + "epoch": 0.91, + "learning_rate": 9.073482428115016e-05, + "loss": 0.788, + "step": 1420 + }, + { + "epoch": 0.91, + "learning_rate": 9.137380191693292e-05, + "loss": 0.8313, + "step": 1430 + }, + { + "epoch": 0.92, + "learning_rate": 9.201277955271566e-05, + "loss": 0.8072, + "step": 1440 + }, + { + "epoch": 0.93, + "learning_rate": 9.26517571884984e-05, + "loss": 0.8115, + "step": 1450 + }, + { + "epoch": 0.93, + "learning_rate": 9.329073482428115e-05, + "loss": 0.7849, + "step": 1460 + }, + { + "epoch": 0.94, + "learning_rate": 9.39297124600639e-05, + "loss": 0.8003, + "step": 1470 + }, + { + "epoch": 0.95, + "learning_rate": 9.456869009584664e-05, + "loss": 0.7835, + "step": 1480 + }, + { + "epoch": 0.95, + "learning_rate": 9.52076677316294e-05, + "loss": 0.8598, + "step": 1490 + }, + { + "epoch": 0.96, + "learning_rate": 9.584664536741214e-05, + "loss": 0.746, + "step": 1500 + }, + { + "epoch": 0.96, + "learning_rate": 9.648562300319489e-05, + "loss": 0.8485, + "step": 1510 + }, + { + "epoch": 0.97, + "learning_rate": 9.712460063897764e-05, + "loss": 0.7976, + "step": 1520 + }, + { + "epoch": 0.98, + "learning_rate": 9.776357827476038e-05, + "loss": 0.7023, + "step": 1530 + }, + { + "epoch": 0.98, + "learning_rate": 9.840255591054313e-05, + "loss": 0.7586, + "step": 1540 + }, + { + "epoch": 0.99, + "learning_rate": 9.904153354632587e-05, + "loss": 0.771, + "step": 1550 + }, + { + "epoch": 1.0, + "learning_rate": 9.968051118210863e-05, + "loss": 0.7551, + "step": 1560 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010031948881789138, + "loss": 0.7311, + "step": 1570 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010095846645367413, + "loss": 0.7774, + "step": 1580 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010159744408945687, + "loss": 0.7289, + "step": 1590 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010223642172523961, + "loss": 0.7326, + "step": 1600 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010287539936102237, + "loss": 0.7089, + "step": 1610 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010351437699680512, + "loss": 0.6825, + "step": 1620 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010415335463258787, + "loss": 0.7002, + "step": 1630 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010479233226837061, + "loss": 0.6959, + "step": 1640 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010543130990415335, + "loss": 0.713, + "step": 1650 + }, + { + "epoch": 1.06, + "learning_rate": 0.00010607028753993611, + "loss": 0.6884, + "step": 1660 + }, + { + "epoch": 1.07, + "learning_rate": 0.00010670926517571885, + "loss": 0.6914, + "step": 1670 + }, + { + "epoch": 1.07, + "learning_rate": 0.00010734824281150161, + "loss": 0.6628, + "step": 1680 + }, + { + "epoch": 1.08, + "learning_rate": 0.00010798722044728435, + "loss": 0.7524, + "step": 1690 + }, + { + "epoch": 1.09, + "learning_rate": 0.00010862619808306709, + "loss": 0.6824, + "step": 1700 + }, + { + "epoch": 1.09, + "learning_rate": 0.00010926517571884985, + "loss": 0.6922, + "step": 1710 + }, + { + "epoch": 1.1, + "learning_rate": 0.00010990415335463259, + "loss": 0.6298, + "step": 1720 + }, + { + "epoch": 1.11, + "learning_rate": 0.00011054313099041533, + "loss": 0.6545, + "step": 1730 + }, + { + "epoch": 1.11, + "learning_rate": 0.00011118210862619808, + "loss": 0.6842, + "step": 1740 + }, + { + "epoch": 1.12, + "learning_rate": 0.00011182108626198083, + "loss": 0.6909, + "step": 1750 + }, + { + "epoch": 1.12, + "learning_rate": 0.00011246006389776359, + "loss": 0.687, + "step": 1760 + }, + { + "epoch": 1.13, + "learning_rate": 0.00011309904153354633, + "loss": 0.6096, + "step": 1770 + }, + { + "epoch": 1.14, + "learning_rate": 0.00011373801916932908, + "loss": 0.6599, + "step": 1780 + }, + { + "epoch": 1.14, + "learning_rate": 0.00011437699680511182, + "loss": 0.7088, + "step": 1790 + }, + { + "epoch": 1.15, + "learning_rate": 0.00011501597444089456, + "loss": 0.7042, + "step": 1800 + }, + { + "epoch": 1.16, + "learning_rate": 0.00011565495207667733, + "loss": 0.6482, + "step": 1810 + }, + { + "epoch": 1.16, + "learning_rate": 0.00011629392971246007, + "loss": 0.6221, + "step": 1820 + }, + { + "epoch": 1.17, + "learning_rate": 0.00011693290734824282, + "loss": 0.6134, + "step": 1830 + }, + { + "epoch": 1.18, + "learning_rate": 0.00011757188498402556, + "loss": 0.5959, + "step": 1840 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001182108626198083, + "loss": 0.6201, + "step": 1850 + }, + { + "epoch": 1.19, + "learning_rate": 0.00011884984025559106, + "loss": 0.6298, + "step": 1860 + }, + { + "epoch": 1.19, + "learning_rate": 0.00011948881789137381, + "loss": 0.5908, + "step": 1870 + }, + { + "epoch": 1.2, + "learning_rate": 0.00012012779552715656, + "loss": 0.5862, + "step": 1880 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001207667731629393, + "loss": 0.6449, + "step": 1890 + }, + { + "epoch": 1.21, + "learning_rate": 0.00012140575079872204, + "loss": 0.5922, + "step": 1900 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001220447284345048, + "loss": 0.604, + "step": 1910 + }, + { + "epoch": 1.23, + "learning_rate": 0.00012268370607028756, + "loss": 0.6396, + "step": 1920 + }, + { + "epoch": 1.23, + "learning_rate": 0.00012332268370607028, + "loss": 0.6068, + "step": 1930 + }, + { + "epoch": 1.24, + "learning_rate": 0.00012396166134185304, + "loss": 0.6454, + "step": 1940 + }, + { + "epoch": 1.25, + "learning_rate": 0.00012460063897763577, + "loss": 0.5194, + "step": 1950 + }, + { + "epoch": 1.25, + "learning_rate": 0.00012523961661341853, + "loss": 0.5865, + "step": 1960 + }, + { + "epoch": 1.26, + "learning_rate": 0.00012587859424920128, + "loss": 0.5784, + "step": 1970 + }, + { + "epoch": 1.27, + "learning_rate": 0.00012651757188498404, + "loss": 0.559, + "step": 1980 + }, + { + "epoch": 1.27, + "learning_rate": 0.00012715654952076677, + "loss": 0.5501, + "step": 1990 + }, + { + "epoch": 1.28, + "learning_rate": 0.00012779552715654952, + "loss": 0.6316, + "step": 2000 + }, + { + "epoch": 1.28, + "learning_rate": 0.00012843450479233225, + "loss": 0.593, + "step": 2010 + }, + { + "epoch": 1.29, + "learning_rate": 0.000129073482428115, + "loss": 0.5946, + "step": 2020 + }, + { + "epoch": 1.3, + "learning_rate": 0.00012971246006389777, + "loss": 0.5813, + "step": 2030 + }, + { + "epoch": 1.3, + "learning_rate": 0.00013035143769968052, + "loss": 0.5541, + "step": 2040 + }, + { + "epoch": 1.31, + "learning_rate": 0.00013099041533546328, + "loss": 0.5761, + "step": 2050 + }, + { + "epoch": 1.32, + "learning_rate": 0.000131629392971246, + "loss": 0.557, + "step": 2060 + }, + { + "epoch": 1.32, + "learning_rate": 0.00013226837060702876, + "loss": 0.5663, + "step": 2070 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001329073482428115, + "loss": 0.5496, + "step": 2080 + }, + { + "epoch": 1.34, + "learning_rate": 0.00013354632587859425, + "loss": 0.5291, + "step": 2090 + }, + { + "epoch": 1.34, + "learning_rate": 0.000134185303514377, + "loss": 0.565, + "step": 2100 + }, + { + "epoch": 1.35, + "learning_rate": 0.00013482428115015973, + "loss": 0.5763, + "step": 2110 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001354632587859425, + "loss": 0.6175, + "step": 2120 + }, + { + "epoch": 1.36, + "learning_rate": 0.00013610223642172525, + "loss": 0.5378, + "step": 2130 + }, + { + "epoch": 1.37, + "learning_rate": 0.000136741214057508, + "loss": 0.574, + "step": 2140 + }, + { + "epoch": 1.37, + "learning_rate": 0.00013738019169329073, + "loss": 0.561, + "step": 2150 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001380191693290735, + "loss": 0.5277, + "step": 2160 + }, + { + "epoch": 1.39, + "learning_rate": 0.00013865814696485625, + "loss": 0.5662, + "step": 2170 + }, + { + "epoch": 1.39, + "learning_rate": 0.00013929712460063897, + "loss": 0.526, + "step": 2180 + }, + { + "epoch": 1.4, + "learning_rate": 0.00013993610223642173, + "loss": 0.4969, + "step": 2190 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014057507987220446, + "loss": 0.5275, + "step": 2200 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014121405750798722, + "loss": 0.553, + "step": 2210 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014185303514376997, + "loss": 0.5598, + "step": 2220 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014249201277955273, + "loss": 0.5127, + "step": 2230 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014313099041533549, + "loss": 0.5606, + "step": 2240 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014376996805111821, + "loss": 0.5499, + "step": 2250 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014440894568690097, + "loss": 0.5118, + "step": 2260 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001450479233226837, + "loss": 0.5638, + "step": 2270 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014568690095846646, + "loss": 0.5235, + "step": 2280 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001463258785942492, + "loss": 0.5114, + "step": 2290 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014696485623003194, + "loss": 0.5375, + "step": 2300 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001476038338658147, + "loss": 0.5737, + "step": 2310 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014824281150159743, + "loss": 0.541, + "step": 2320 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001488817891373802, + "loss": 0.5112, + "step": 2330 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014952076677316294, + "loss": 0.4936, + "step": 2340 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001501597444089457, + "loss": 0.515, + "step": 2350 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015079872204472845, + "loss": 0.507, + "step": 2360 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015143769968051118, + "loss": 0.4934, + "step": 2370 + }, + { + "epoch": 1.52, + "learning_rate": 0.00015207667731629394, + "loss": 0.5081, + "step": 2380 + }, + { + "epoch": 1.53, + "learning_rate": 0.00015271565495207667, + "loss": 0.4726, + "step": 2390 + }, + { + "epoch": 1.53, + "learning_rate": 0.00015335463258785942, + "loss": 0.5149, + "step": 2400 + }, + { + "epoch": 1.54, + "learning_rate": 0.00015399361022364218, + "loss": 0.4544, + "step": 2410 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001546325878594249, + "loss": 0.5026, + "step": 2420 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001552715654952077, + "loss": 0.4987, + "step": 2430 + }, + { + "epoch": 1.56, + "learning_rate": 0.00015591054313099042, + "loss": 0.4715, + "step": 2440 + }, + { + "epoch": 1.57, + "learning_rate": 0.00015654952076677318, + "loss": 0.5137, + "step": 2450 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001571884984025559, + "loss": 0.4659, + "step": 2460 + }, + { + "epoch": 1.58, + "learning_rate": 0.00015782747603833866, + "loss": 0.5293, + "step": 2470 + }, + { + "epoch": 1.58, + "learning_rate": 0.00015846645367412142, + "loss": 0.4848, + "step": 2480 + }, + { + "epoch": 1.59, + "learning_rate": 0.00015910543130990415, + "loss": 0.5464, + "step": 2490 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001597444089456869, + "loss": 0.4918, + "step": 2500 + }, + { + "epoch": 1.6, + "learning_rate": 0.00016038338658146963, + "loss": 0.4821, + "step": 2510 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001610223642172524, + "loss": 0.4464, + "step": 2520 + }, + { + "epoch": 1.62, + "learning_rate": 0.00016166134185303515, + "loss": 0.5041, + "step": 2530 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001623003194888179, + "loss": 0.5254, + "step": 2540 + }, + { + "epoch": 1.63, + "learning_rate": 0.00016293929712460066, + "loss": 0.4722, + "step": 2550 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001635782747603834, + "loss": 0.5127, + "step": 2560 + }, + { + "epoch": 1.64, + "learning_rate": 0.00016421725239616614, + "loss": 0.4978, + "step": 2570 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016485623003194887, + "loss": 0.5174, + "step": 2580 + }, + { + "epoch": 1.65, + "learning_rate": 0.00016549520766773163, + "loss": 0.528, + "step": 2590 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001661341853035144, + "loss": 0.5612, + "step": 2600 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016677316293929712, + "loss": 0.4953, + "step": 2610 + }, + { + "epoch": 1.67, + "learning_rate": 0.00016741214057507987, + "loss": 0.475, + "step": 2620 + }, + { + "epoch": 1.68, + "learning_rate": 0.00016805111821086263, + "loss": 0.476, + "step": 2630 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016869009584664538, + "loss": 0.4799, + "step": 2640 + }, + { + "epoch": 1.69, + "learning_rate": 0.00016932907348242811, + "loss": 0.4838, + "step": 2650 + }, + { + "epoch": 1.7, + "learning_rate": 0.00016996805111821087, + "loss": 0.5215, + "step": 2660 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017060702875399363, + "loss": 0.4927, + "step": 2670 + }, + { + "epoch": 1.71, + "learning_rate": 0.00017124600638977636, + "loss": 0.4973, + "step": 2680 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001718849840255591, + "loss": 0.4703, + "step": 2690 + }, + { + "epoch": 1.73, + "learning_rate": 0.00017252396166134184, + "loss": 0.5073, + "step": 2700 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001731629392971246, + "loss": 0.4445, + "step": 2710 + }, + { + "epoch": 1.74, + "learning_rate": 0.00017380191693290733, + "loss": 0.463, + "step": 2720 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001744408945686901, + "loss": 0.4289, + "step": 2730 + }, + { + "epoch": 1.75, + "learning_rate": 0.00017507987220447287, + "loss": 0.4378, + "step": 2740 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001757188498402556, + "loss": 0.4408, + "step": 2750 + }, + { + "epoch": 1.76, + "learning_rate": 0.00017635782747603835, + "loss": 0.518, + "step": 2760 + }, + { + "epoch": 1.77, + "learning_rate": 0.00017699680511182108, + "loss": 0.4583, + "step": 2770 + }, + { + "epoch": 1.78, + "learning_rate": 0.00017763578274760384, + "loss": 0.4415, + "step": 2780 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001782747603833866, + "loss": 0.4543, + "step": 2790 + }, + { + "epoch": 1.79, + "learning_rate": 0.00017891373801916932, + "loss": 0.4353, + "step": 2800 + }, + { + "epoch": 1.8, + "learning_rate": 0.00017955271565495208, + "loss": 0.5315, + "step": 2810 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001801916932907348, + "loss": 0.468, + "step": 2820 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001808306709265176, + "loss": 0.4915, + "step": 2830 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018146964856230032, + "loss": 0.4684, + "step": 2840 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018210862619808308, + "loss": 0.4367, + "step": 2850 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018274760383386583, + "loss": 0.3751, + "step": 2860 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018338658146964856, + "loss": 0.4347, + "step": 2870 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018402555910543132, + "loss": 0.4611, + "step": 2880 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018466453674121405, + "loss": 0.4425, + "step": 2890 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001853035143769968, + "loss": 0.4483, + "step": 2900 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018594249201277953, + "loss": 0.4603, + "step": 2910 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001865814696485623, + "loss": 0.4369, + "step": 2920 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018722044728434507, + "loss": 0.4521, + "step": 2930 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001878594249201278, + "loss": 0.4239, + "step": 2940 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018849840255591056, + "loss": 0.3795, + "step": 2950 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001891373801916933, + "loss": 0.477, + "step": 2960 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018977635782747604, + "loss": 0.4273, + "step": 2970 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001904153354632588, + "loss": 0.4786, + "step": 2980 + }, + { + "epoch": 1.91, + "learning_rate": 0.00019105431309904153, + "loss": 0.4803, + "step": 2990 + }, + { + "epoch": 1.92, + "learning_rate": 0.00019169329073482429, + "loss": 0.4497, + "step": 3000 + }, + { + "epoch": 1.92, + "learning_rate": 0.00019233226837060702, + "loss": 0.4749, + "step": 3010 + }, + { + "epoch": 1.93, + "learning_rate": 0.00019297124600638977, + "loss": 0.4231, + "step": 3020 + }, + { + "epoch": 1.94, + "learning_rate": 0.00019361022364217253, + "loss": 0.4618, + "step": 3030 + }, + { + "epoch": 1.94, + "learning_rate": 0.00019424920127795528, + "loss": 0.4265, + "step": 3040 + }, + { + "epoch": 1.95, + "learning_rate": 0.00019488817891373804, + "loss": 0.39, + "step": 3050 + }, + { + "epoch": 1.95, + "learning_rate": 0.00019552715654952077, + "loss": 0.4346, + "step": 3060 + }, + { + "epoch": 1.96, + "learning_rate": 0.00019616613418530353, + "loss": 0.4079, + "step": 3070 + }, + { + "epoch": 1.97, + "learning_rate": 0.00019680511182108626, + "loss": 0.4351, + "step": 3080 + }, + { + "epoch": 1.97, + "learning_rate": 0.000197444089456869, + "loss": 0.4402, + "step": 3090 + }, + { + "epoch": 1.98, + "learning_rate": 0.00019808306709265174, + "loss": 0.4294, + "step": 3100 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001987220447284345, + "loss": 0.4079, + "step": 3110 + }, + { + "epoch": 1.99, + "learning_rate": 0.00019936102236421725, + "loss": 0.398, + "step": 3120 + }, + { + "epoch": 2.0, + "learning_rate": 0.0002, + "loss": 0.3759, + "step": 3130 + }, + { + "epoch": 2.01, + "learning_rate": 0.00020063897763578277, + "loss": 0.408, + "step": 3140 + }, + { + "epoch": 2.01, + "learning_rate": 0.0002012779552715655, + "loss": 0.4261, + "step": 3150 + }, + { + "epoch": 2.02, + "learning_rate": 0.00020191693290734825, + "loss": 0.3951, + "step": 3160 + }, + { + "epoch": 2.03, + "learning_rate": 0.000202555910543131, + "loss": 0.3488, + "step": 3170 + }, + { + "epoch": 2.03, + "learning_rate": 0.00020319488817891374, + "loss": 0.3529, + "step": 3180 + }, + { + "epoch": 2.04, + "learning_rate": 0.0002038338658146965, + "loss": 0.3806, + "step": 3190 + }, + { + "epoch": 2.04, + "learning_rate": 0.00020447284345047922, + "loss": 0.3641, + "step": 3200 + }, + { + "epoch": 2.05, + "learning_rate": 0.00020511182108626198, + "loss": 0.4043, + "step": 3210 + }, + { + "epoch": 2.06, + "learning_rate": 0.00020575079872204473, + "loss": 0.3598, + "step": 3220 + }, + { + "epoch": 2.06, + "learning_rate": 0.0002063897763578275, + "loss": 0.3858, + "step": 3230 + }, + { + "epoch": 2.07, + "learning_rate": 0.00020702875399361025, + "loss": 0.3666, + "step": 3240 + }, + { + "epoch": 2.08, + "learning_rate": 0.00020766773162939298, + "loss": 0.3702, + "step": 3250 + }, + { + "epoch": 2.08, + "learning_rate": 0.00020830670926517573, + "loss": 0.3782, + "step": 3260 + }, + { + "epoch": 2.09, + "learning_rate": 0.00020894568690095846, + "loss": 0.3782, + "step": 3270 + }, + { + "epoch": 2.1, + "learning_rate": 0.00020958466453674122, + "loss": 0.3474, + "step": 3280 + }, + { + "epoch": 2.1, + "learning_rate": 0.00021022364217252395, + "loss": 0.3645, + "step": 3290 + }, + { + "epoch": 2.11, + "learning_rate": 0.0002108626198083067, + "loss": 0.4034, + "step": 3300 + }, + { + "epoch": 2.11, + "learning_rate": 0.00021150159744408946, + "loss": 0.3892, + "step": 3310 + }, + { + "epoch": 2.12, + "learning_rate": 0.00021214057507987222, + "loss": 0.3843, + "step": 3320 + }, + { + "epoch": 2.13, + "learning_rate": 0.00021277955271565497, + "loss": 0.3667, + "step": 3330 + }, + { + "epoch": 2.13, + "learning_rate": 0.0002134185303514377, + "loss": 0.3954, + "step": 3340 + }, + { + "epoch": 2.14, + "learning_rate": 0.00021405750798722046, + "loss": 0.3733, + "step": 3350 + }, + { + "epoch": 2.15, + "learning_rate": 0.00021469648562300321, + "loss": 0.3801, + "step": 3360 + }, + { + "epoch": 2.15, + "learning_rate": 0.00021533546325878594, + "loss": 0.3813, + "step": 3370 + }, + { + "epoch": 2.16, + "learning_rate": 0.0002159744408945687, + "loss": 0.382, + "step": 3380 + }, + { + "epoch": 2.17, + "learning_rate": 0.00021661341853035143, + "loss": 0.3959, + "step": 3390 + }, + { + "epoch": 2.17, + "learning_rate": 0.00021725239616613419, + "loss": 0.338, + "step": 3400 + }, + { + "epoch": 2.18, + "learning_rate": 0.00021789137380191691, + "loss": 0.3884, + "step": 3410 + }, + { + "epoch": 2.19, + "learning_rate": 0.0002185303514376997, + "loss": 0.3583, + "step": 3420 + }, + { + "epoch": 2.19, + "learning_rate": 0.00021916932907348245, + "loss": 0.3224, + "step": 3430 + }, + { + "epoch": 2.2, + "learning_rate": 0.00021980830670926518, + "loss": 0.3587, + "step": 3440 + }, + { + "epoch": 2.2, + "learning_rate": 0.00022044728434504794, + "loss": 0.3493, + "step": 3450 + }, + { + "epoch": 2.21, + "learning_rate": 0.00022108626198083067, + "loss": 0.392, + "step": 3460 + }, + { + "epoch": 2.22, + "learning_rate": 0.00022172523961661343, + "loss": 0.3555, + "step": 3470 + }, + { + "epoch": 2.22, + "learning_rate": 0.00022236421725239615, + "loss": 0.4006, + "step": 3480 + }, + { + "epoch": 2.23, + "learning_rate": 0.0002230031948881789, + "loss": 0.3817, + "step": 3490 + }, + { + "epoch": 2.24, + "learning_rate": 0.00022364217252396167, + "loss": 0.3957, + "step": 3500 + }, + { + "epoch": 2.24, + "learning_rate": 0.0002242811501597444, + "loss": 0.3538, + "step": 3510 + }, + { + "epoch": 2.25, + "learning_rate": 0.00022492012779552718, + "loss": 0.3811, + "step": 3520 + }, + { + "epoch": 2.26, + "learning_rate": 0.0002255591054313099, + "loss": 0.3615, + "step": 3530 + }, + { + "epoch": 2.26, + "learning_rate": 0.00022619808306709267, + "loss": 0.3491, + "step": 3540 + }, + { + "epoch": 2.27, + "learning_rate": 0.00022683706070287542, + "loss": 0.3904, + "step": 3550 + }, + { + "epoch": 2.27, + "learning_rate": 0.00022747603833865815, + "loss": 0.3692, + "step": 3560 + }, + { + "epoch": 2.28, + "learning_rate": 0.0002281150159744409, + "loss": 0.3764, + "step": 3570 + }, + { + "epoch": 2.29, + "learning_rate": 0.00022875399361022364, + "loss": 0.3189, + "step": 3580 + }, + { + "epoch": 2.29, + "learning_rate": 0.0002293929712460064, + "loss": 0.3437, + "step": 3590 + }, + { + "epoch": 2.3, + "learning_rate": 0.00023003194888178912, + "loss": 0.38, + "step": 3600 + }, + { + "epoch": 2.31, + "learning_rate": 0.00023067092651757188, + "loss": 0.3694, + "step": 3610 + }, + { + "epoch": 2.31, + "learning_rate": 0.00023130990415335466, + "loss": 0.3297, + "step": 3620 + }, + { + "epoch": 2.32, + "learning_rate": 0.0002319488817891374, + "loss": 0.391, + "step": 3630 + }, + { + "epoch": 2.33, + "learning_rate": 0.00023258785942492015, + "loss": 0.3465, + "step": 3640 + }, + { + "epoch": 2.33, + "learning_rate": 0.00023322683706070288, + "loss": 0.3879, + "step": 3650 + }, + { + "epoch": 2.34, + "learning_rate": 0.00023386581469648563, + "loss": 0.3551, + "step": 3660 + }, + { + "epoch": 2.34, + "learning_rate": 0.00023450479233226836, + "loss": 0.3383, + "step": 3670 + }, + { + "epoch": 2.35, + "learning_rate": 0.00023514376996805112, + "loss": 0.3284, + "step": 3680 + }, + { + "epoch": 2.36, + "learning_rate": 0.00023578274760383387, + "loss": 0.3362, + "step": 3690 + }, + { + "epoch": 2.36, + "learning_rate": 0.0002364217252396166, + "loss": 0.349, + "step": 3700 + }, + { + "epoch": 2.37, + "learning_rate": 0.00023706070287539936, + "loss": 0.357, + "step": 3710 + }, + { + "epoch": 2.38, + "learning_rate": 0.00023769968051118212, + "loss": 0.3993, + "step": 3720 + }, + { + "epoch": 2.38, + "learning_rate": 0.00023833865814696487, + "loss": 0.3611, + "step": 3730 + }, + { + "epoch": 2.39, + "learning_rate": 0.00023897763578274763, + "loss": 0.3989, + "step": 3740 + }, + { + "epoch": 2.4, + "learning_rate": 0.00023961661341853036, + "loss": 0.3792, + "step": 3750 + }, + { + "epoch": 2.4, + "learning_rate": 0.00024025559105431311, + "loss": 0.3434, + "step": 3760 + }, + { + "epoch": 2.41, + "learning_rate": 0.00024089456869009584, + "loss": 0.3669, + "step": 3770 + }, + { + "epoch": 2.42, + "learning_rate": 0.0002415335463258786, + "loss": 0.3858, + "step": 3780 + }, + { + "epoch": 2.42, + "learning_rate": 0.00024217252396166133, + "loss": 0.3016, + "step": 3790 + }, + { + "epoch": 2.43, + "learning_rate": 0.00024281150159744408, + "loss": 0.3253, + "step": 3800 + }, + { + "epoch": 2.43, + "learning_rate": 0.00024345047923322684, + "loss": 0.3811, + "step": 3810 + }, + { + "epoch": 2.44, + "learning_rate": 0.0002440894568690096, + "loss": 0.338, + "step": 3820 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002447284345047923, + "loss": 0.3214, + "step": 3830 + }, + { + "epoch": 2.45, + "learning_rate": 0.0002453674121405751, + "loss": 0.3696, + "step": 3840 + }, + { + "epoch": 2.46, + "learning_rate": 0.00024600638977635784, + "loss": 0.3624, + "step": 3850 + }, + { + "epoch": 2.47, + "learning_rate": 0.00024664536741214057, + "loss": 0.3538, + "step": 3860 + }, + { + "epoch": 2.47, + "learning_rate": 0.00024728434504792335, + "loss": 0.372, + "step": 3870 + }, + { + "epoch": 2.48, + "learning_rate": 0.0002479233226837061, + "loss": 0.3884, + "step": 3880 + }, + { + "epoch": 2.49, + "learning_rate": 0.0002485623003194888, + "loss": 0.325, + "step": 3890 + }, + { + "epoch": 2.49, + "learning_rate": 0.00024920127795527154, + "loss": 0.3567, + "step": 3900 + }, + { + "epoch": 2.5, + "learning_rate": 0.0002498402555910543, + "loss": 0.379, + "step": 3910 + }, + { + "epoch": 2.5, + "learning_rate": 0.00025047923322683705, + "loss": 0.3542, + "step": 3920 + }, + { + "epoch": 2.51, + "learning_rate": 0.00025111821086261984, + "loss": 0.3739, + "step": 3930 + }, + { + "epoch": 2.52, + "learning_rate": 0.00025175718849840256, + "loss": 0.3657, + "step": 3940 + }, + { + "epoch": 2.52, + "learning_rate": 0.0002523961661341853, + "loss": 0.3499, + "step": 3950 + }, + { + "epoch": 2.53, + "learning_rate": 0.0002530351437699681, + "loss": 0.3428, + "step": 3960 + }, + { + "epoch": 2.54, + "learning_rate": 0.0002536741214057508, + "loss": 0.3369, + "step": 3970 + }, + { + "epoch": 2.54, + "learning_rate": 0.00025431309904153354, + "loss": 0.4055, + "step": 3980 + }, + { + "epoch": 2.55, + "learning_rate": 0.00025495207667731626, + "loss": 0.365, + "step": 3990 + }, + { + "epoch": 2.56, + "learning_rate": 0.00025559105431309905, + "loss": 0.3533, + "step": 4000 + }, + { + "epoch": 2.56, + "learning_rate": 0.0002562300319488818, + "loss": 0.3223, + "step": 4010 + }, + { + "epoch": 2.57, + "learning_rate": 0.0002568690095846645, + "loss": 0.3176, + "step": 4020 + }, + { + "epoch": 2.57, + "learning_rate": 0.0002575079872204473, + "loss": 0.3098, + "step": 4030 + }, + { + "epoch": 2.58, + "learning_rate": 0.00025814696485623, + "loss": 0.3792, + "step": 4040 + }, + { + "epoch": 2.59, + "learning_rate": 0.00025878594249201275, + "loss": 0.357, + "step": 4050 + }, + { + "epoch": 2.59, + "learning_rate": 0.00025942492012779553, + "loss": 0.3706, + "step": 4060 + }, + { + "epoch": 2.6, + "learning_rate": 0.0002600638977635783, + "loss": 0.346, + "step": 4070 + }, + { + "epoch": 2.61, + "learning_rate": 0.00026070287539936104, + "loss": 0.341, + "step": 4080 + }, + { + "epoch": 2.61, + "learning_rate": 0.0002613418530351438, + "loss": 0.3352, + "step": 4090 + }, + { + "epoch": 2.62, + "learning_rate": 0.00026198083067092656, + "loss": 0.3219, + "step": 4100 + }, + { + "epoch": 2.63, + "learning_rate": 0.0002626198083067093, + "loss": 0.3638, + "step": 4110 + }, + { + "epoch": 2.63, + "learning_rate": 0.000263258785942492, + "loss": 0.3356, + "step": 4120 + }, + { + "epoch": 2.64, + "learning_rate": 0.0002638977635782748, + "loss": 0.3309, + "step": 4130 + }, + { + "epoch": 2.65, + "learning_rate": 0.00026453674121405753, + "loss": 0.3649, + "step": 4140 + }, + { + "epoch": 2.65, + "learning_rate": 0.00026517571884984026, + "loss": 0.3334, + "step": 4150 + }, + { + "epoch": 2.66, + "learning_rate": 0.000265814696485623, + "loss": 0.3279, + "step": 4160 + }, + { + "epoch": 2.66, + "learning_rate": 0.00026645367412140577, + "loss": 0.3603, + "step": 4170 + }, + { + "epoch": 2.67, + "learning_rate": 0.0002670926517571885, + "loss": 0.3207, + "step": 4180 + }, + { + "epoch": 2.68, + "learning_rate": 0.00026773162939297123, + "loss": 0.2887, + "step": 4190 + }, + { + "epoch": 2.68, + "learning_rate": 0.000268370607028754, + "loss": 0.343, + "step": 4200 + }, + { + "epoch": 2.69, + "learning_rate": 0.00026900958466453674, + "loss": 0.3347, + "step": 4210 + }, + { + "epoch": 2.7, + "learning_rate": 0.00026964856230031947, + "loss": 0.3013, + "step": 4220 + }, + { + "epoch": 2.7, + "learning_rate": 0.0002702875399361022, + "loss": 0.3681, + "step": 4230 + }, + { + "epoch": 2.71, + "learning_rate": 0.000270926517571885, + "loss": 0.313, + "step": 4240 + }, + { + "epoch": 2.72, + "learning_rate": 0.0002715654952076677, + "loss": 0.3769, + "step": 4250 + }, + { + "epoch": 2.72, + "learning_rate": 0.0002722044728434505, + "loss": 0.346, + "step": 4260 + }, + { + "epoch": 2.73, + "learning_rate": 0.0002728434504792333, + "loss": 0.321, + "step": 4270 + }, + { + "epoch": 2.73, + "learning_rate": 0.000273482428115016, + "loss": 0.3783, + "step": 4280 + }, + { + "epoch": 2.74, + "learning_rate": 0.00027412140575079874, + "loss": 0.3621, + "step": 4290 + }, + { + "epoch": 2.75, + "learning_rate": 0.00027476038338658147, + "loss": 0.3509, + "step": 4300 + }, + { + "epoch": 2.75, + "learning_rate": 0.00027539936102236425, + "loss": 0.3836, + "step": 4310 + }, + { + "epoch": 2.76, + "learning_rate": 0.000276038338658147, + "loss": 0.3595, + "step": 4320 + }, + { + "epoch": 2.77, + "learning_rate": 0.0002766773162939297, + "loss": 0.3375, + "step": 4330 + }, + { + "epoch": 2.77, + "learning_rate": 0.0002773162939297125, + "loss": 0.3343, + "step": 4340 + }, + { + "epoch": 2.78, + "learning_rate": 0.0002779552715654952, + "loss": 0.3306, + "step": 4350 + }, + { + "epoch": 2.79, + "learning_rate": 0.00027859424920127795, + "loss": 0.3474, + "step": 4360 + }, + { + "epoch": 2.79, + "learning_rate": 0.0002792332268370607, + "loss": 0.3696, + "step": 4370 + }, + { + "epoch": 2.8, + "learning_rate": 0.00027987220447284346, + "loss": 0.35, + "step": 4380 + }, + { + "epoch": 2.8, + "learning_rate": 0.0002805111821086262, + "loss": 0.3887, + "step": 4390 + }, + { + "epoch": 2.81, + "learning_rate": 0.0002811501597444089, + "loss": 0.3449, + "step": 4400 + }, + { + "epoch": 2.82, + "learning_rate": 0.0002817891373801917, + "loss": 0.369, + "step": 4410 + }, + { + "epoch": 2.82, + "learning_rate": 0.00028242811501597443, + "loss": 0.373, + "step": 4420 + }, + { + "epoch": 2.83, + "learning_rate": 0.00028306709265175716, + "loss": 0.3299, + "step": 4430 + }, + { + "epoch": 2.84, + "learning_rate": 0.00028370607028753995, + "loss": 0.3233, + "step": 4440 + }, + { + "epoch": 2.84, + "learning_rate": 0.0002843450479233227, + "loss": 0.3629, + "step": 4450 + }, + { + "epoch": 2.85, + "learning_rate": 0.00028498402555910546, + "loss": 0.3576, + "step": 4460 + }, + { + "epoch": 2.86, + "learning_rate": 0.0002856230031948882, + "loss": 0.3053, + "step": 4470 + }, + { + "epoch": 2.86, + "learning_rate": 0.00028626198083067097, + "loss": 0.3185, + "step": 4480 + }, + { + "epoch": 2.87, + "learning_rate": 0.0002869009584664537, + "loss": 0.3423, + "step": 4490 + }, + { + "epoch": 2.88, + "learning_rate": 0.00028753993610223643, + "loss": 0.3727, + "step": 4500 + }, + { + "epoch": 2.88, + "learning_rate": 0.0002881789137380192, + "loss": 0.3121, + "step": 4510 + }, + { + "epoch": 2.89, + "learning_rate": 0.00028881789137380194, + "loss": 0.322, + "step": 4520 + }, + { + "epoch": 2.89, + "learning_rate": 0.00028945686900958467, + "loss": 0.3581, + "step": 4530 + }, + { + "epoch": 2.9, + "learning_rate": 0.0002900958466453674, + "loss": 0.339, + "step": 4540 + }, + { + "epoch": 2.91, + "learning_rate": 0.0002907348242811502, + "loss": 0.3226, + "step": 4550 + }, + { + "epoch": 2.91, + "learning_rate": 0.0002913738019169329, + "loss": 0.2947, + "step": 4560 + }, + { + "epoch": 2.92, + "learning_rate": 0.00029201277955271564, + "loss": 0.3572, + "step": 4570 + }, + { + "epoch": 2.93, + "learning_rate": 0.0002926517571884984, + "loss": 0.3029, + "step": 4580 + }, + { + "epoch": 2.93, + "learning_rate": 0.00029329073482428115, + "loss": 0.3149, + "step": 4590 + }, + { + "epoch": 2.94, + "learning_rate": 0.0002939297124600639, + "loss": 0.3521, + "step": 4600 + }, + { + "epoch": 2.95, + "learning_rate": 0.0002945686900958466, + "loss": 0.3207, + "step": 4610 + }, + { + "epoch": 2.95, + "learning_rate": 0.0002952076677316294, + "loss": 0.2926, + "step": 4620 + }, + { + "epoch": 2.96, + "learning_rate": 0.0002958466453674121, + "loss": 0.3318, + "step": 4630 + }, + { + "epoch": 2.96, + "learning_rate": 0.00029648562300319485, + "loss": 0.2907, + "step": 4640 + }, + { + "epoch": 2.97, + "learning_rate": 0.00029712460063897764, + "loss": 0.3876, + "step": 4650 + }, + { + "epoch": 2.98, + "learning_rate": 0.0002977635782747604, + "loss": 0.3375, + "step": 4660 + }, + { + "epoch": 2.98, + "learning_rate": 0.00029840255591054315, + "loss": 0.3327, + "step": 4670 + }, + { + "epoch": 2.99, + "learning_rate": 0.0002990415335463259, + "loss": 0.2991, + "step": 4680 + }, + { + "epoch": 3.0, + "learning_rate": 0.00029968051118210866, + "loss": 0.3027, + "step": 4690 + }, + { + "epoch": 3.0, + "learning_rate": 0.0003003194888178914, + "loss": 0.3189, + "step": 4700 + }, + { + "epoch": 3.01, + "learning_rate": 0.0003009584664536741, + "loss": 0.3073, + "step": 4710 + }, + { + "epoch": 3.02, + "learning_rate": 0.0003015974440894569, + "loss": 0.2792, + "step": 4720 + }, + { + "epoch": 3.02, + "learning_rate": 0.00030223642172523963, + "loss": 0.2776, + "step": 4730 + }, + { + "epoch": 3.03, + "learning_rate": 0.00030287539936102236, + "loss": 0.3064, + "step": 4740 + }, + { + "epoch": 3.04, + "learning_rate": 0.0003035143769968051, + "loss": 0.2736, + "step": 4750 + }, + { + "epoch": 3.04, + "learning_rate": 0.0003041533546325879, + "loss": 0.2906, + "step": 4760 + }, + { + "epoch": 3.05, + "learning_rate": 0.0003047923322683706, + "loss": 0.2766, + "step": 4770 + }, + { + "epoch": 3.05, + "learning_rate": 0.00030543130990415333, + "loss": 0.3055, + "step": 4780 + }, + { + "epoch": 3.06, + "learning_rate": 0.0003060702875399361, + "loss": 0.2889, + "step": 4790 + }, + { + "epoch": 3.07, + "learning_rate": 0.00030670926517571885, + "loss": 0.2952, + "step": 4800 + }, + { + "epoch": 3.07, + "learning_rate": 0.0003073482428115016, + "loss": 0.3135, + "step": 4810 + }, + { + "epoch": 3.08, + "learning_rate": 0.00030798722044728436, + "loss": 0.3029, + "step": 4820 + }, + { + "epoch": 3.09, + "learning_rate": 0.0003086261980830671, + "loss": 0.2786, + "step": 4830 + }, + { + "epoch": 3.09, + "learning_rate": 0.0003092651757188498, + "loss": 0.2854, + "step": 4840 + }, + { + "epoch": 3.1, + "learning_rate": 0.00030990415335463255, + "loss": 0.2851, + "step": 4850 + }, + { + "epoch": 3.11, + "learning_rate": 0.0003105431309904154, + "loss": 0.2715, + "step": 4860 + }, + { + "epoch": 3.11, + "learning_rate": 0.0003111821086261981, + "loss": 0.2966, + "step": 4870 + }, + { + "epoch": 3.12, + "learning_rate": 0.00031182108626198084, + "loss": 0.2524, + "step": 4880 + }, + { + "epoch": 3.12, + "learning_rate": 0.0003124600638977636, + "loss": 0.3158, + "step": 4890 + }, + { + "epoch": 3.13, + "learning_rate": 0.00031309904153354636, + "loss": 0.2955, + "step": 4900 + }, + { + "epoch": 3.14, + "learning_rate": 0.0003137380191693291, + "loss": 0.3291, + "step": 4910 + }, + { + "epoch": 3.14, + "learning_rate": 0.0003143769968051118, + "loss": 0.3231, + "step": 4920 + }, + { + "epoch": 3.15, + "learning_rate": 0.0003150159744408946, + "loss": 0.3137, + "step": 4930 + }, + { + "epoch": 3.16, + "learning_rate": 0.0003156549520766773, + "loss": 0.2828, + "step": 4940 + }, + { + "epoch": 3.16, + "learning_rate": 0.00031629392971246006, + "loss": 0.2963, + "step": 4950 + }, + { + "epoch": 3.17, + "learning_rate": 0.00031693290734824284, + "loss": 0.281, + "step": 4960 + }, + { + "epoch": 3.18, + "learning_rate": 0.00031757188498402557, + "loss": 0.2507, + "step": 4970 + }, + { + "epoch": 3.18, + "learning_rate": 0.0003182108626198083, + "loss": 0.2772, + "step": 4980 + }, + { + "epoch": 3.19, + "learning_rate": 0.000318849840255591, + "loss": 0.2989, + "step": 4990 + }, + { + "epoch": 3.19, + "learning_rate": 0.0003194888178913738, + "loss": 0.2848, + "step": 5000 + }, + { + "epoch": 3.2, + "learning_rate": 0.00032012779552715654, + "loss": 0.2925, + "step": 5010 + }, + { + "epoch": 3.21, + "learning_rate": 0.00032076677316293927, + "loss": 0.2756, + "step": 5020 + }, + { + "epoch": 3.21, + "learning_rate": 0.00032140575079872205, + "loss": 0.2793, + "step": 5030 + }, + { + "epoch": 3.22, + "learning_rate": 0.0003220447284345048, + "loss": 0.3096, + "step": 5040 + }, + { + "epoch": 3.23, + "learning_rate": 0.0003226837060702875, + "loss": 0.3056, + "step": 5050 + }, + { + "epoch": 3.23, + "learning_rate": 0.0003233226837060703, + "loss": 0.2636, + "step": 5060 + }, + { + "epoch": 3.24, + "learning_rate": 0.0003239616613418531, + "loss": 0.362, + "step": 5070 + }, + { + "epoch": 3.25, + "learning_rate": 0.0003246006389776358, + "loss": 0.294, + "step": 5080 + }, + { + "epoch": 3.25, + "learning_rate": 0.00032523961661341854, + "loss": 0.2913, + "step": 5090 + }, + { + "epoch": 3.26, + "learning_rate": 0.0003258785942492013, + "loss": 0.3018, + "step": 5100 + }, + { + "epoch": 3.27, + "learning_rate": 0.00032651757188498405, + "loss": 0.3096, + "step": 5110 + }, + { + "epoch": 3.27, + "learning_rate": 0.0003271565495207668, + "loss": 0.2857, + "step": 5120 + }, + { + "epoch": 3.28, + "learning_rate": 0.0003277955271565495, + "loss": 0.2586, + "step": 5130 + }, + { + "epoch": 3.28, + "learning_rate": 0.0003284345047923323, + "loss": 0.2943, + "step": 5140 + }, + { + "epoch": 3.29, + "learning_rate": 0.000329073482428115, + "loss": 0.2719, + "step": 5150 + }, + { + "epoch": 3.3, + "learning_rate": 0.00032971246006389775, + "loss": 0.2682, + "step": 5160 + }, + { + "epoch": 3.3, + "learning_rate": 0.00033035143769968053, + "loss": 0.2883, + "step": 5170 + }, + { + "epoch": 3.31, + "learning_rate": 0.00033099041533546326, + "loss": 0.286, + "step": 5180 + }, + { + "epoch": 3.32, + "learning_rate": 0.000331629392971246, + "loss": 0.3188, + "step": 5190 + }, + { + "epoch": 3.32, + "learning_rate": 0.0003322683706070288, + "loss": 0.2683, + "step": 5200 + }, + { + "epoch": 3.33, + "learning_rate": 0.0003329073482428115, + "loss": 0.2782, + "step": 5210 + }, + { + "epoch": 3.34, + "learning_rate": 0.00033354632587859423, + "loss": 0.2968, + "step": 5220 + }, + { + "epoch": 3.34, + "learning_rate": 0.00033418530351437696, + "loss": 0.2765, + "step": 5230 + }, + { + "epoch": 3.35, + "learning_rate": 0.00033482428115015974, + "loss": 0.2481, + "step": 5240 + }, + { + "epoch": 3.35, + "learning_rate": 0.00033546325878594253, + "loss": 0.301, + "step": 5250 + }, + { + "epoch": 3.36, + "learning_rate": 0.00033610223642172526, + "loss": 0.2576, + "step": 5260 + }, + { + "epoch": 3.37, + "learning_rate": 0.00033674121405750804, + "loss": 0.3013, + "step": 5270 + }, + { + "epoch": 3.37, + "learning_rate": 0.00033738019169329077, + "loss": 0.3062, + "step": 5280 + }, + { + "epoch": 3.38, + "learning_rate": 0.0003380191693290735, + "loss": 0.2969, + "step": 5290 + }, + { + "epoch": 3.39, + "learning_rate": 0.00033865814696485623, + "loss": 0.3039, + "step": 5300 + }, + { + "epoch": 3.39, + "learning_rate": 0.000339297124600639, + "loss": 0.2954, + "step": 5310 + }, + { + "epoch": 3.4, + "learning_rate": 0.00033993610223642174, + "loss": 0.2835, + "step": 5320 + }, + { + "epoch": 3.41, + "learning_rate": 0.00034057507987220447, + "loss": 0.3215, + "step": 5330 + }, + { + "epoch": 3.41, + "learning_rate": 0.00034121405750798725, + "loss": 0.2902, + "step": 5340 + }, + { + "epoch": 3.42, + "learning_rate": 0.00034185303514377, + "loss": 0.2957, + "step": 5350 + }, + { + "epoch": 3.42, + "learning_rate": 0.0003424920127795527, + "loss": 0.2844, + "step": 5360 + }, + { + "epoch": 3.43, + "learning_rate": 0.00034313099041533544, + "loss": 0.2502, + "step": 5370 + }, + { + "epoch": 3.44, + "learning_rate": 0.0003437699680511182, + "loss": 0.2757, + "step": 5380 + }, + { + "epoch": 3.44, + "learning_rate": 0.00034440894568690095, + "loss": 0.3069, + "step": 5390 + }, + { + "epoch": 3.45, + "learning_rate": 0.0003450479233226837, + "loss": 0.2948, + "step": 5400 + }, + { + "epoch": 3.46, + "learning_rate": 0.00034568690095846647, + "loss": 0.2886, + "step": 5410 + }, + { + "epoch": 3.46, + "learning_rate": 0.0003463258785942492, + "loss": 0.2625, + "step": 5420 + }, + { + "epoch": 3.47, + "learning_rate": 0.0003469648562300319, + "loss": 0.3224, + "step": 5430 + }, + { + "epoch": 3.48, + "learning_rate": 0.00034760383386581465, + "loss": 0.3151, + "step": 5440 + }, + { + "epoch": 3.48, + "learning_rate": 0.0003482428115015975, + "loss": 0.3082, + "step": 5450 + }, + { + "epoch": 3.49, + "learning_rate": 0.0003488817891373802, + "loss": 0.2851, + "step": 5460 + }, + { + "epoch": 3.5, + "learning_rate": 0.00034952076677316295, + "loss": 0.2495, + "step": 5470 + }, + { + "epoch": 3.5, + "learning_rate": 0.00035015974440894573, + "loss": 0.3161, + "step": 5480 + }, + { + "epoch": 3.51, + "learning_rate": 0.00035079872204472846, + "loss": 0.3085, + "step": 5490 + }, + { + "epoch": 3.51, + "learning_rate": 0.0003514376996805112, + "loss": 0.2806, + "step": 5500 + }, + { + "epoch": 3.52, + "learning_rate": 0.0003520766773162939, + "loss": 0.2427, + "step": 5510 + }, + { + "epoch": 3.53, + "learning_rate": 0.0003527156549520767, + "loss": 0.2567, + "step": 5520 + }, + { + "epoch": 3.53, + "learning_rate": 0.00035335463258785943, + "loss": 0.2581, + "step": 5530 + }, + { + "epoch": 3.54, + "learning_rate": 0.00035399361022364216, + "loss": 0.3021, + "step": 5540 + }, + { + "epoch": 3.55, + "learning_rate": 0.00035463258785942495, + "loss": 0.2633, + "step": 5550 + }, + { + "epoch": 3.55, + "learning_rate": 0.0003552715654952077, + "loss": 0.2753, + "step": 5560 + }, + { + "epoch": 3.56, + "learning_rate": 0.0003559105431309904, + "loss": 0.2755, + "step": 5570 + }, + { + "epoch": 3.57, + "learning_rate": 0.0003565495207667732, + "loss": 0.297, + "step": 5580 + }, + { + "epoch": 3.57, + "learning_rate": 0.0003571884984025559, + "loss": 0.2739, + "step": 5590 + }, + { + "epoch": 3.58, + "learning_rate": 0.00035782747603833865, + "loss": 0.2994, + "step": 5600 + }, + { + "epoch": 3.58, + "learning_rate": 0.0003584664536741214, + "loss": 0.3026, + "step": 5610 + }, + { + "epoch": 3.59, + "learning_rate": 0.00035910543130990416, + "loss": 0.2724, + "step": 5620 + }, + { + "epoch": 3.6, + "learning_rate": 0.0003597444089456869, + "loss": 0.2863, + "step": 5630 + }, + { + "epoch": 3.6, + "learning_rate": 0.0003603833865814696, + "loss": 0.3245, + "step": 5640 + }, + { + "epoch": 3.61, + "learning_rate": 0.00036102236421725245, + "loss": 0.2751, + "step": 5650 + }, + { + "epoch": 3.62, + "learning_rate": 0.0003616613418530352, + "loss": 0.2783, + "step": 5660 + }, + { + "epoch": 3.62, + "learning_rate": 0.0003623003194888179, + "loss": 0.3159, + "step": 5670 + }, + { + "epoch": 3.63, + "learning_rate": 0.00036293929712460064, + "loss": 0.3649, + "step": 5680 + }, + { + "epoch": 3.64, + "learning_rate": 0.0003635782747603834, + "loss": 0.2486, + "step": 5690 + }, + { + "epoch": 3.64, + "learning_rate": 0.00036421725239616615, + "loss": 0.319, + "step": 5700 + }, + { + "epoch": 3.65, + "learning_rate": 0.0003648562300319489, + "loss": 0.2897, + "step": 5710 + }, + { + "epoch": 3.65, + "learning_rate": 0.00036549520766773167, + "loss": 0.2648, + "step": 5720 + }, + { + "epoch": 3.66, + "learning_rate": 0.0003661341853035144, + "loss": 0.3063, + "step": 5730 + }, + { + "epoch": 3.67, + "learning_rate": 0.0003667731629392971, + "loss": 0.2459, + "step": 5740 + }, + { + "epoch": 3.67, + "learning_rate": 0.00036741214057507985, + "loss": 0.3034, + "step": 5750 + }, + { + "epoch": 3.68, + "learning_rate": 0.00036805111821086264, + "loss": 0.2805, + "step": 5760 + }, + { + "epoch": 3.69, + "learning_rate": 0.00036869009584664537, + "loss": 0.2748, + "step": 5770 + }, + { + "epoch": 3.69, + "learning_rate": 0.0003693290734824281, + "loss": 0.2738, + "step": 5780 + }, + { + "epoch": 3.7, + "learning_rate": 0.0003699680511182109, + "loss": 0.2739, + "step": 5790 + }, + { + "epoch": 3.71, + "learning_rate": 0.0003706070287539936, + "loss": 0.3178, + "step": 5800 + }, + { + "epoch": 3.71, + "learning_rate": 0.00037124600638977634, + "loss": 0.2705, + "step": 5810 + }, + { + "epoch": 3.72, + "learning_rate": 0.00037188498402555907, + "loss": 0.3111, + "step": 5820 + }, + { + "epoch": 3.73, + "learning_rate": 0.00037252396166134185, + "loss": 0.2781, + "step": 5830 + }, + { + "epoch": 3.73, + "learning_rate": 0.0003731629392971246, + "loss": 0.2899, + "step": 5840 + }, + { + "epoch": 3.74, + "learning_rate": 0.00037380191693290736, + "loss": 0.316, + "step": 5850 + }, + { + "epoch": 3.74, + "learning_rate": 0.00037444089456869015, + "loss": 0.3097, + "step": 5860 + }, + { + "epoch": 3.75, + "learning_rate": 0.0003750798722044729, + "loss": 0.2693, + "step": 5870 + }, + { + "epoch": 3.76, + "learning_rate": 0.0003757188498402556, + "loss": 0.2846, + "step": 5880 + }, + { + "epoch": 3.76, + "learning_rate": 0.00037635782747603833, + "loss": 0.2892, + "step": 5890 + }, + { + "epoch": 3.77, + "learning_rate": 0.0003769968051118211, + "loss": 0.313, + "step": 5900 + }, + { + "epoch": 3.78, + "learning_rate": 0.00037763578274760385, + "loss": 0.2979, + "step": 5910 + }, + { + "epoch": 3.78, + "learning_rate": 0.0003782747603833866, + "loss": 0.2734, + "step": 5920 + }, + { + "epoch": 3.79, + "learning_rate": 0.00037891373801916936, + "loss": 0.2706, + "step": 5930 + }, + { + "epoch": 3.8, + "learning_rate": 0.0003795527156549521, + "loss": 0.2963, + "step": 5940 + }, + { + "epoch": 3.8, + "learning_rate": 0.0003801916932907348, + "loss": 0.2532, + "step": 5950 + }, + { + "epoch": 3.81, + "learning_rate": 0.0003808306709265176, + "loss": 0.3183, + "step": 5960 + }, + { + "epoch": 3.81, + "learning_rate": 0.00038146964856230033, + "loss": 0.3032, + "step": 5970 + }, + { + "epoch": 3.82, + "learning_rate": 0.00038210862619808306, + "loss": 0.3129, + "step": 5980 + }, + { + "epoch": 3.83, + "learning_rate": 0.0003827476038338658, + "loss": 0.267, + "step": 5990 + }, + { + "epoch": 3.83, + "learning_rate": 0.00038338658146964857, + "loss": 0.2766, + "step": 6000 + }, + { + "epoch": 3.84, + "learning_rate": 0.0003840255591054313, + "loss": 0.2861, + "step": 6010 + }, + { + "epoch": 3.85, + "learning_rate": 0.00038466453674121403, + "loss": 0.2807, + "step": 6020 + }, + { + "epoch": 3.85, + "learning_rate": 0.0003853035143769968, + "loss": 0.3118, + "step": 6030 + }, + { + "epoch": 3.86, + "learning_rate": 0.00038594249201277954, + "loss": 0.286, + "step": 6040 + }, + { + "epoch": 3.87, + "learning_rate": 0.0003865814696485623, + "loss": 0.2676, + "step": 6050 + }, + { + "epoch": 3.87, + "learning_rate": 0.00038722044728434506, + "loss": 0.3028, + "step": 6060 + }, + { + "epoch": 3.88, + "learning_rate": 0.00038785942492012784, + "loss": 0.2564, + "step": 6070 + }, + { + "epoch": 3.88, + "learning_rate": 0.00038849840255591057, + "loss": 0.2974, + "step": 6080 + }, + { + "epoch": 3.89, + "learning_rate": 0.0003891373801916933, + "loss": 0.2727, + "step": 6090 + }, + { + "epoch": 3.9, + "learning_rate": 0.0003897763578274761, + "loss": 0.2673, + "step": 6100 + }, + { + "epoch": 3.9, + "learning_rate": 0.0003904153354632588, + "loss": 0.27, + "step": 6110 + }, + { + "epoch": 3.91, + "learning_rate": 0.00039105431309904154, + "loss": 0.2758, + "step": 6120 + }, + { + "epoch": 3.92, + "learning_rate": 0.00039169329073482427, + "loss": 0.2891, + "step": 6130 + }, + { + "epoch": 3.92, + "learning_rate": 0.00039233226837060705, + "loss": 0.2904, + "step": 6140 + }, + { + "epoch": 3.93, + "learning_rate": 0.0003929712460063898, + "loss": 0.3009, + "step": 6150 + }, + { + "epoch": 3.94, + "learning_rate": 0.0003936102236421725, + "loss": 0.314, + "step": 6160 + }, + { + "epoch": 3.94, + "learning_rate": 0.0003942492012779553, + "loss": 0.289, + "step": 6170 + }, + { + "epoch": 3.95, + "learning_rate": 0.000394888178913738, + "loss": 0.3163, + "step": 6180 + }, + { + "epoch": 3.95, + "learning_rate": 0.00039552715654952075, + "loss": 0.2605, + "step": 6190 + }, + { + "epoch": 3.96, + "learning_rate": 0.0003961661341853035, + "loss": 0.2758, + "step": 6200 + }, + { + "epoch": 3.97, + "learning_rate": 0.00039680511182108626, + "loss": 0.3064, + "step": 6210 + }, + { + "epoch": 3.97, + "learning_rate": 0.000397444089456869, + "loss": 0.2991, + "step": 6220 + }, + { + "epoch": 3.98, + "learning_rate": 0.0003980830670926517, + "loss": 0.307, + "step": 6230 + }, + { + "epoch": 3.99, + "learning_rate": 0.0003987220447284345, + "loss": 0.285, + "step": 6240 + }, + { + "epoch": 3.99, + "learning_rate": 0.0003993610223642173, + "loss": 0.2797, + "step": 6250 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004, + "loss": 0.2737, + "step": 6260 + }, + { + "epoch": 4.01, + "learning_rate": 0.00040063897763578275, + "loss": 0.2392, + "step": 6270 + }, + { + "epoch": 4.01, + "learning_rate": 0.00040127795527156553, + "loss": 0.2275, + "step": 6280 + }, + { + "epoch": 4.02, + "learning_rate": 0.00040191693290734826, + "loss": 0.2617, + "step": 6290 + }, + { + "epoch": 4.03, + "learning_rate": 0.000402555910543131, + "loss": 0.2528, + "step": 6300 + }, + { + "epoch": 4.03, + "learning_rate": 0.0004031948881789138, + "loss": 0.2262, + "step": 6310 + }, + { + "epoch": 4.04, + "learning_rate": 0.0004038338658146965, + "loss": 0.259, + "step": 6320 + }, + { + "epoch": 4.04, + "learning_rate": 0.00040447284345047923, + "loss": 0.2519, + "step": 6330 + }, + { + "epoch": 4.05, + "learning_rate": 0.000405111821086262, + "loss": 0.2259, + "step": 6340 + }, + { + "epoch": 4.06, + "learning_rate": 0.00040575079872204474, + "loss": 0.2386, + "step": 6350 + }, + { + "epoch": 4.06, + "learning_rate": 0.0004063897763578275, + "loss": 0.2217, + "step": 6360 + }, + { + "epoch": 4.07, + "learning_rate": 0.0004070287539936102, + "loss": 0.2309, + "step": 6370 + }, + { + "epoch": 4.08, + "learning_rate": 0.000407667731629393, + "loss": 0.211, + "step": 6380 + }, + { + "epoch": 4.08, + "learning_rate": 0.0004083067092651757, + "loss": 0.2459, + "step": 6390 + }, + { + "epoch": 4.09, + "learning_rate": 0.00040894568690095844, + "loss": 0.2603, + "step": 6400 + }, + { + "epoch": 4.1, + "learning_rate": 0.00040958466453674123, + "loss": 0.2343, + "step": 6410 + }, + { + "epoch": 4.1, + "learning_rate": 0.00041022364217252396, + "loss": 0.2562, + "step": 6420 + }, + { + "epoch": 4.11, + "learning_rate": 0.0004108626198083067, + "loss": 0.2148, + "step": 6430 + }, + { + "epoch": 4.11, + "learning_rate": 0.00041150159744408947, + "loss": 0.2349, + "step": 6440 + }, + { + "epoch": 4.12, + "learning_rate": 0.00041214057507987225, + "loss": 0.2975, + "step": 6450 + }, + { + "epoch": 4.13, + "learning_rate": 0.000412779552715655, + "loss": 0.2541, + "step": 6460 + }, + { + "epoch": 4.13, + "learning_rate": 0.0004134185303514377, + "loss": 0.2555, + "step": 6470 + }, + { + "epoch": 4.14, + "learning_rate": 0.0004140575079872205, + "loss": 0.2818, + "step": 6480 + }, + { + "epoch": 4.15, + "learning_rate": 0.0004146964856230032, + "loss": 0.2577, + "step": 6490 + }, + { + "epoch": 4.15, + "learning_rate": 0.00041533546325878595, + "loss": 0.2433, + "step": 6500 + }, + { + "epoch": 4.16, + "learning_rate": 0.0004159744408945687, + "loss": 0.2231, + "step": 6510 + }, + { + "epoch": 4.17, + "learning_rate": 0.00041661341853035147, + "loss": 0.2841, + "step": 6520 + }, + { + "epoch": 4.17, + "learning_rate": 0.0004172523961661342, + "loss": 0.2048, + "step": 6530 + }, + { + "epoch": 4.18, + "learning_rate": 0.0004178913738019169, + "loss": 0.2484, + "step": 6540 + }, + { + "epoch": 4.19, + "learning_rate": 0.0004185303514376997, + "loss": 0.2542, + "step": 6550 + }, + { + "epoch": 4.19, + "learning_rate": 0.00041916932907348244, + "loss": 0.2293, + "step": 6560 + }, + { + "epoch": 4.2, + "learning_rate": 0.00041980830670926517, + "loss": 0.2545, + "step": 6570 + }, + { + "epoch": 4.2, + "learning_rate": 0.0004204472843450479, + "loss": 0.2565, + "step": 6580 + }, + { + "epoch": 4.21, + "learning_rate": 0.0004210862619808307, + "loss": 0.2441, + "step": 6590 + }, + { + "epoch": 4.22, + "learning_rate": 0.0004217252396166134, + "loss": 0.2623, + "step": 6600 + }, + { + "epoch": 4.22, + "learning_rate": 0.00042236421725239614, + "loss": 0.2542, + "step": 6610 + }, + { + "epoch": 4.23, + "learning_rate": 0.0004230031948881789, + "loss": 0.2368, + "step": 6620 + }, + { + "epoch": 4.24, + "learning_rate": 0.00042364217252396165, + "loss": 0.2683, + "step": 6630 + }, + { + "epoch": 4.24, + "learning_rate": 0.00042428115015974443, + "loss": 0.2468, + "step": 6640 + }, + { + "epoch": 4.25, + "learning_rate": 0.00042492012779552716, + "loss": 0.226, + "step": 6650 + }, + { + "epoch": 4.26, + "learning_rate": 0.00042555910543130995, + "loss": 0.2362, + "step": 6660 + }, + { + "epoch": 4.26, + "learning_rate": 0.0004261980830670927, + "loss": 0.2606, + "step": 6670 + }, + { + "epoch": 4.27, + "learning_rate": 0.0004268370607028754, + "loss": 0.2436, + "step": 6680 + }, + { + "epoch": 4.27, + "learning_rate": 0.0004274760383386582, + "loss": 0.2501, + "step": 6690 + }, + { + "epoch": 4.28, + "learning_rate": 0.0004281150159744409, + "loss": 0.2367, + "step": 6700 + }, + { + "epoch": 4.29, + "learning_rate": 0.00042875399361022365, + "loss": 0.248, + "step": 6710 + }, + { + "epoch": 4.29, + "learning_rate": 0.00042939297124600643, + "loss": 0.2475, + "step": 6720 + }, + { + "epoch": 4.3, + "learning_rate": 0.00043003194888178916, + "loss": 0.2544, + "step": 6730 + }, + { + "epoch": 4.31, + "learning_rate": 0.0004306709265175719, + "loss": 0.2127, + "step": 6740 + }, + { + "epoch": 4.31, + "learning_rate": 0.0004313099041533546, + "loss": 0.2495, + "step": 6750 + }, + { + "epoch": 4.32, + "learning_rate": 0.0004319488817891374, + "loss": 0.2434, + "step": 6760 + }, + { + "epoch": 4.33, + "learning_rate": 0.00043258785942492013, + "loss": 0.2641, + "step": 6770 + }, + { + "epoch": 4.33, + "learning_rate": 0.00043322683706070286, + "loss": 0.2549, + "step": 6780 + }, + { + "epoch": 4.34, + "learning_rate": 0.00043386581469648564, + "loss": 0.2748, + "step": 6790 + }, + { + "epoch": 4.34, + "learning_rate": 0.00043450479233226837, + "loss": 0.2991, + "step": 6800 + }, + { + "epoch": 4.35, + "learning_rate": 0.0004351437699680511, + "loss": 0.2217, + "step": 6810 + }, + { + "epoch": 4.36, + "learning_rate": 0.00043578274760383383, + "loss": 0.2307, + "step": 6820 + }, + { + "epoch": 4.36, + "learning_rate": 0.0004364217252396166, + "loss": 0.234, + "step": 6830 + }, + { + "epoch": 4.37, + "learning_rate": 0.0004370607028753994, + "loss": 0.2637, + "step": 6840 + }, + { + "epoch": 4.38, + "learning_rate": 0.0004376996805111821, + "loss": 0.2285, + "step": 6850 + }, + { + "epoch": 4.38, + "learning_rate": 0.0004383386581469649, + "loss": 0.2585, + "step": 6860 + }, + { + "epoch": 4.39, + "learning_rate": 0.00043897763578274764, + "loss": 0.2358, + "step": 6870 + }, + { + "epoch": 4.4, + "learning_rate": 0.00043961661341853037, + "loss": 0.246, + "step": 6880 + }, + { + "epoch": 4.4, + "learning_rate": 0.0004402555910543131, + "loss": 0.2672, + "step": 6890 + }, + { + "epoch": 4.41, + "learning_rate": 0.0004408945686900959, + "loss": 0.2606, + "step": 6900 + }, + { + "epoch": 4.42, + "learning_rate": 0.0004415335463258786, + "loss": 0.2397, + "step": 6910 + }, + { + "epoch": 4.42, + "learning_rate": 0.00044217252396166134, + "loss": 0.2539, + "step": 6920 + }, + { + "epoch": 4.43, + "learning_rate": 0.0004428115015974441, + "loss": 0.2348, + "step": 6930 + }, + { + "epoch": 4.43, + "learning_rate": 0.00044345047923322685, + "loss": 0.2845, + "step": 6940 + }, + { + "epoch": 4.44, + "learning_rate": 0.0004440894568690096, + "loss": 0.2402, + "step": 6950 + }, + { + "epoch": 4.45, + "learning_rate": 0.0004447284345047923, + "loss": 0.2678, + "step": 6960 + }, + { + "epoch": 4.45, + "learning_rate": 0.0004453674121405751, + "loss": 0.2653, + "step": 6970 + }, + { + "epoch": 4.46, + "learning_rate": 0.0004460063897763578, + "loss": 0.2813, + "step": 6980 + }, + { + "epoch": 4.47, + "learning_rate": 0.00044664536741214055, + "loss": 0.262, + "step": 6990 + }, + { + "epoch": 4.47, + "learning_rate": 0.00044728434504792333, + "loss": 0.2247, + "step": 7000 + }, + { + "epoch": 4.48, + "learning_rate": 0.00044792332268370606, + "loss": 0.2547, + "step": 7010 + }, + { + "epoch": 4.49, + "learning_rate": 0.0004485623003194888, + "loss": 0.2627, + "step": 7020 + }, + { + "epoch": 4.49, + "learning_rate": 0.0004492012779552715, + "loss": 0.277, + "step": 7030 + }, + { + "epoch": 4.5, + "learning_rate": 0.00044984025559105436, + "loss": 0.2467, + "step": 7040 + }, + { + "epoch": 4.5, + "learning_rate": 0.0004504792332268371, + "loss": 0.2437, + "step": 7050 + }, + { + "epoch": 4.51, + "learning_rate": 0.0004511182108626198, + "loss": 0.2605, + "step": 7060 + }, + { + "epoch": 4.52, + "learning_rate": 0.0004517571884984026, + "loss": 0.2649, + "step": 7070 + }, + { + "epoch": 4.52, + "learning_rate": 0.00045239616613418533, + "loss": 0.283, + "step": 7080 + }, + { + "epoch": 4.53, + "learning_rate": 0.00045303514376996806, + "loss": 0.2337, + "step": 7090 + }, + { + "epoch": 4.54, + "learning_rate": 0.00045367412140575084, + "loss": 0.2756, + "step": 7100 + }, + { + "epoch": 4.54, + "learning_rate": 0.00045431309904153357, + "loss": 0.2699, + "step": 7110 + }, + { + "epoch": 4.55, + "learning_rate": 0.0004549520766773163, + "loss": 0.2649, + "step": 7120 + }, + { + "epoch": 4.56, + "learning_rate": 0.00045559105431309903, + "loss": 0.2685, + "step": 7130 + }, + { + "epoch": 4.56, + "learning_rate": 0.0004562300319488818, + "loss": 0.2451, + "step": 7140 + }, + { + "epoch": 4.57, + "learning_rate": 0.00045686900958466454, + "loss": 0.2614, + "step": 7150 + }, + { + "epoch": 4.57, + "learning_rate": 0.00045750798722044727, + "loss": 0.2484, + "step": 7160 + }, + { + "epoch": 4.58, + "learning_rate": 0.00045814696485623006, + "loss": 0.2642, + "step": 7170 + }, + { + "epoch": 4.59, + "learning_rate": 0.0004587859424920128, + "loss": 0.2413, + "step": 7180 + }, + { + "epoch": 4.59, + "learning_rate": 0.0004594249201277955, + "loss": 0.2324, + "step": 7190 + }, + { + "epoch": 4.6, + "learning_rate": 0.00046006389776357824, + "loss": 0.2632, + "step": 7200 + }, + { + "epoch": 4.61, + "learning_rate": 0.000460702875399361, + "loss": 0.2472, + "step": 7210 + }, + { + "epoch": 4.61, + "learning_rate": 0.00046134185303514376, + "loss": 0.276, + "step": 7220 + }, + { + "epoch": 4.62, + "learning_rate": 0.0004619808306709265, + "loss": 0.2647, + "step": 7230 + }, + { + "epoch": 4.63, + "learning_rate": 0.0004626198083067093, + "loss": 0.2409, + "step": 7240 + }, + { + "epoch": 4.63, + "learning_rate": 0.00046325878594249205, + "loss": 0.2568, + "step": 7250 + }, + { + "epoch": 4.64, + "learning_rate": 0.0004638977635782748, + "loss": 0.2856, + "step": 7260 + }, + { + "epoch": 4.65, + "learning_rate": 0.0004645367412140575, + "loss": 0.249, + "step": 7270 + }, + { + "epoch": 4.65, + "learning_rate": 0.0004651757188498403, + "loss": 0.2779, + "step": 7280 + }, + { + "epoch": 4.66, + "learning_rate": 0.000465814696485623, + "loss": 0.2317, + "step": 7290 + }, + { + "epoch": 4.66, + "learning_rate": 0.00046645367412140575, + "loss": 0.2889, + "step": 7300 + }, + { + "epoch": 4.67, + "learning_rate": 0.00046709265175718854, + "loss": 0.2711, + "step": 7310 + }, + { + "epoch": 4.68, + "learning_rate": 0.00046773162939297126, + "loss": 0.2467, + "step": 7320 + }, + { + "epoch": 4.68, + "learning_rate": 0.000468370607028754, + "loss": 0.2422, + "step": 7330 + }, + { + "epoch": 4.69, + "learning_rate": 0.0004690095846645367, + "loss": 0.2018, + "step": 7340 + }, + { + "epoch": 4.7, + "learning_rate": 0.0004696485623003195, + "loss": 0.2285, + "step": 7350 + }, + { + "epoch": 4.7, + "learning_rate": 0.00047028753993610224, + "loss": 0.2277, + "step": 7360 + }, + { + "epoch": 4.71, + "learning_rate": 0.00047092651757188496, + "loss": 0.2734, + "step": 7370 + }, + { + "epoch": 4.72, + "learning_rate": 0.00047156549520766775, + "loss": 0.2438, + "step": 7380 + }, + { + "epoch": 4.72, + "learning_rate": 0.0004722044728434505, + "loss": 0.2769, + "step": 7390 + }, + { + "epoch": 4.73, + "learning_rate": 0.0004728434504792332, + "loss": 0.2448, + "step": 7400 + }, + { + "epoch": 4.73, + "learning_rate": 0.00047348242811501594, + "loss": 0.2272, + "step": 7410 + }, + { + "epoch": 4.74, + "learning_rate": 0.0004741214057507987, + "loss": 0.2935, + "step": 7420 + }, + { + "epoch": 4.75, + "learning_rate": 0.0004747603833865815, + "loss": 0.2244, + "step": 7430 + }, + { + "epoch": 4.75, + "learning_rate": 0.00047539936102236423, + "loss": 0.2567, + "step": 7440 + }, + { + "epoch": 4.76, + "learning_rate": 0.000476038338658147, + "loss": 0.2725, + "step": 7450 + }, + { + "epoch": 4.77, + "learning_rate": 0.00047667731629392974, + "loss": 0.2912, + "step": 7460 + }, + { + "epoch": 4.77, + "learning_rate": 0.0004773162939297125, + "loss": 0.2618, + "step": 7470 + }, + { + "epoch": 4.78, + "learning_rate": 0.00047795527156549526, + "loss": 0.2646, + "step": 7480 + }, + { + "epoch": 4.79, + "learning_rate": 0.000478594249201278, + "loss": 0.2548, + "step": 7490 + }, + { + "epoch": 4.79, + "learning_rate": 0.0004792332268370607, + "loss": 0.2626, + "step": 7500 + }, + { + "epoch": 4.8, + "learning_rate": 0.00047987220447284344, + "loss": 0.2671, + "step": 7510 + }, + { + "epoch": 4.8, + "learning_rate": 0.00048051118210862623, + "loss": 0.2704, + "step": 7520 + }, + { + "epoch": 4.81, + "learning_rate": 0.00048115015974440896, + "loss": 0.2659, + "step": 7530 + }, + { + "epoch": 4.82, + "learning_rate": 0.0004817891373801917, + "loss": 0.2804, + "step": 7540 + }, + { + "epoch": 4.82, + "learning_rate": 0.00048242811501597447, + "loss": 0.2503, + "step": 7550 + }, + { + "epoch": 4.83, + "learning_rate": 0.0004830670926517572, + "loss": 0.2489, + "step": 7560 + }, + { + "epoch": 4.84, + "learning_rate": 0.00048370607028753993, + "loss": 0.2837, + "step": 7570 + }, + { + "epoch": 4.84, + "learning_rate": 0.00048434504792332266, + "loss": 0.2364, + "step": 7580 + }, + { + "epoch": 4.85, + "learning_rate": 0.00048498402555910544, + "loss": 0.2508, + "step": 7590 + }, + { + "epoch": 4.86, + "learning_rate": 0.00048562300319488817, + "loss": 0.2279, + "step": 7600 + }, + { + "epoch": 4.86, + "learning_rate": 0.0004862619808306709, + "loss": 0.2174, + "step": 7610 + }, + { + "epoch": 4.87, + "learning_rate": 0.0004869009584664537, + "loss": 0.2837, + "step": 7620 + }, + { + "epoch": 4.88, + "learning_rate": 0.00048753993610223647, + "loss": 0.2414, + "step": 7630 + }, + { + "epoch": 4.88, + "learning_rate": 0.0004881789137380192, + "loss": 0.2452, + "step": 7640 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004888178913738019, + "loss": 0.2854, + "step": 7650 + }, + { + "epoch": 4.89, + "learning_rate": 0.0004894568690095847, + "loss": 0.2593, + "step": 7660 + }, + { + "epoch": 4.9, + "learning_rate": 0.0004900958466453675, + "loss": 0.2507, + "step": 7670 + }, + { + "epoch": 4.91, + "learning_rate": 0.0004907348242811502, + "loss": 0.2407, + "step": 7680 + }, + { + "epoch": 4.91, + "learning_rate": 0.000491373801916933, + "loss": 0.2345, + "step": 7690 + }, + { + "epoch": 4.92, + "learning_rate": 0.0004920127795527157, + "loss": 0.2786, + "step": 7700 + }, + { + "epoch": 4.93, + "learning_rate": 0.0004926517571884984, + "loss": 0.2639, + "step": 7710 + }, + { + "epoch": 4.93, + "learning_rate": 0.0004932907348242811, + "loss": 0.2819, + "step": 7720 + }, + { + "epoch": 4.94, + "learning_rate": 0.0004939297124600639, + "loss": 0.2602, + "step": 7730 + }, + { + "epoch": 4.95, + "learning_rate": 0.0004945686900958467, + "loss": 0.2647, + "step": 7740 + }, + { + "epoch": 4.95, + "learning_rate": 0.0004952076677316294, + "loss": 0.2126, + "step": 7750 + }, + { + "epoch": 4.96, + "learning_rate": 0.0004958466453674122, + "loss": 0.2694, + "step": 7760 + }, + { + "epoch": 4.96, + "learning_rate": 0.0004964856230031949, + "loss": 0.2193, + "step": 7770 + }, + { + "epoch": 4.97, + "learning_rate": 0.0004971246006389776, + "loss": 0.2219, + "step": 7780 + }, + { + "epoch": 4.98, + "learning_rate": 0.0004977635782747603, + "loss": 0.2674, + "step": 7790 + }, + { + "epoch": 4.98, + "learning_rate": 0.0004984025559105431, + "loss": 0.2368, + "step": 7800 + }, + { + "epoch": 4.99, + "learning_rate": 0.0004990415335463259, + "loss": 0.2356, + "step": 7810 + }, + { + "epoch": 5.0, + "learning_rate": 0.0004996805111821086, + "loss": 0.2548, + "step": 7820 + }, + { + "epoch": 5.0, + "learning_rate": 0.0005003194888178914, + "loss": 0.277, + "step": 7830 + }, + { + "epoch": 5.01, + "learning_rate": 0.0005009584664536741, + "loss": 0.2186, + "step": 7840 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005015974440894568, + "loss": 0.2285, + "step": 7850 + }, + { + "epoch": 5.02, + "learning_rate": 0.0005022364217252397, + "loss": 0.2198, + "step": 7860 + }, + { + "epoch": 5.03, + "learning_rate": 0.0005028753993610223, + "loss": 0.1782, + "step": 7870 + }, + { + "epoch": 5.04, + "learning_rate": 0.0005035143769968051, + "loss": 0.2177, + "step": 7880 + }, + { + "epoch": 5.04, + "learning_rate": 0.0005041533546325879, + "loss": 0.2345, + "step": 7890 + }, + { + "epoch": 5.05, + "learning_rate": 0.0005047923322683706, + "loss": 0.2268, + "step": 7900 + }, + { + "epoch": 5.05, + "learning_rate": 0.0005054313099041533, + "loss": 0.2024, + "step": 7910 + }, + { + "epoch": 5.06, + "learning_rate": 0.0005060702875399362, + "loss": 0.2183, + "step": 7920 + }, + { + "epoch": 5.07, + "learning_rate": 0.0005067092651757189, + "loss": 0.2126, + "step": 7930 + }, + { + "epoch": 5.07, + "learning_rate": 0.0005073482428115016, + "loss": 0.241, + "step": 7940 + }, + { + "epoch": 5.08, + "learning_rate": 0.0005079872204472845, + "loss": 0.2137, + "step": 7950 + }, + { + "epoch": 5.09, + "learning_rate": 0.0005086261980830671, + "loss": 0.2156, + "step": 7960 + }, + { + "epoch": 5.09, + "learning_rate": 0.0005092651757188499, + "loss": 0.2081, + "step": 7970 + }, + { + "epoch": 5.1, + "learning_rate": 0.0005099041533546325, + "loss": 0.2235, + "step": 7980 + }, + { + "epoch": 5.11, + "learning_rate": 0.0005105431309904154, + "loss": 0.2023, + "step": 7990 + }, + { + "epoch": 5.11, + "learning_rate": 0.0005111821086261981, + "loss": 0.2001, + "step": 8000 + }, + { + "epoch": 5.12, + "learning_rate": 0.0005118210862619808, + "loss": 0.1899, + "step": 8010 + }, + { + "epoch": 5.12, + "learning_rate": 0.0005124600638977636, + "loss": 0.2328, + "step": 8020 + }, + { + "epoch": 5.13, + "learning_rate": 0.0005130990415335464, + "loss": 0.222, + "step": 8030 + }, + { + "epoch": 5.14, + "learning_rate": 0.000513738019169329, + "loss": 0.2191, + "step": 8040 + }, + { + "epoch": 5.14, + "learning_rate": 0.0005143769968051119, + "loss": 0.2012, + "step": 8050 + }, + { + "epoch": 5.15, + "learning_rate": 0.0005150159744408946, + "loss": 0.2093, + "step": 8060 + }, + { + "epoch": 5.16, + "learning_rate": 0.0005156549520766773, + "loss": 0.192, + "step": 8070 + }, + { + "epoch": 5.16, + "learning_rate": 0.00051629392971246, + "loss": 0.2442, + "step": 8080 + }, + { + "epoch": 5.17, + "learning_rate": 0.0005169329073482429, + "loss": 0.2149, + "step": 8090 + }, + { + "epoch": 5.18, + "learning_rate": 0.0005175718849840255, + "loss": 0.2278, + "step": 8100 + }, + { + "epoch": 5.18, + "learning_rate": 0.0005182108626198083, + "loss": 0.2626, + "step": 8110 + }, + { + "epoch": 5.19, + "learning_rate": 0.0005188498402555911, + "loss": 0.2234, + "step": 8120 + }, + { + "epoch": 5.19, + "learning_rate": 0.0005194888178913738, + "loss": 0.2177, + "step": 8130 + }, + { + "epoch": 5.2, + "learning_rate": 0.0005201277955271566, + "loss": 0.264, + "step": 8140 + }, + { + "epoch": 5.21, + "learning_rate": 0.0005207667731629393, + "loss": 0.2129, + "step": 8150 + }, + { + "epoch": 5.21, + "learning_rate": 0.0005214057507987221, + "loss": 0.2431, + "step": 8160 + }, + { + "epoch": 5.22, + "learning_rate": 0.0005220447284345048, + "loss": 0.2204, + "step": 8170 + }, + { + "epoch": 5.23, + "learning_rate": 0.0005226837060702875, + "loss": 0.2081, + "step": 8180 + }, + { + "epoch": 5.23, + "learning_rate": 0.0005233226837060703, + "loss": 0.207, + "step": 8190 + }, + { + "epoch": 5.24, + "learning_rate": 0.0005239616613418531, + "loss": 0.2233, + "step": 8200 + }, + { + "epoch": 5.25, + "learning_rate": 0.0005246006389776357, + "loss": 0.2488, + "step": 8210 + }, + { + "epoch": 5.25, + "learning_rate": 0.0005252396166134186, + "loss": 0.207, + "step": 8220 + }, + { + "epoch": 5.26, + "learning_rate": 0.0005258785942492013, + "loss": 0.2447, + "step": 8230 + }, + { + "epoch": 5.27, + "learning_rate": 0.000526517571884984, + "loss": 0.2295, + "step": 8240 + }, + { + "epoch": 5.27, + "learning_rate": 0.0005271565495207668, + "loss": 0.2539, + "step": 8250 + }, + { + "epoch": 5.28, + "learning_rate": 0.0005277955271565496, + "loss": 0.243, + "step": 8260 + }, + { + "epoch": 5.28, + "learning_rate": 0.0005284345047923322, + "loss": 0.2116, + "step": 8270 + }, + { + "epoch": 5.29, + "learning_rate": 0.0005290734824281151, + "loss": 0.2204, + "step": 8280 + }, + { + "epoch": 5.3, + "learning_rate": 0.0005297124600638977, + "loss": 0.2041, + "step": 8290 + }, + { + "epoch": 5.3, + "learning_rate": 0.0005303514376996805, + "loss": 0.2402, + "step": 8300 + }, + { + "epoch": 5.31, + "learning_rate": 0.0005309904153354632, + "loss": 0.2276, + "step": 8310 + }, + { + "epoch": 5.32, + "learning_rate": 0.000531629392971246, + "loss": 0.2305, + "step": 8320 + }, + { + "epoch": 5.32, + "learning_rate": 0.0005322683706070288, + "loss": 0.2257, + "step": 8330 + }, + { + "epoch": 5.33, + "learning_rate": 0.0005329073482428115, + "loss": 0.2062, + "step": 8340 + }, + { + "epoch": 5.34, + "learning_rate": 0.0005335463258785943, + "loss": 0.2013, + "step": 8350 + }, + { + "epoch": 5.34, + "learning_rate": 0.000534185303514377, + "loss": 0.2429, + "step": 8360 + }, + { + "epoch": 5.35, + "learning_rate": 0.0005348242811501598, + "loss": 0.1853, + "step": 8370 + }, + { + "epoch": 5.35, + "learning_rate": 0.0005354632587859425, + "loss": 0.2314, + "step": 8380 + }, + { + "epoch": 5.36, + "learning_rate": 0.0005361022364217253, + "loss": 0.2033, + "step": 8390 + }, + { + "epoch": 5.37, + "learning_rate": 0.000536741214057508, + "loss": 0.2227, + "step": 8400 + }, + { + "epoch": 5.37, + "learning_rate": 0.0005373801916932908, + "loss": 0.2243, + "step": 8410 + }, + { + "epoch": 5.38, + "learning_rate": 0.0005380191693290735, + "loss": 0.2322, + "step": 8420 + }, + { + "epoch": 5.39, + "learning_rate": 0.0005386581469648563, + "loss": 0.2103, + "step": 8430 + }, + { + "epoch": 5.39, + "learning_rate": 0.0005392971246006389, + "loss": 0.2361, + "step": 8440 + }, + { + "epoch": 5.4, + "learning_rate": 0.0005399361022364218, + "loss": 0.2518, + "step": 8450 + }, + { + "epoch": 5.41, + "learning_rate": 0.0005405750798722044, + "loss": 0.2079, + "step": 8460 + }, + { + "epoch": 5.41, + "learning_rate": 0.0005412140575079872, + "loss": 0.2353, + "step": 8470 + }, + { + "epoch": 5.42, + "learning_rate": 0.00054185303514377, + "loss": 0.2471, + "step": 8480 + }, + { + "epoch": 5.42, + "learning_rate": 0.0005424920127795527, + "loss": 0.2506, + "step": 8490 + }, + { + "epoch": 5.43, + "learning_rate": 0.0005431309904153354, + "loss": 0.2277, + "step": 8500 + }, + { + "epoch": 5.44, + "learning_rate": 0.0005437699680511183, + "loss": 0.2754, + "step": 8510 + }, + { + "epoch": 5.44, + "learning_rate": 0.000544408945686901, + "loss": 0.2325, + "step": 8520 + }, + { + "epoch": 5.45, + "learning_rate": 0.0005450479233226837, + "loss": 0.2329, + "step": 8530 + }, + { + "epoch": 5.46, + "learning_rate": 0.0005456869009584666, + "loss": 0.2542, + "step": 8540 + }, + { + "epoch": 5.46, + "learning_rate": 0.0005463258785942492, + "loss": 0.2054, + "step": 8550 + }, + { + "epoch": 5.47, + "learning_rate": 0.000546964856230032, + "loss": 0.2109, + "step": 8560 + }, + { + "epoch": 5.48, + "learning_rate": 0.0005476038338658147, + "loss": 0.2119, + "step": 8570 + }, + { + "epoch": 5.48, + "learning_rate": 0.0005482428115015975, + "loss": 0.2293, + "step": 8580 + }, + { + "epoch": 5.49, + "learning_rate": 0.0005488817891373802, + "loss": 0.2151, + "step": 8590 + }, + { + "epoch": 5.5, + "learning_rate": 0.0005495207667731629, + "loss": 0.2247, + "step": 8600 + }, + { + "epoch": 5.5, + "learning_rate": 0.0005501597444089457, + "loss": 0.2787, + "step": 8610 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005507987220447285, + "loss": 0.2135, + "step": 8620 + }, + { + "epoch": 5.51, + "learning_rate": 0.0005514376996805111, + "loss": 0.2429, + "step": 8630 + }, + { + "epoch": 5.52, + "learning_rate": 0.000552076677316294, + "loss": 0.224, + "step": 8640 + }, + { + "epoch": 5.53, + "learning_rate": 0.0005527156549520767, + "loss": 0.2559, + "step": 8650 + }, + { + "epoch": 5.53, + "learning_rate": 0.0005533546325878594, + "loss": 0.2006, + "step": 8660 + }, + { + "epoch": 5.54, + "learning_rate": 0.0005539936102236421, + "loss": 0.2098, + "step": 8670 + }, + { + "epoch": 5.55, + "learning_rate": 0.000554632587859425, + "loss": 0.2211, + "step": 8680 + }, + { + "epoch": 5.55, + "learning_rate": 0.0005552715654952076, + "loss": 0.2151, + "step": 8690 + }, + { + "epoch": 5.56, + "learning_rate": 0.0005559105431309904, + "loss": 0.2487, + "step": 8700 + }, + { + "epoch": 5.57, + "learning_rate": 0.0005565495207667732, + "loss": 0.1934, + "step": 8710 + }, + { + "epoch": 5.57, + "learning_rate": 0.0005571884984025559, + "loss": 0.237, + "step": 8720 + }, + { + "epoch": 5.58, + "learning_rate": 0.0005578274760383387, + "loss": 0.2356, + "step": 8730 + }, + { + "epoch": 5.58, + "learning_rate": 0.0005584664536741214, + "loss": 0.242, + "step": 8740 + }, + { + "epoch": 5.59, + "learning_rate": 0.0005591054313099042, + "loss": 0.2187, + "step": 8750 + }, + { + "epoch": 5.6, + "learning_rate": 0.0005597444089456869, + "loss": 0.2434, + "step": 8760 + }, + { + "epoch": 5.6, + "learning_rate": 0.0005603833865814697, + "loss": 0.2641, + "step": 8770 + }, + { + "epoch": 5.61, + "learning_rate": 0.0005610223642172524, + "loss": 0.2223, + "step": 8780 + }, + { + "epoch": 5.62, + "learning_rate": 0.0005616613418530352, + "loss": 0.2159, + "step": 8790 + }, + { + "epoch": 5.62, + "learning_rate": 0.0005623003194888178, + "loss": 0.233, + "step": 8800 + }, + { + "epoch": 5.63, + "learning_rate": 0.0005629392971246007, + "loss": 0.2623, + "step": 8810 + }, + { + "epoch": 5.64, + "learning_rate": 0.0005635782747603834, + "loss": 0.2424, + "step": 8820 + }, + { + "epoch": 5.64, + "learning_rate": 0.0005642172523961661, + "loss": 0.236, + "step": 8830 + }, + { + "epoch": 5.65, + "learning_rate": 0.0005648562300319489, + "loss": 0.2372, + "step": 8840 + }, + { + "epoch": 5.65, + "learning_rate": 0.0005654952076677317, + "loss": 0.2165, + "step": 8850 + }, + { + "epoch": 5.66, + "learning_rate": 0.0005661341853035143, + "loss": 0.2702, + "step": 8860 + }, + { + "epoch": 5.67, + "learning_rate": 0.0005667731629392972, + "loss": 0.2253, + "step": 8870 + }, + { + "epoch": 5.67, + "learning_rate": 0.0005674121405750799, + "loss": 0.2361, + "step": 8880 + }, + { + "epoch": 5.68, + "learning_rate": 0.0005680511182108626, + "loss": 0.2314, + "step": 8890 + }, + { + "epoch": 5.69, + "learning_rate": 0.0005686900958466453, + "loss": 0.2188, + "step": 8900 + }, + { + "epoch": 5.69, + "learning_rate": 0.0005693290734824281, + "loss": 0.2121, + "step": 8910 + }, + { + "epoch": 5.7, + "learning_rate": 0.0005699680511182109, + "loss": 0.208, + "step": 8920 + }, + { + "epoch": 5.71, + "learning_rate": 0.0005706070287539936, + "loss": 0.2587, + "step": 8930 + }, + { + "epoch": 5.71, + "learning_rate": 0.0005712460063897764, + "loss": 0.248, + "step": 8940 + }, + { + "epoch": 5.72, + "learning_rate": 0.0005718849840255591, + "loss": 0.2321, + "step": 8950 + }, + { + "epoch": 5.73, + "learning_rate": 0.0005725239616613419, + "loss": 0.2464, + "step": 8960 + }, + { + "epoch": 5.73, + "learning_rate": 0.0005731629392971246, + "loss": 0.2344, + "step": 8970 + }, + { + "epoch": 5.74, + "learning_rate": 0.0005738019169329074, + "loss": 0.2346, + "step": 8980 + }, + { + "epoch": 5.74, + "learning_rate": 0.0005744408945686901, + "loss": 0.2296, + "step": 8990 + }, + { + "epoch": 5.75, + "learning_rate": 0.0005750798722044729, + "loss": 0.2556, + "step": 9000 + }, + { + "epoch": 5.76, + "learning_rate": 0.0005757188498402556, + "loss": 0.2424, + "step": 9010 + }, + { + "epoch": 5.76, + "learning_rate": 0.0005763578274760384, + "loss": 0.2561, + "step": 9020 + }, + { + "epoch": 5.77, + "learning_rate": 0.000576996805111821, + "loss": 0.2339, + "step": 9030 + }, + { + "epoch": 5.78, + "learning_rate": 0.0005776357827476039, + "loss": 0.2646, + "step": 9040 + }, + { + "epoch": 5.78, + "learning_rate": 0.0005782747603833865, + "loss": 0.2297, + "step": 9050 + }, + { + "epoch": 5.79, + "learning_rate": 0.0005789137380191693, + "loss": 0.1751, + "step": 9060 + }, + { + "epoch": 5.8, + "learning_rate": 0.0005795527156549521, + "loss": 0.2185, + "step": 9070 + }, + { + "epoch": 5.8, + "learning_rate": 0.0005801916932907348, + "loss": 0.2366, + "step": 9080 + }, + { + "epoch": 5.81, + "learning_rate": 0.0005808306709265175, + "loss": 0.2377, + "step": 9090 + }, + { + "epoch": 5.81, + "learning_rate": 0.0005814696485623004, + "loss": 0.2242, + "step": 9100 + }, + { + "epoch": 5.82, + "learning_rate": 0.000582108626198083, + "loss": 0.2487, + "step": 9110 + }, + { + "epoch": 5.83, + "learning_rate": 0.0005827476038338658, + "loss": 0.2498, + "step": 9120 + }, + { + "epoch": 5.83, + "learning_rate": 0.0005833865814696487, + "loss": 0.21, + "step": 9130 + }, + { + "epoch": 5.84, + "learning_rate": 0.0005840255591054313, + "loss": 0.2414, + "step": 9140 + }, + { + "epoch": 5.85, + "learning_rate": 0.0005846645367412141, + "loss": 0.2372, + "step": 9150 + }, + { + "epoch": 5.85, + "learning_rate": 0.0005853035143769969, + "loss": 0.2066, + "step": 9160 + }, + { + "epoch": 5.86, + "learning_rate": 0.0005859424920127796, + "loss": 0.2338, + "step": 9170 + }, + { + "epoch": 5.87, + "learning_rate": 0.0005865814696485623, + "loss": 0.2372, + "step": 9180 + }, + { + "epoch": 5.87, + "learning_rate": 0.0005872204472843451, + "loss": 0.2528, + "step": 9190 + }, + { + "epoch": 5.88, + "learning_rate": 0.0005878594249201278, + "loss": 0.2418, + "step": 9200 + }, + { + "epoch": 5.88, + "learning_rate": 0.0005884984025559106, + "loss": 0.2089, + "step": 9210 + }, + { + "epoch": 5.89, + "learning_rate": 0.0005891373801916932, + "loss": 0.2637, + "step": 9220 + }, + { + "epoch": 5.9, + "learning_rate": 0.0005897763578274761, + "loss": 0.262, + "step": 9230 + }, + { + "epoch": 5.9, + "learning_rate": 0.0005904153354632588, + "loss": 0.2451, + "step": 9240 + }, + { + "epoch": 5.91, + "learning_rate": 0.0005910543130990415, + "loss": 0.2283, + "step": 9250 + }, + { + "epoch": 5.92, + "learning_rate": 0.0005916932907348243, + "loss": 0.227, + "step": 9260 + }, + { + "epoch": 5.92, + "learning_rate": 0.0005923322683706071, + "loss": 0.2483, + "step": 9270 + }, + { + "epoch": 5.93, + "learning_rate": 0.0005929712460063897, + "loss": 0.2338, + "step": 9280 + }, + { + "epoch": 5.94, + "learning_rate": 0.0005936102236421725, + "loss": 0.2519, + "step": 9290 + }, + { + "epoch": 5.94, + "learning_rate": 0.0005942492012779553, + "loss": 0.2135, + "step": 9300 + }, + { + "epoch": 5.95, + "learning_rate": 0.000594888178913738, + "loss": 0.2144, + "step": 9310 + }, + { + "epoch": 5.95, + "learning_rate": 0.0005955271565495208, + "loss": 0.2324, + "step": 9320 + }, + { + "epoch": 5.96, + "learning_rate": 0.0005961661341853036, + "loss": 0.2121, + "step": 9330 + }, + { + "epoch": 5.97, + "learning_rate": 0.0005968051118210863, + "loss": 0.2149, + "step": 9340 + }, + { + "epoch": 5.97, + "learning_rate": 0.000597444089456869, + "loss": 0.2414, + "step": 9350 + }, + { + "epoch": 5.98, + "learning_rate": 0.0005980830670926518, + "loss": 0.2348, + "step": 9360 + }, + { + "epoch": 5.99, + "learning_rate": 0.0005987220447284345, + "loss": 0.2711, + "step": 9370 + }, + { + "epoch": 5.99, + "learning_rate": 0.0005993610223642173, + "loss": 0.2151, + "step": 9380 + }, + { + "epoch": 6.0, + "learning_rate": 0.0006, + "loss": 0.2554, + "step": 9390 + }, + { + "epoch": 6.01, + "learning_rate": 0.0006006389776357828, + "loss": 0.1843, + "step": 9400 + }, + { + "epoch": 6.01, + "learning_rate": 0.0006012779552715655, + "loss": 0.2518, + "step": 9410 + }, + { + "epoch": 6.02, + "learning_rate": 0.0006019169329073482, + "loss": 0.2017, + "step": 9420 + }, + { + "epoch": 6.03, + "learning_rate": 0.000602555910543131, + "loss": 0.2016, + "step": 9430 + }, + { + "epoch": 6.03, + "learning_rate": 0.0006031948881789138, + "loss": 0.1955, + "step": 9440 + }, + { + "epoch": 6.04, + "learning_rate": 0.0006038338658146964, + "loss": 0.2106, + "step": 9450 + }, + { + "epoch": 6.04, + "learning_rate": 0.0006044728434504793, + "loss": 0.2085, + "step": 9460 + }, + { + "epoch": 6.05, + "learning_rate": 0.000605111821086262, + "loss": 0.2208, + "step": 9470 + }, + { + "epoch": 6.06, + "learning_rate": 0.0006057507987220447, + "loss": 0.2107, + "step": 9480 + }, + { + "epoch": 6.06, + "learning_rate": 0.0006063897763578275, + "loss": 0.2016, + "step": 9490 + }, + { + "epoch": 6.07, + "learning_rate": 0.0006070287539936102, + "loss": 0.2203, + "step": 9500 + }, + { + "epoch": 6.08, + "learning_rate": 0.000607667731629393, + "loss": 0.1773, + "step": 9510 + }, + { + "epoch": 6.08, + "learning_rate": 0.0006083067092651758, + "loss": 0.2162, + "step": 9520 + }, + { + "epoch": 6.09, + "learning_rate": 0.0006089456869009585, + "loss": 0.1805, + "step": 9530 + }, + { + "epoch": 6.1, + "learning_rate": 0.0006095846645367412, + "loss": 0.275, + "step": 9540 + }, + { + "epoch": 6.1, + "learning_rate": 0.000610223642172524, + "loss": 0.2398, + "step": 9550 + }, + { + "epoch": 6.11, + "learning_rate": 0.0006108626198083067, + "loss": 0.1857, + "step": 9560 + }, + { + "epoch": 6.11, + "learning_rate": 0.0006115015974440895, + "loss": 0.2261, + "step": 9570 + }, + { + "epoch": 6.12, + "learning_rate": 0.0006121405750798722, + "loss": 0.1816, + "step": 9580 + }, + { + "epoch": 6.13, + "learning_rate": 0.000612779552715655, + "loss": 0.212, + "step": 9590 + }, + { + "epoch": 6.13, + "learning_rate": 0.0006134185303514377, + "loss": 0.2027, + "step": 9600 + }, + { + "epoch": 6.14, + "learning_rate": 0.0006140575079872205, + "loss": 0.2054, + "step": 9610 + }, + { + "epoch": 6.15, + "learning_rate": 0.0006146964856230032, + "loss": 0.2073, + "step": 9620 + }, + { + "epoch": 6.15, + "learning_rate": 0.000615335463258786, + "loss": 0.199, + "step": 9630 + }, + { + "epoch": 6.16, + "learning_rate": 0.0006159744408945687, + "loss": 0.2037, + "step": 9640 + }, + { + "epoch": 6.17, + "learning_rate": 0.0006166134185303514, + "loss": 0.1966, + "step": 9650 + }, + { + "epoch": 6.17, + "learning_rate": 0.0006172523961661342, + "loss": 0.2098, + "step": 9660 + }, + { + "epoch": 6.18, + "learning_rate": 0.0006178913738019169, + "loss": 0.2208, + "step": 9670 + }, + { + "epoch": 6.19, + "learning_rate": 0.0006185303514376996, + "loss": 0.1932, + "step": 9680 + }, + { + "epoch": 6.19, + "learning_rate": 0.0006191693290734825, + "loss": 0.2283, + "step": 9690 + }, + { + "epoch": 6.2, + "learning_rate": 0.0006198083067092651, + "loss": 0.2247, + "step": 9700 + }, + { + "epoch": 6.2, + "learning_rate": 0.0006204472843450479, + "loss": 0.2131, + "step": 9710 + }, + { + "epoch": 6.21, + "learning_rate": 0.0006210862619808308, + "loss": 0.1977, + "step": 9720 + }, + { + "epoch": 6.22, + "learning_rate": 0.0006217252396166134, + "loss": 0.2284, + "step": 9730 + }, + { + "epoch": 6.22, + "learning_rate": 0.0006223642172523962, + "loss": 0.2088, + "step": 9740 + }, + { + "epoch": 6.23, + "learning_rate": 0.000623003194888179, + "loss": 0.238, + "step": 9750 + }, + { + "epoch": 6.24, + "learning_rate": 0.0006236421725239617, + "loss": 0.198, + "step": 9760 + }, + { + "epoch": 6.24, + "learning_rate": 0.0006242811501597444, + "loss": 0.2132, + "step": 9770 + }, + { + "epoch": 6.25, + "learning_rate": 0.0006249201277955273, + "loss": 0.2107, + "step": 9780 + }, + { + "epoch": 6.26, + "learning_rate": 0.0006255591054313099, + "loss": 0.1984, + "step": 9790 + }, + { + "epoch": 6.26, + "learning_rate": 0.0006261980830670927, + "loss": 0.2316, + "step": 9800 + }, + { + "epoch": 6.27, + "learning_rate": 0.0006268370607028753, + "loss": 0.2404, + "step": 9810 + }, + { + "epoch": 6.27, + "learning_rate": 0.0006274760383386582, + "loss": 0.227, + "step": 9820 + }, + { + "epoch": 6.28, + "learning_rate": 0.0006281150159744409, + "loss": 0.1707, + "step": 9830 + }, + { + "epoch": 6.29, + "learning_rate": 0.0006287539936102236, + "loss": 0.2225, + "step": 9840 + }, + { + "epoch": 6.29, + "learning_rate": 0.0006293929712460064, + "loss": 0.2663, + "step": 9850 + }, + { + "epoch": 6.3, + "learning_rate": 0.0006300319488817892, + "loss": 0.2101, + "step": 9860 + }, + { + "epoch": 6.31, + "learning_rate": 0.0006306709265175718, + "loss": 0.2007, + "step": 9870 + }, + { + "epoch": 6.31, + "learning_rate": 0.0006313099041533547, + "loss": 0.2094, + "step": 9880 + }, + { + "epoch": 6.32, + "learning_rate": 0.0006319488817891374, + "loss": 0.2393, + "step": 9890 + }, + { + "epoch": 6.33, + "learning_rate": 0.0006325878594249201, + "loss": 0.2459, + "step": 9900 + }, + { + "epoch": 6.33, + "learning_rate": 0.000633226837060703, + "loss": 0.195, + "step": 9910 + }, + { + "epoch": 6.34, + "learning_rate": 0.0006338658146964857, + "loss": 0.1721, + "step": 9920 + }, + { + "epoch": 6.34, + "learning_rate": 0.0006345047923322684, + "loss": 0.2254, + "step": 9930 + }, + { + "epoch": 6.35, + "learning_rate": 0.0006351437699680511, + "loss": 0.2252, + "step": 9940 + }, + { + "epoch": 6.36, + "learning_rate": 0.000635782747603834, + "loss": 0.1812, + "step": 9950 + }, + { + "epoch": 6.36, + "learning_rate": 0.0006364217252396166, + "loss": 0.2133, + "step": 9960 + }, + { + "epoch": 6.37, + "learning_rate": 0.0006370607028753994, + "loss": 0.2453, + "step": 9970 + }, + { + "epoch": 6.38, + "learning_rate": 0.000637699680511182, + "loss": 0.2257, + "step": 9980 + }, + { + "epoch": 6.38, + "learning_rate": 0.0006383386581469649, + "loss": 0.2017, + "step": 9990 + }, + { + "epoch": 6.39, + "learning_rate": 0.0006389776357827476, + "loss": 0.2141, + "step": 10000 + }, + { + "epoch": 6.4, + "learning_rate": 0.0006396166134185303, + "loss": 0.221, + "step": 10010 + }, + { + "epoch": 6.4, + "learning_rate": 0.0006402555910543131, + "loss": 0.2082, + "step": 10020 + }, + { + "epoch": 6.41, + "learning_rate": 0.0006408945686900959, + "loss": 0.2305, + "step": 10030 + }, + { + "epoch": 6.42, + "learning_rate": 0.0006415335463258785, + "loss": 0.1989, + "step": 10040 + }, + { + "epoch": 6.42, + "learning_rate": 0.0006421725239616614, + "loss": 0.2114, + "step": 10050 + }, + { + "epoch": 6.43, + "learning_rate": 0.0006428115015974441, + "loss": 0.244, + "step": 10060 + }, + { + "epoch": 6.43, + "learning_rate": 0.0006434504792332268, + "loss": 0.2128, + "step": 10070 + }, + { + "epoch": 6.44, + "learning_rate": 0.0006440894568690096, + "loss": 0.2411, + "step": 10080 + }, + { + "epoch": 6.45, + "learning_rate": 0.0006447284345047924, + "loss": 0.2089, + "step": 10090 + }, + { + "epoch": 6.45, + "learning_rate": 0.000645367412140575, + "loss": 0.2048, + "step": 10100 + }, + { + "epoch": 6.46, + "learning_rate": 0.0006460063897763579, + "loss": 0.2081, + "step": 10110 + }, + { + "epoch": 6.47, + "learning_rate": 0.0006466453674121406, + "loss": 0.2285, + "step": 10120 + }, + { + "epoch": 6.47, + "learning_rate": 0.0006472843450479233, + "loss": 0.217, + "step": 10130 + }, + { + "epoch": 6.48, + "learning_rate": 0.0006479233226837062, + "loss": 0.2066, + "step": 10140 + }, + { + "epoch": 6.49, + "learning_rate": 0.0006485623003194888, + "loss": 0.2274, + "step": 10150 + }, + { + "epoch": 6.49, + "learning_rate": 0.0006492012779552716, + "loss": 0.2116, + "step": 10160 + }, + { + "epoch": 6.5, + "learning_rate": 0.0006498402555910543, + "loss": 0.2406, + "step": 10170 + }, + { + "epoch": 6.5, + "learning_rate": 0.0006504792332268371, + "loss": 0.2094, + "step": 10180 + }, + { + "epoch": 6.51, + "learning_rate": 0.0006511182108626198, + "loss": 0.2406, + "step": 10190 + }, + { + "epoch": 6.52, + "learning_rate": 0.0006517571884984026, + "loss": 0.2134, + "step": 10200 + }, + { + "epoch": 6.52, + "learning_rate": 0.0006523961661341853, + "loss": 0.2337, + "step": 10210 + }, + { + "epoch": 6.53, + "learning_rate": 0.0006530351437699681, + "loss": 0.2169, + "step": 10220 + }, + { + "epoch": 6.54, + "learning_rate": 0.0006536741214057508, + "loss": 0.1976, + "step": 10230 + }, + { + "epoch": 6.54, + "learning_rate": 0.0006543130990415336, + "loss": 0.2305, + "step": 10240 + }, + { + "epoch": 6.55, + "learning_rate": 0.0006549520766773163, + "loss": 0.2315, + "step": 10250 + }, + { + "epoch": 6.56, + "learning_rate": 0.000655591054313099, + "loss": 0.2116, + "step": 10260 + }, + { + "epoch": 6.56, + "learning_rate": 0.0006562300319488817, + "loss": 0.2238, + "step": 10270 + }, + { + "epoch": 6.57, + "learning_rate": 0.0006568690095846646, + "loss": 0.1958, + "step": 10280 + }, + { + "epoch": 6.57, + "learning_rate": 0.0006575079872204472, + "loss": 0.2142, + "step": 10290 + }, + { + "epoch": 6.58, + "learning_rate": 0.00065814696485623, + "loss": 0.2262, + "step": 10300 + }, + { + "epoch": 6.59, + "learning_rate": 0.0006587859424920129, + "loss": 0.2009, + "step": 10310 + }, + { + "epoch": 6.59, + "learning_rate": 0.0006594249201277955, + "loss": 0.2405, + "step": 10320 + }, + { + "epoch": 6.6, + "learning_rate": 0.0006600638977635783, + "loss": 0.239, + "step": 10330 + }, + { + "epoch": 6.61, + "learning_rate": 0.0006607028753993611, + "loss": 0.2234, + "step": 10340 + }, + { + "epoch": 6.61, + "learning_rate": 0.0006613418530351438, + "loss": 0.2276, + "step": 10350 + }, + { + "epoch": 6.62, + "learning_rate": 0.0006619808306709265, + "loss": 0.2201, + "step": 10360 + }, + { + "epoch": 6.63, + "learning_rate": 0.0006626198083067094, + "loss": 0.2277, + "step": 10370 + }, + { + "epoch": 6.63, + "learning_rate": 0.000663258785942492, + "loss": 0.2399, + "step": 10380 + }, + { + "epoch": 6.64, + "learning_rate": 0.0006638977635782748, + "loss": 0.2097, + "step": 10390 + }, + { + "epoch": 6.65, + "learning_rate": 0.0006645367412140575, + "loss": 0.2211, + "step": 10400 + }, + { + "epoch": 6.65, + "learning_rate": 0.0006651757188498403, + "loss": 0.2249, + "step": 10410 + }, + { + "epoch": 6.66, + "learning_rate": 0.000665814696485623, + "loss": 0.223, + "step": 10420 + }, + { + "epoch": 6.66, + "learning_rate": 0.0006664536741214057, + "loss": 0.2492, + "step": 10430 + }, + { + "epoch": 6.67, + "learning_rate": 0.0006670926517571885, + "loss": 0.1998, + "step": 10440 + }, + { + "epoch": 6.68, + "learning_rate": 0.0006677316293929713, + "loss": 0.2508, + "step": 10450 + }, + { + "epoch": 6.68, + "learning_rate": 0.0006683706070287539, + "loss": 0.2545, + "step": 10460 + }, + { + "epoch": 6.69, + "learning_rate": 0.0006690095846645368, + "loss": 0.2027, + "step": 10470 + }, + { + "epoch": 6.7, + "learning_rate": 0.0006696485623003195, + "loss": 0.2344, + "step": 10480 + }, + { + "epoch": 6.7, + "learning_rate": 0.0006702875399361022, + "loss": 0.2268, + "step": 10490 + }, + { + "epoch": 6.71, + "learning_rate": 0.0006709265175718851, + "loss": 0.2414, + "step": 10500 + }, + { + "epoch": 6.72, + "learning_rate": 0.0006715654952076678, + "loss": 0.2263, + "step": 10510 + }, + { + "epoch": 6.72, + "learning_rate": 0.0006722044728434505, + "loss": 0.1974, + "step": 10520 + }, + { + "epoch": 6.73, + "learning_rate": 0.0006728434504792332, + "loss": 0.2196, + "step": 10530 + }, + { + "epoch": 6.73, + "learning_rate": 0.0006734824281150161, + "loss": 0.1958, + "step": 10540 + }, + { + "epoch": 6.74, + "learning_rate": 0.0006741214057507987, + "loss": 0.2002, + "step": 10550 + }, + { + "epoch": 6.75, + "learning_rate": 0.0006747603833865815, + "loss": 0.2186, + "step": 10560 + }, + { + "epoch": 6.75, + "learning_rate": 0.0006753993610223642, + "loss": 0.2254, + "step": 10570 + }, + { + "epoch": 6.76, + "learning_rate": 0.000676038338658147, + "loss": 0.1954, + "step": 10580 + }, + { + "epoch": 6.77, + "learning_rate": 0.0006766773162939297, + "loss": 0.2413, + "step": 10590 + }, + { + "epoch": 6.77, + "learning_rate": 0.0006773162939297125, + "loss": 0.2373, + "step": 10600 + }, + { + "epoch": 6.78, + "learning_rate": 0.0006779552715654952, + "loss": 0.2389, + "step": 10610 + }, + { + "epoch": 6.79, + "learning_rate": 0.000678594249201278, + "loss": 0.203, + "step": 10620 + }, + { + "epoch": 6.79, + "learning_rate": 0.0006792332268370606, + "loss": 0.2357, + "step": 10630 + }, + { + "epoch": 6.8, + "learning_rate": 0.0006798722044728435, + "loss": 0.2449, + "step": 10640 + }, + { + "epoch": 6.8, + "learning_rate": 0.0006805111821086262, + "loss": 0.2056, + "step": 10650 + }, + { + "epoch": 6.81, + "learning_rate": 0.0006811501597444089, + "loss": 0.2524, + "step": 10660 + }, + { + "epoch": 6.82, + "learning_rate": 0.0006817891373801917, + "loss": 0.2157, + "step": 10670 + }, + { + "epoch": 6.82, + "learning_rate": 0.0006824281150159745, + "loss": 0.1798, + "step": 10680 + }, + { + "epoch": 6.83, + "learning_rate": 0.0006830670926517571, + "loss": 0.1969, + "step": 10690 + }, + { + "epoch": 6.84, + "learning_rate": 0.00068370607028754, + "loss": 0.2223, + "step": 10700 + }, + { + "epoch": 6.84, + "learning_rate": 0.0006843450479233228, + "loss": 0.2193, + "step": 10710 + }, + { + "epoch": 6.85, + "learning_rate": 0.0006849840255591054, + "loss": 0.2223, + "step": 10720 + }, + { + "epoch": 6.86, + "learning_rate": 0.0006856230031948883, + "loss": 0.2113, + "step": 10730 + }, + { + "epoch": 6.86, + "learning_rate": 0.0006862619808306709, + "loss": 0.2488, + "step": 10740 + }, + { + "epoch": 6.87, + "learning_rate": 0.0006869009584664537, + "loss": 0.2308, + "step": 10750 + }, + { + "epoch": 6.88, + "learning_rate": 0.0006875399361022364, + "loss": 0.2283, + "step": 10760 + }, + { + "epoch": 6.88, + "learning_rate": 0.0006881789137380192, + "loss": 0.2672, + "step": 10770 + }, + { + "epoch": 6.89, + "learning_rate": 0.0006888178913738019, + "loss": 0.2538, + "step": 10780 + }, + { + "epoch": 6.89, + "learning_rate": 0.0006894568690095847, + "loss": 0.2907, + "step": 10790 + }, + { + "epoch": 6.9, + "learning_rate": 0.0006900958466453674, + "loss": 0.1976, + "step": 10800 + }, + { + "epoch": 6.91, + "learning_rate": 0.0006907348242811502, + "loss": 0.2619, + "step": 10810 + }, + { + "epoch": 6.91, + "learning_rate": 0.0006913738019169329, + "loss": 0.1975, + "step": 10820 + }, + { + "epoch": 6.92, + "learning_rate": 0.0006920127795527157, + "loss": 0.1838, + "step": 10830 + }, + { + "epoch": 6.93, + "learning_rate": 0.0006926517571884984, + "loss": 0.2242, + "step": 10840 + }, + { + "epoch": 6.93, + "learning_rate": 0.0006932907348242812, + "loss": 0.2057, + "step": 10850 + }, + { + "epoch": 6.94, + "learning_rate": 0.0006939297124600638, + "loss": 0.2116, + "step": 10860 + }, + { + "epoch": 6.95, + "learning_rate": 0.0006945686900958467, + "loss": 0.2289, + "step": 10870 + }, + { + "epoch": 6.95, + "learning_rate": 0.0006952076677316293, + "loss": 0.202, + "step": 10880 + }, + { + "epoch": 6.96, + "learning_rate": 0.0006958466453674121, + "loss": 0.2221, + "step": 10890 + }, + { + "epoch": 6.96, + "learning_rate": 0.000696485623003195, + "loss": 0.237, + "step": 10900 + }, + { + "epoch": 6.97, + "learning_rate": 0.0006971246006389776, + "loss": 0.3199, + "step": 10910 + }, + { + "epoch": 6.98, + "learning_rate": 0.0006977635782747604, + "loss": 0.2029, + "step": 10920 + }, + { + "epoch": 6.98, + "learning_rate": 0.0006984025559105432, + "loss": 0.223, + "step": 10930 + }, + { + "epoch": 6.99, + "learning_rate": 0.0006990415335463259, + "loss": 0.216, + "step": 10940 + }, + { + "epoch": 7.0, + "learning_rate": 0.0006996805111821086, + "loss": 0.1952, + "step": 10950 + }, + { + "epoch": 7.0, + "learning_rate": 0.0007003194888178915, + "loss": 0.2173, + "step": 10960 + }, + { + "epoch": 7.01, + "learning_rate": 0.0007009584664536741, + "loss": 0.1895, + "step": 10970 + }, + { + "epoch": 7.02, + "learning_rate": 0.0007015974440894569, + "loss": 0.1721, + "step": 10980 + }, + { + "epoch": 7.02, + "learning_rate": 0.0007022364217252397, + "loss": 0.1872, + "step": 10990 + }, + { + "epoch": 7.03, + "learning_rate": 0.0007028753993610224, + "loss": 0.1846, + "step": 11000 + }, + { + "epoch": 7.04, + "learning_rate": 0.0007035143769968051, + "loss": 0.207, + "step": 11010 + }, + { + "epoch": 7.04, + "learning_rate": 0.0007041533546325878, + "loss": 0.2032, + "step": 11020 + }, + { + "epoch": 7.05, + "learning_rate": 0.0007047923322683706, + "loss": 0.1662, + "step": 11030 + }, + { + "epoch": 7.05, + "learning_rate": 0.0007054313099041534, + "loss": 0.167, + "step": 11040 + }, + { + "epoch": 7.06, + "learning_rate": 0.000706070287539936, + "loss": 0.2351, + "step": 11050 + }, + { + "epoch": 7.07, + "learning_rate": 0.0007067092651757189, + "loss": 0.1653, + "step": 11060 + }, + { + "epoch": 7.07, + "learning_rate": 0.0007073482428115016, + "loss": 0.1826, + "step": 11070 + }, + { + "epoch": 7.08, + "learning_rate": 0.0007079872204472843, + "loss": 0.2162, + "step": 11080 + }, + { + "epoch": 7.09, + "learning_rate": 0.000708626198083067, + "loss": 0.1853, + "step": 11090 + }, + { + "epoch": 7.09, + "learning_rate": 0.0007092651757188499, + "loss": 0.1865, + "step": 11100 + }, + { + "epoch": 7.1, + "learning_rate": 0.0007099041533546326, + "loss": 0.1868, + "step": 11110 + }, + { + "epoch": 7.11, + "learning_rate": 0.0007105431309904153, + "loss": 0.1628, + "step": 11120 + }, + { + "epoch": 7.11, + "learning_rate": 0.0007111821086261982, + "loss": 0.1975, + "step": 11130 + }, + { + "epoch": 7.12, + "learning_rate": 0.0007118210862619808, + "loss": 0.1853, + "step": 11140 + }, + { + "epoch": 7.12, + "learning_rate": 0.0007124600638977636, + "loss": 0.2046, + "step": 11150 + }, + { + "epoch": 7.13, + "learning_rate": 0.0007130990415335464, + "loss": 0.1966, + "step": 11160 + }, + { + "epoch": 7.14, + "learning_rate": 0.0007137380191693291, + "loss": 0.2022, + "step": 11170 + }, + { + "epoch": 7.14, + "learning_rate": 0.0007143769968051118, + "loss": 0.1635, + "step": 11180 + }, + { + "epoch": 7.15, + "learning_rate": 0.0007150159744408946, + "loss": 0.2223, + "step": 11190 + }, + { + "epoch": 7.16, + "learning_rate": 0.0007156549520766773, + "loss": 0.2094, + "step": 11200 + }, + { + "epoch": 7.16, + "learning_rate": 0.0007162939297124601, + "loss": 0.1938, + "step": 11210 + }, + { + "epoch": 7.17, + "learning_rate": 0.0007169329073482428, + "loss": 0.2053, + "step": 11220 + }, + { + "epoch": 7.18, + "learning_rate": 0.0007175718849840256, + "loss": 0.1981, + "step": 11230 + }, + { + "epoch": 7.18, + "learning_rate": 0.0007182108626198083, + "loss": 0.1969, + "step": 11240 + }, + { + "epoch": 7.19, + "learning_rate": 0.000718849840255591, + "loss": 0.1912, + "step": 11250 + }, + { + "epoch": 7.19, + "learning_rate": 0.0007194888178913738, + "loss": 0.1875, + "step": 11260 + }, + { + "epoch": 7.2, + "learning_rate": 0.0007201277955271566, + "loss": 0.1944, + "step": 11270 + }, + { + "epoch": 7.21, + "learning_rate": 0.0007207667731629392, + "loss": 0.2254, + "step": 11280 + }, + { + "epoch": 7.21, + "learning_rate": 0.0007214057507987221, + "loss": 0.2204, + "step": 11290 + }, + { + "epoch": 7.22, + "learning_rate": 0.0007220447284345049, + "loss": 0.2089, + "step": 11300 + }, + { + "epoch": 7.23, + "learning_rate": 0.0007226837060702875, + "loss": 0.2098, + "step": 11310 + }, + { + "epoch": 7.23, + "learning_rate": 0.0007233226837060704, + "loss": 0.2107, + "step": 11320 + }, + { + "epoch": 7.24, + "learning_rate": 0.000723961661341853, + "loss": 0.2035, + "step": 11330 + }, + { + "epoch": 7.25, + "learning_rate": 0.0007246006389776358, + "loss": 0.2098, + "step": 11340 + }, + { + "epoch": 7.25, + "learning_rate": 0.0007252396166134186, + "loss": 0.2332, + "step": 11350 + }, + { + "epoch": 7.26, + "learning_rate": 0.0007258785942492013, + "loss": 0.1978, + "step": 11360 + }, + { + "epoch": 7.27, + "learning_rate": 0.000726517571884984, + "loss": 0.1873, + "step": 11370 + }, + { + "epoch": 7.27, + "learning_rate": 0.0007271565495207669, + "loss": 0.1933, + "step": 11380 + }, + { + "epoch": 7.28, + "learning_rate": 0.0007277955271565495, + "loss": 0.2329, + "step": 11390 + }, + { + "epoch": 7.28, + "learning_rate": 0.0007284345047923323, + "loss": 0.1914, + "step": 11400 + }, + { + "epoch": 7.29, + "learning_rate": 0.000729073482428115, + "loss": 0.1788, + "step": 11410 + }, + { + "epoch": 7.3, + "learning_rate": 0.0007297124600638978, + "loss": 0.2233, + "step": 11420 + }, + { + "epoch": 7.3, + "learning_rate": 0.0007303514376996805, + "loss": 0.2045, + "step": 11430 + }, + { + "epoch": 7.31, + "learning_rate": 0.0007309904153354633, + "loss": 0.1945, + "step": 11440 + }, + { + "epoch": 7.32, + "learning_rate": 0.000731629392971246, + "loss": 0.2035, + "step": 11450 + }, + { + "epoch": 7.32, + "learning_rate": 0.0007322683706070288, + "loss": 0.1888, + "step": 11460 + }, + { + "epoch": 7.33, + "learning_rate": 0.0007329073482428114, + "loss": 0.2102, + "step": 11470 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007335463258785943, + "loss": 0.2422, + "step": 11480 + }, + { + "epoch": 7.34, + "learning_rate": 0.0007341853035143771, + "loss": 0.2067, + "step": 11490 + }, + { + "epoch": 7.35, + "learning_rate": 0.0007348242811501597, + "loss": 0.2258, + "step": 11500 + }, + { + "epoch": 7.35, + "learning_rate": 0.0007354632587859425, + "loss": 0.1953, + "step": 11510 + }, + { + "epoch": 7.36, + "learning_rate": 0.0007361022364217253, + "loss": 0.2008, + "step": 11520 + }, + { + "epoch": 7.37, + "learning_rate": 0.000736741214057508, + "loss": 0.1799, + "step": 11530 + }, + { + "epoch": 7.37, + "learning_rate": 0.0007373801916932907, + "loss": 0.1961, + "step": 11540 + }, + { + "epoch": 7.38, + "learning_rate": 0.0007380191693290736, + "loss": 0.2147, + "step": 11550 + }, + { + "epoch": 7.39, + "learning_rate": 0.0007386581469648562, + "loss": 0.2173, + "step": 11560 + }, + { + "epoch": 7.39, + "learning_rate": 0.000739297124600639, + "loss": 0.2015, + "step": 11570 + }, + { + "epoch": 7.4, + "learning_rate": 0.0007399361022364218, + "loss": 0.2245, + "step": 11580 + }, + { + "epoch": 7.41, + "learning_rate": 0.0007405750798722045, + "loss": 0.2001, + "step": 11590 + }, + { + "epoch": 7.41, + "learning_rate": 0.0007412140575079872, + "loss": 0.2022, + "step": 11600 + }, + { + "epoch": 7.42, + "learning_rate": 0.0007418530351437701, + "loss": 0.2094, + "step": 11610 + }, + { + "epoch": 7.42, + "learning_rate": 0.0007424920127795527, + "loss": 0.2429, + "step": 11620 + }, + { + "epoch": 7.43, + "learning_rate": 0.0007431309904153355, + "loss": 0.174, + "step": 11630 + }, + { + "epoch": 7.44, + "learning_rate": 0.0007437699680511181, + "loss": 0.2271, + "step": 11640 + }, + { + "epoch": 7.44, + "learning_rate": 0.000744408945686901, + "loss": 0.1932, + "step": 11650 + }, + { + "epoch": 7.45, + "learning_rate": 0.0007450479233226837, + "loss": 0.1983, + "step": 11660 + }, + { + "epoch": 7.46, + "learning_rate": 0.0007456869009584664, + "loss": 0.1996, + "step": 11670 + }, + { + "epoch": 7.46, + "learning_rate": 0.0007463258785942492, + "loss": 0.2084, + "step": 11680 + }, + { + "epoch": 7.47, + "learning_rate": 0.000746964856230032, + "loss": 0.2157, + "step": 11690 + }, + { + "epoch": 7.48, + "learning_rate": 0.0007476038338658147, + "loss": 0.2366, + "step": 11700 + }, + { + "epoch": 7.48, + "learning_rate": 0.0007482428115015975, + "loss": 0.2104, + "step": 11710 + }, + { + "epoch": 7.49, + "learning_rate": 0.0007488817891373803, + "loss": 0.2164, + "step": 11720 + }, + { + "epoch": 7.5, + "learning_rate": 0.0007495207667731629, + "loss": 0.1951, + "step": 11730 + }, + { + "epoch": 7.5, + "learning_rate": 0.0007501597444089458, + "loss": 0.2207, + "step": 11740 + }, + { + "epoch": 7.51, + "learning_rate": 0.0007507987220447285, + "loss": 0.1786, + "step": 11750 + }, + { + "epoch": 7.51, + "learning_rate": 0.0007514376996805112, + "loss": 0.2087, + "step": 11760 + }, + { + "epoch": 7.52, + "learning_rate": 0.0007520766773162939, + "loss": 0.2059, + "step": 11770 + }, + { + "epoch": 7.53, + "learning_rate": 0.0007527156549520767, + "loss": 0.2304, + "step": 11780 + }, + { + "epoch": 7.53, + "learning_rate": 0.0007533546325878594, + "loss": 0.2173, + "step": 11790 + }, + { + "epoch": 7.54, + "learning_rate": 0.0007539936102236422, + "loss": 0.2221, + "step": 11800 + }, + { + "epoch": 7.55, + "learning_rate": 0.0007546325878594249, + "loss": 0.203, + "step": 11810 + }, + { + "epoch": 7.55, + "learning_rate": 0.0007552715654952077, + "loss": 0.2087, + "step": 11820 + }, + { + "epoch": 7.56, + "learning_rate": 0.0007559105431309904, + "loss": 0.2292, + "step": 11830 + }, + { + "epoch": 7.57, + "learning_rate": 0.0007565495207667732, + "loss": 0.2116, + "step": 11840 + }, + { + "epoch": 7.57, + "learning_rate": 0.0007571884984025559, + "loss": 0.2227, + "step": 11850 + }, + { + "epoch": 7.58, + "learning_rate": 0.0007578274760383387, + "loss": 0.207, + "step": 11860 + }, + { + "epoch": 7.58, + "learning_rate": 0.0007584664536741213, + "loss": 0.2051, + "step": 11870 + }, + { + "epoch": 7.59, + "learning_rate": 0.0007591054313099042, + "loss": 0.2409, + "step": 11880 + }, + { + "epoch": 7.6, + "learning_rate": 0.000759744408945687, + "loss": 0.1849, + "step": 11890 + }, + { + "epoch": 7.6, + "learning_rate": 0.0007603833865814696, + "loss": 0.2098, + "step": 11900 + }, + { + "epoch": 7.61, + "learning_rate": 0.0007610223642172525, + "loss": 0.2225, + "step": 11910 + }, + { + "epoch": 7.62, + "learning_rate": 0.0007616613418530352, + "loss": 0.1973, + "step": 11920 + }, + { + "epoch": 7.62, + "learning_rate": 0.0007623003194888179, + "loss": 0.239, + "step": 11930 + }, + { + "epoch": 7.63, + "learning_rate": 0.0007629392971246007, + "loss": 0.2186, + "step": 11940 + }, + { + "epoch": 7.64, + "learning_rate": 0.0007635782747603834, + "loss": 0.1849, + "step": 11950 + }, + { + "epoch": 7.64, + "learning_rate": 0.0007642172523961661, + "loss": 0.2316, + "step": 11960 + }, + { + "epoch": 7.65, + "learning_rate": 0.000764856230031949, + "loss": 0.2223, + "step": 11970 + }, + { + "epoch": 7.65, + "learning_rate": 0.0007654952076677316, + "loss": 0.2457, + "step": 11980 + }, + { + "epoch": 7.66, + "learning_rate": 0.0007661341853035144, + "loss": 0.2006, + "step": 11990 + }, + { + "epoch": 7.67, + "learning_rate": 0.0007667731629392971, + "loss": 0.2142, + "step": 12000 + }, + { + "epoch": 7.67, + "learning_rate": 0.0007674121405750799, + "loss": 0.1964, + "step": 12010 + }, + { + "epoch": 7.68, + "learning_rate": 0.0007680511182108626, + "loss": 0.2215, + "step": 12020 + }, + { + "epoch": 7.69, + "learning_rate": 0.0007686900958466454, + "loss": 0.2158, + "step": 12030 + }, + { + "epoch": 7.69, + "learning_rate": 0.0007693290734824281, + "loss": 0.2071, + "step": 12040 + }, + { + "epoch": 7.7, + "learning_rate": 0.0007699680511182109, + "loss": 0.221, + "step": 12050 + }, + { + "epoch": 7.71, + "learning_rate": 0.0007706070287539936, + "loss": 0.2322, + "step": 12060 + }, + { + "epoch": 7.71, + "learning_rate": 0.0007712460063897764, + "loss": 0.2176, + "step": 12070 + }, + { + "epoch": 7.72, + "learning_rate": 0.0007718849840255591, + "loss": 0.1703, + "step": 12080 + }, + { + "epoch": 7.73, + "learning_rate": 0.0007725239616613418, + "loss": 0.2264, + "step": 12090 + }, + { + "epoch": 7.73, + "learning_rate": 0.0007731629392971247, + "loss": 0.2123, + "step": 12100 + }, + { + "epoch": 7.74, + "learning_rate": 0.0007738019169329074, + "loss": 0.2096, + "step": 12110 + }, + { + "epoch": 7.74, + "learning_rate": 0.0007744408945686901, + "loss": 0.228, + "step": 12120 + }, + { + "epoch": 7.75, + "learning_rate": 0.0007750798722044728, + "loss": 0.2193, + "step": 12130 + }, + { + "epoch": 7.76, + "learning_rate": 0.0007757188498402557, + "loss": 0.2062, + "step": 12140 + }, + { + "epoch": 7.76, + "learning_rate": 0.0007763578274760383, + "loss": 0.1992, + "step": 12150 + }, + { + "epoch": 7.77, + "learning_rate": 0.0007769968051118211, + "loss": 0.1922, + "step": 12160 + }, + { + "epoch": 7.78, + "learning_rate": 0.0007776357827476039, + "loss": 0.1991, + "step": 12170 + }, + { + "epoch": 7.78, + "learning_rate": 0.0007782747603833866, + "loss": 0.2427, + "step": 12180 + }, + { + "epoch": 7.79, + "learning_rate": 0.0007789137380191693, + "loss": 0.2242, + "step": 12190 + }, + { + "epoch": 7.8, + "learning_rate": 0.0007795527156549522, + "loss": 0.2559, + "step": 12200 + }, + { + "epoch": 7.8, + "learning_rate": 0.0007801916932907348, + "loss": 0.2238, + "step": 12210 + }, + { + "epoch": 7.81, + "learning_rate": 0.0007808306709265176, + "loss": 0.2055, + "step": 12220 + }, + { + "epoch": 7.81, + "learning_rate": 0.0007814696485623002, + "loss": 0.2139, + "step": 12230 + }, + { + "epoch": 7.82, + "learning_rate": 0.0007821086261980831, + "loss": 0.2198, + "step": 12240 + }, + { + "epoch": 7.83, + "learning_rate": 0.0007827476038338658, + "loss": 0.2401, + "step": 12250 + }, + { + "epoch": 7.83, + "learning_rate": 0.0007833865814696485, + "loss": 0.2253, + "step": 12260 + }, + { + "epoch": 7.84, + "learning_rate": 0.0007840255591054313, + "loss": 0.2157, + "step": 12270 + }, + { + "epoch": 7.85, + "learning_rate": 0.0007846645367412141, + "loss": 0.2274, + "step": 12280 + }, + { + "epoch": 7.85, + "learning_rate": 0.0007853035143769968, + "loss": 0.2281, + "step": 12290 + }, + { + "epoch": 7.86, + "learning_rate": 0.0007859424920127796, + "loss": 0.1965, + "step": 12300 + }, + { + "epoch": 7.87, + "learning_rate": 0.0007865814696485624, + "loss": 0.2179, + "step": 12310 + }, + { + "epoch": 7.87, + "learning_rate": 0.000787220447284345, + "loss": 0.1826, + "step": 12320 + }, + { + "epoch": 7.88, + "learning_rate": 0.0007878594249201279, + "loss": 0.172, + "step": 12330 + }, + { + "epoch": 7.88, + "learning_rate": 0.0007884984025559106, + "loss": 0.2562, + "step": 12340 + }, + { + "epoch": 7.89, + "learning_rate": 0.0007891373801916933, + "loss": 0.2094, + "step": 12350 + }, + { + "epoch": 7.9, + "learning_rate": 0.000789776357827476, + "loss": 0.1897, + "step": 12360 + }, + { + "epoch": 7.9, + "learning_rate": 0.0007904153354632589, + "loss": 0.2108, + "step": 12370 + }, + { + "epoch": 7.91, + "learning_rate": 0.0007910543130990415, + "loss": 0.21, + "step": 12380 + }, + { + "epoch": 7.92, + "learning_rate": 0.0007916932907348243, + "loss": 0.2316, + "step": 12390 + }, + { + "epoch": 7.92, + "learning_rate": 0.000792332268370607, + "loss": 0.1809, + "step": 12400 + }, + { + "epoch": 7.93, + "learning_rate": 0.0007929712460063898, + "loss": 0.2491, + "step": 12410 + }, + { + "epoch": 7.94, + "learning_rate": 0.0007936102236421725, + "loss": 0.1887, + "step": 12420 + }, + { + "epoch": 7.94, + "learning_rate": 0.0007942492012779553, + "loss": 0.2278, + "step": 12430 + }, + { + "epoch": 7.95, + "learning_rate": 0.000794888178913738, + "loss": 0.2504, + "step": 12440 + }, + { + "epoch": 7.95, + "learning_rate": 0.0007955271565495208, + "loss": 0.2111, + "step": 12450 + }, + { + "epoch": 7.96, + "learning_rate": 0.0007961661341853034, + "loss": 0.2649, + "step": 12460 + }, + { + "epoch": 7.97, + "learning_rate": 0.0007968051118210863, + "loss": 0.2035, + "step": 12470 + }, + { + "epoch": 7.97, + "learning_rate": 0.000797444089456869, + "loss": 0.2132, + "step": 12480 + }, + { + "epoch": 7.98, + "learning_rate": 0.0007980830670926517, + "loss": 0.2869, + "step": 12490 + }, + { + "epoch": 7.99, + "learning_rate": 0.0007987220447284346, + "loss": 0.261, + "step": 12500 + }, + { + "epoch": 7.99, + "learning_rate": 0.0007993610223642173, + "loss": 0.1863, + "step": 12510 + }, + { + "epoch": 8.0, + "learning_rate": 0.0008, + "loss": 0.2178, + "step": 12520 + }, + { + "epoch": 8.01, + "learning_rate": 0.0008006389776357828, + "loss": 0.1807, + "step": 12530 + }, + { + "epoch": 8.01, + "learning_rate": 0.0008012779552715655, + "loss": 0.1813, + "step": 12540 + }, + { + "epoch": 8.02, + "learning_rate": 0.0008019169329073482, + "loss": 0.1791, + "step": 12550 + }, + { + "epoch": 8.03, + "learning_rate": 0.0008025559105431311, + "loss": 0.1472, + "step": 12560 + }, + { + "epoch": 8.03, + "learning_rate": 0.0008031948881789137, + "loss": 0.18, + "step": 12570 + }, + { + "epoch": 8.04, + "learning_rate": 0.0008038338658146965, + "loss": 0.1829, + "step": 12580 + }, + { + "epoch": 8.04, + "learning_rate": 0.0008044728434504793, + "loss": 0.1877, + "step": 12590 + }, + { + "epoch": 8.05, + "learning_rate": 0.000805111821086262, + "loss": 0.2126, + "step": 12600 + }, + { + "epoch": 8.06, + "learning_rate": 0.0008057507987220447, + "loss": 0.1921, + "step": 12610 + }, + { + "epoch": 8.06, + "learning_rate": 0.0008063897763578275, + "loss": 0.1952, + "step": 12620 + }, + { + "epoch": 8.07, + "learning_rate": 0.0008070287539936102, + "loss": 0.1775, + "step": 12630 + }, + { + "epoch": 8.08, + "learning_rate": 0.000807667731629393, + "loss": 0.2038, + "step": 12640 + }, + { + "epoch": 8.08, + "learning_rate": 0.0008083067092651757, + "loss": 0.2054, + "step": 12650 + }, + { + "epoch": 8.09, + "learning_rate": 0.0008089456869009585, + "loss": 0.2258, + "step": 12660 + }, + { + "epoch": 8.1, + "learning_rate": 0.0008095846645367412, + "loss": 0.2226, + "step": 12670 + }, + { + "epoch": 8.1, + "learning_rate": 0.000810223642172524, + "loss": 0.1902, + "step": 12680 + }, + { + "epoch": 8.11, + "learning_rate": 0.0008108626198083068, + "loss": 0.1996, + "step": 12690 + }, + { + "epoch": 8.11, + "learning_rate": 0.0008115015974440895, + "loss": 0.1904, + "step": 12700 + }, + { + "epoch": 8.12, + "learning_rate": 0.0008121405750798722, + "loss": 0.1486, + "step": 12710 + }, + { + "epoch": 8.13, + "learning_rate": 0.000812779552715655, + "loss": 0.1905, + "step": 12720 + }, + { + "epoch": 8.13, + "learning_rate": 0.0008134185303514378, + "loss": 0.1883, + "step": 12730 + }, + { + "epoch": 8.14, + "learning_rate": 0.0008140575079872204, + "loss": 0.1863, + "step": 12740 + }, + { + "epoch": 8.15, + "learning_rate": 0.0008146964856230032, + "loss": 0.1934, + "step": 12750 + }, + { + "epoch": 8.15, + "learning_rate": 0.000815335463258786, + "loss": 0.1778, + "step": 12760 + }, + { + "epoch": 8.16, + "learning_rate": 0.0008159744408945687, + "loss": 0.1796, + "step": 12770 + }, + { + "epoch": 8.17, + "learning_rate": 0.0008166134185303514, + "loss": 0.1902, + "step": 12780 + }, + { + "epoch": 8.17, + "learning_rate": 0.0008172523961661343, + "loss": 0.1913, + "step": 12790 + }, + { + "epoch": 8.18, + "learning_rate": 0.0008178913738019169, + "loss": 0.1887, + "step": 12800 + }, + { + "epoch": 8.19, + "learning_rate": 0.0008185303514376997, + "loss": 0.1658, + "step": 12810 + }, + { + "epoch": 8.19, + "learning_rate": 0.0008191693290734825, + "loss": 0.169, + "step": 12820 + }, + { + "epoch": 8.2, + "learning_rate": 0.0008198083067092652, + "loss": 0.1946, + "step": 12830 + }, + { + "epoch": 8.2, + "learning_rate": 0.0008204472843450479, + "loss": 0.1913, + "step": 12840 + }, + { + "epoch": 8.21, + "learning_rate": 0.0008210862619808306, + "loss": 0.2135, + "step": 12850 + }, + { + "epoch": 8.22, + "learning_rate": 0.0008217252396166134, + "loss": 0.1913, + "step": 12860 + }, + { + "epoch": 8.22, + "learning_rate": 0.0008223642172523962, + "loss": 0.1958, + "step": 12870 + }, + { + "epoch": 8.23, + "learning_rate": 0.0008230031948881789, + "loss": 0.2013, + "step": 12880 + }, + { + "epoch": 8.24, + "learning_rate": 0.0008236421725239617, + "loss": 0.2246, + "step": 12890 + }, + { + "epoch": 8.24, + "learning_rate": 0.0008242811501597445, + "loss": 0.2016, + "step": 12900 + }, + { + "epoch": 8.25, + "learning_rate": 0.0008249201277955271, + "loss": 0.1932, + "step": 12910 + }, + { + "epoch": 8.26, + "learning_rate": 0.00082555910543131, + "loss": 0.1766, + "step": 12920 + }, + { + "epoch": 8.26, + "learning_rate": 0.0008261980830670927, + "loss": 0.1959, + "step": 12930 + }, + { + "epoch": 8.27, + "learning_rate": 0.0008268370607028754, + "loss": 0.1978, + "step": 12940 + }, + { + "epoch": 8.27, + "learning_rate": 0.0008274760383386582, + "loss": 0.2187, + "step": 12950 + }, + { + "epoch": 8.28, + "learning_rate": 0.000828115015974441, + "loss": 0.1971, + "step": 12960 + }, + { + "epoch": 8.29, + "learning_rate": 0.0008287539936102236, + "loss": 0.1839, + "step": 12970 + }, + { + "epoch": 8.29, + "learning_rate": 0.0008293929712460064, + "loss": 0.2237, + "step": 12980 + }, + { + "epoch": 8.3, + "learning_rate": 0.0008300319488817891, + "loss": 0.1553, + "step": 12990 + }, + { + "epoch": 8.31, + "learning_rate": 0.0008306709265175719, + "loss": 0.193, + "step": 13000 + }, + { + "epoch": 8.31, + "learning_rate": 0.0008313099041533546, + "loss": 0.2027, + "step": 13010 + }, + { + "epoch": 8.32, + "learning_rate": 0.0008319488817891374, + "loss": 0.1927, + "step": 13020 + }, + { + "epoch": 8.33, + "learning_rate": 0.0008325878594249201, + "loss": 0.2067, + "step": 13030 + }, + { + "epoch": 8.33, + "learning_rate": 0.0008332268370607029, + "loss": 0.2119, + "step": 13040 + }, + { + "epoch": 8.34, + "learning_rate": 0.0008338658146964856, + "loss": 0.2125, + "step": 13050 + }, + { + "epoch": 8.34, + "learning_rate": 0.0008345047923322684, + "loss": 0.2148, + "step": 13060 + }, + { + "epoch": 8.35, + "learning_rate": 0.0008351437699680511, + "loss": 0.1722, + "step": 13070 + }, + { + "epoch": 8.36, + "learning_rate": 0.0008357827476038338, + "loss": 0.212, + "step": 13080 + }, + { + "epoch": 8.36, + "learning_rate": 0.0008364217252396167, + "loss": 0.2077, + "step": 13090 + }, + { + "epoch": 8.37, + "learning_rate": 0.0008370607028753994, + "loss": 0.2223, + "step": 13100 + }, + { + "epoch": 8.38, + "learning_rate": 0.0008376996805111821, + "loss": 0.2055, + "step": 13110 + }, + { + "epoch": 8.38, + "learning_rate": 0.0008383386581469649, + "loss": 0.1995, + "step": 13120 + }, + { + "epoch": 8.39, + "learning_rate": 0.0008389776357827477, + "loss": 0.176, + "step": 13130 + }, + { + "epoch": 8.4, + "learning_rate": 0.0008396166134185303, + "loss": 0.2049, + "step": 13140 + }, + { + "epoch": 8.4, + "learning_rate": 0.0008402555910543132, + "loss": 0.2177, + "step": 13150 + }, + { + "epoch": 8.41, + "learning_rate": 0.0008408945686900958, + "loss": 0.1961, + "step": 13160 + }, + { + "epoch": 8.42, + "learning_rate": 0.0008415335463258786, + "loss": 0.2094, + "step": 13170 + }, + { + "epoch": 8.42, + "learning_rate": 0.0008421725239616614, + "loss": 0.2121, + "step": 13180 + }, + { + "epoch": 8.43, + "learning_rate": 0.0008428115015974441, + "loss": 0.1996, + "step": 13190 + }, + { + "epoch": 8.43, + "learning_rate": 0.0008434504792332268, + "loss": 0.1836, + "step": 13200 + }, + { + "epoch": 8.44, + "learning_rate": 0.0008440894568690097, + "loss": 0.1804, + "step": 13210 + }, + { + "epoch": 8.45, + "learning_rate": 0.0008447284345047923, + "loss": 0.2604, + "step": 13220 + }, + { + "epoch": 8.45, + "learning_rate": 0.0008453674121405751, + "loss": 0.2071, + "step": 13230 + }, + { + "epoch": 8.46, + "learning_rate": 0.0008460063897763578, + "loss": 0.2113, + "step": 13240 + }, + { + "epoch": 8.47, + "learning_rate": 0.0008466453674121406, + "loss": 0.1961, + "step": 13250 + }, + { + "epoch": 8.47, + "learning_rate": 0.0008472843450479233, + "loss": 0.2149, + "step": 13260 + }, + { + "epoch": 8.48, + "learning_rate": 0.0008479233226837061, + "loss": 0.2112, + "step": 13270 + }, + { + "epoch": 8.49, + "learning_rate": 0.0008485623003194889, + "loss": 0.2003, + "step": 13280 + }, + { + "epoch": 8.49, + "learning_rate": 0.0008492012779552716, + "loss": 0.2098, + "step": 13290 + }, + { + "epoch": 8.5, + "learning_rate": 0.0008498402555910543, + "loss": 0.1986, + "step": 13300 + }, + { + "epoch": 8.5, + "learning_rate": 0.000850479233226837, + "loss": 0.2055, + "step": 13310 + }, + { + "epoch": 8.51, + "learning_rate": 0.0008511182108626199, + "loss": 0.169, + "step": 13320 + }, + { + "epoch": 8.52, + "learning_rate": 0.0008517571884984025, + "loss": 0.2417, + "step": 13330 + }, + { + "epoch": 8.52, + "learning_rate": 0.0008523961661341853, + "loss": 0.2163, + "step": 13340 + }, + { + "epoch": 8.53, + "learning_rate": 0.0008530351437699681, + "loss": 0.1996, + "step": 13350 + }, + { + "epoch": 8.54, + "learning_rate": 0.0008536741214057508, + "loss": 0.1736, + "step": 13360 + }, + { + "epoch": 8.54, + "learning_rate": 0.0008543130990415335, + "loss": 0.204, + "step": 13370 + }, + { + "epoch": 8.55, + "learning_rate": 0.0008549520766773164, + "loss": 0.2189, + "step": 13380 + }, + { + "epoch": 8.56, + "learning_rate": 0.000855591054313099, + "loss": 0.1797, + "step": 13390 + }, + { + "epoch": 8.56, + "learning_rate": 0.0008562300319488818, + "loss": 0.1723, + "step": 13400 + }, + { + "epoch": 8.57, + "learning_rate": 0.0008568690095846646, + "loss": 0.2189, + "step": 13410 + }, + { + "epoch": 8.57, + "learning_rate": 0.0008575079872204473, + "loss": 0.2004, + "step": 13420 + }, + { + "epoch": 8.58, + "learning_rate": 0.00085814696485623, + "loss": 0.2102, + "step": 13430 + }, + { + "epoch": 8.59, + "learning_rate": 0.0008587859424920129, + "loss": 0.173, + "step": 13440 + }, + { + "epoch": 8.59, + "learning_rate": 0.0008594249201277955, + "loss": 0.2067, + "step": 13450 + }, + { + "epoch": 8.6, + "learning_rate": 0.0008600638977635783, + "loss": 0.2229, + "step": 13460 + }, + { + "epoch": 8.61, + "learning_rate": 0.0008607028753993609, + "loss": 0.2101, + "step": 13470 + }, + { + "epoch": 8.61, + "learning_rate": 0.0008613418530351438, + "loss": 0.1975, + "step": 13480 + }, + { + "epoch": 8.62, + "learning_rate": 0.0008619808306709266, + "loss": 0.1821, + "step": 13490 + }, + { + "epoch": 8.63, + "learning_rate": 0.0008626198083067092, + "loss": 0.244, + "step": 13500 + }, + { + "epoch": 8.63, + "learning_rate": 0.0008632587859424921, + "loss": 0.1859, + "step": 13510 + }, + { + "epoch": 8.64, + "learning_rate": 0.0008638977635782748, + "loss": 0.178, + "step": 13520 + }, + { + "epoch": 8.65, + "learning_rate": 0.0008645367412140575, + "loss": 0.2166, + "step": 13530 + }, + { + "epoch": 8.65, + "learning_rate": 0.0008651757188498403, + "loss": 0.166, + "step": 13540 + }, + { + "epoch": 8.66, + "learning_rate": 0.0008658146964856231, + "loss": 0.1821, + "step": 13550 + }, + { + "epoch": 8.66, + "learning_rate": 0.0008664536741214057, + "loss": 0.1787, + "step": 13560 + }, + { + "epoch": 8.67, + "learning_rate": 0.0008670926517571886, + "loss": 0.1851, + "step": 13570 + }, + { + "epoch": 8.68, + "learning_rate": 0.0008677316293929713, + "loss": 0.181, + "step": 13580 + }, + { + "epoch": 8.68, + "learning_rate": 0.000868370607028754, + "loss": 0.1952, + "step": 13590 + }, + { + "epoch": 8.69, + "learning_rate": 0.0008690095846645367, + "loss": 0.1873, + "step": 13600 + }, + { + "epoch": 8.7, + "learning_rate": 0.0008696485623003195, + "loss": 0.212, + "step": 13610 + }, + { + "epoch": 8.7, + "learning_rate": 0.0008702875399361022, + "loss": 0.2072, + "step": 13620 + }, + { + "epoch": 8.71, + "learning_rate": 0.000870926517571885, + "loss": 0.2092, + "step": 13630 + }, + { + "epoch": 8.72, + "learning_rate": 0.0008715654952076677, + "loss": 0.2119, + "step": 13640 + }, + { + "epoch": 8.72, + "learning_rate": 0.0008722044728434505, + "loss": 0.2139, + "step": 13650 + }, + { + "epoch": 8.73, + "learning_rate": 0.0008728434504792332, + "loss": 0.1895, + "step": 13660 + }, + { + "epoch": 8.73, + "learning_rate": 0.000873482428115016, + "loss": 0.2178, + "step": 13670 + }, + { + "epoch": 8.74, + "learning_rate": 0.0008741214057507988, + "loss": 0.229, + "step": 13680 + }, + { + "epoch": 8.75, + "learning_rate": 0.0008747603833865815, + "loss": 0.2104, + "step": 13690 + }, + { + "epoch": 8.75, + "learning_rate": 0.0008753993610223643, + "loss": 0.2215, + "step": 13700 + }, + { + "epoch": 8.76, + "learning_rate": 0.000876038338658147, + "loss": 0.1943, + "step": 13710 + }, + { + "epoch": 8.77, + "learning_rate": 0.0008766773162939298, + "loss": 0.2193, + "step": 13720 + }, + { + "epoch": 8.77, + "learning_rate": 0.0008773162939297124, + "loss": 0.2294, + "step": 13730 + }, + { + "epoch": 8.78, + "learning_rate": 0.0008779552715654953, + "loss": 0.1998, + "step": 13740 + }, + { + "epoch": 8.79, + "learning_rate": 0.0008785942492012779, + "loss": 0.2152, + "step": 13750 + }, + { + "epoch": 8.79, + "learning_rate": 0.0008792332268370607, + "loss": 0.1832, + "step": 13760 + }, + { + "epoch": 8.8, + "learning_rate": 0.0008798722044728435, + "loss": 0.2159, + "step": 13770 + }, + { + "epoch": 8.8, + "learning_rate": 0.0008805111821086262, + "loss": 0.2204, + "step": 13780 + }, + { + "epoch": 8.81, + "learning_rate": 0.0008811501597444089, + "loss": 0.224, + "step": 13790 + }, + { + "epoch": 8.82, + "learning_rate": 0.0008817891373801918, + "loss": 0.2238, + "step": 13800 + }, + { + "epoch": 8.82, + "learning_rate": 0.0008824281150159744, + "loss": 0.2188, + "step": 13810 + }, + { + "epoch": 8.83, + "learning_rate": 0.0008830670926517572, + "loss": 0.2244, + "step": 13820 + }, + { + "epoch": 8.84, + "learning_rate": 0.00088370607028754, + "loss": 0.193, + "step": 13830 + }, + { + "epoch": 8.84, + "learning_rate": 0.0008843450479233227, + "loss": 0.2003, + "step": 13840 + }, + { + "epoch": 8.85, + "learning_rate": 0.0008849840255591054, + "loss": 0.2043, + "step": 13850 + }, + { + "epoch": 8.86, + "learning_rate": 0.0008856230031948882, + "loss": 0.2051, + "step": 13860 + }, + { + "epoch": 8.86, + "learning_rate": 0.000886261980830671, + "loss": 0.183, + "step": 13870 + }, + { + "epoch": 8.87, + "learning_rate": 0.0008869009584664537, + "loss": 0.2322, + "step": 13880 + }, + { + "epoch": 8.88, + "learning_rate": 0.0008875399361022365, + "loss": 0.2282, + "step": 13890 + }, + { + "epoch": 8.88, + "learning_rate": 0.0008881789137380192, + "loss": 0.196, + "step": 13900 + }, + { + "epoch": 8.89, + "learning_rate": 0.000888817891373802, + "loss": 0.1948, + "step": 13910 + }, + { + "epoch": 8.89, + "learning_rate": 0.0008894568690095846, + "loss": 0.2252, + "step": 13920 + }, + { + "epoch": 8.9, + "learning_rate": 0.0008900958466453675, + "loss": 0.2003, + "step": 13930 + }, + { + "epoch": 8.91, + "learning_rate": 0.0008907348242811502, + "loss": 0.2181, + "step": 13940 + }, + { + "epoch": 8.91, + "learning_rate": 0.0008913738019169329, + "loss": 0.2073, + "step": 13950 + }, + { + "epoch": 8.92, + "learning_rate": 0.0008920127795527156, + "loss": 0.2436, + "step": 13960 + }, + { + "epoch": 8.93, + "learning_rate": 0.0008926517571884985, + "loss": 0.1896, + "step": 13970 + }, + { + "epoch": 8.93, + "learning_rate": 0.0008932907348242811, + "loss": 0.201, + "step": 13980 + }, + { + "epoch": 8.94, + "learning_rate": 0.0008939297124600639, + "loss": 0.2023, + "step": 13990 + }, + { + "epoch": 8.95, + "learning_rate": 0.0008945686900958467, + "loss": 0.2138, + "step": 14000 + }, + { + "epoch": 8.95, + "learning_rate": 0.0008952076677316294, + "loss": 0.2247, + "step": 14010 + }, + { + "epoch": 8.96, + "learning_rate": 0.0008958466453674121, + "loss": 0.2329, + "step": 14020 + }, + { + "epoch": 8.96, + "learning_rate": 0.000896485623003195, + "loss": 0.1843, + "step": 14030 + }, + { + "epoch": 8.97, + "learning_rate": 0.0008971246006389776, + "loss": 0.1895, + "step": 14040 + }, + { + "epoch": 8.98, + "learning_rate": 0.0008977635782747604, + "loss": 0.1814, + "step": 14050 + }, + { + "epoch": 8.98, + "learning_rate": 0.000898402555910543, + "loss": 0.2078, + "step": 14060 + }, + { + "epoch": 8.99, + "learning_rate": 0.0008990415335463259, + "loss": 0.2324, + "step": 14070 + }, + { + "epoch": 9.0, + "learning_rate": 0.0008996805111821087, + "loss": 0.2346, + "step": 14080 + }, + { + "epoch": 9.0, + "learning_rate": 0.0009003194888178913, + "loss": 0.2024, + "step": 14090 + }, + { + "epoch": 9.01, + "learning_rate": 0.0009009584664536742, + "loss": 0.174, + "step": 14100 + }, + { + "epoch": 9.02, + "learning_rate": 0.0009015974440894569, + "loss": 0.1736, + "step": 14110 + }, + { + "epoch": 9.02, + "learning_rate": 0.0009022364217252396, + "loss": 0.2153, + "step": 14120 + }, + { + "epoch": 9.03, + "learning_rate": 0.0009028753993610224, + "loss": 0.1848, + "step": 14130 + }, + { + "epoch": 9.04, + "learning_rate": 0.0009035143769968052, + "loss": 0.1719, + "step": 14140 + }, + { + "epoch": 9.04, + "learning_rate": 0.0009041533546325878, + "loss": 0.1904, + "step": 14150 + }, + { + "epoch": 9.05, + "learning_rate": 0.0009047923322683707, + "loss": 0.1692, + "step": 14160 + }, + { + "epoch": 9.05, + "learning_rate": 0.0009054313099041534, + "loss": 0.1367, + "step": 14170 + }, + { + "epoch": 9.06, + "learning_rate": 0.0009060702875399361, + "loss": 0.1853, + "step": 14180 + }, + { + "epoch": 9.07, + "learning_rate": 0.0009067092651757188, + "loss": 0.1945, + "step": 14190 + }, + { + "epoch": 9.07, + "learning_rate": 0.0009073482428115017, + "loss": 0.1827, + "step": 14200 + }, + { + "epoch": 9.08, + "learning_rate": 0.0009079872204472843, + "loss": 0.1927, + "step": 14210 + }, + { + "epoch": 9.09, + "learning_rate": 0.0009086261980830671, + "loss": 0.1513, + "step": 14220 + }, + { + "epoch": 9.09, + "learning_rate": 0.0009092651757188498, + "loss": 0.2149, + "step": 14230 + }, + { + "epoch": 9.1, + "learning_rate": 0.0009099041533546326, + "loss": 0.1609, + "step": 14240 + }, + { + "epoch": 9.11, + "learning_rate": 0.0009105431309904153, + "loss": 0.1814, + "step": 14250 + }, + { + "epoch": 9.11, + "learning_rate": 0.0009111821086261981, + "loss": 0.192, + "step": 14260 + }, + { + "epoch": 9.12, + "learning_rate": 0.0009118210862619809, + "loss": 0.2028, + "step": 14270 + }, + { + "epoch": 9.12, + "learning_rate": 0.0009124600638977636, + "loss": 0.1814, + "step": 14280 + }, + { + "epoch": 9.13, + "learning_rate": 0.0009130990415335464, + "loss": 0.1721, + "step": 14290 + }, + { + "epoch": 9.14, + "learning_rate": 0.0009137380191693291, + "loss": 0.1894, + "step": 14300 + }, + { + "epoch": 9.14, + "learning_rate": 0.0009143769968051119, + "loss": 0.1716, + "step": 14310 + }, + { + "epoch": 9.15, + "learning_rate": 0.0009150159744408945, + "loss": 0.1957, + "step": 14320 + }, + { + "epoch": 9.16, + "learning_rate": 0.0009156549520766774, + "loss": 0.1671, + "step": 14330 + }, + { + "epoch": 9.16, + "learning_rate": 0.0009162939297124601, + "loss": 0.1852, + "step": 14340 + }, + { + "epoch": 9.17, + "learning_rate": 0.0009169329073482428, + "loss": 0.2002, + "step": 14350 + }, + { + "epoch": 9.18, + "learning_rate": 0.0009175718849840256, + "loss": 0.1912, + "step": 14360 + }, + { + "epoch": 9.18, + "learning_rate": 0.0009182108626198083, + "loss": 0.1929, + "step": 14370 + }, + { + "epoch": 9.19, + "learning_rate": 0.000918849840255591, + "loss": 0.1772, + "step": 14380 + }, + { + "epoch": 9.19, + "learning_rate": 0.0009194888178913739, + "loss": 0.1767, + "step": 14390 + }, + { + "epoch": 9.2, + "learning_rate": 0.0009201277955271565, + "loss": 0.1999, + "step": 14400 + }, + { + "epoch": 9.21, + "learning_rate": 0.0009207667731629393, + "loss": 0.2022, + "step": 14410 + }, + { + "epoch": 9.21, + "learning_rate": 0.000921405750798722, + "loss": 0.2222, + "step": 14420 + }, + { + "epoch": 9.22, + "learning_rate": 0.0009220447284345048, + "loss": 0.1796, + "step": 14430 + }, + { + "epoch": 9.23, + "learning_rate": 0.0009226837060702875, + "loss": 0.197, + "step": 14440 + }, + { + "epoch": 9.23, + "learning_rate": 0.0009233226837060703, + "loss": 0.1699, + "step": 14450 + }, + { + "epoch": 9.24, + "learning_rate": 0.000923961661341853, + "loss": 0.1662, + "step": 14460 + }, + { + "epoch": 9.25, + "learning_rate": 0.0009246006389776358, + "loss": 0.1596, + "step": 14470 + }, + { + "epoch": 9.25, + "learning_rate": 0.0009252396166134186, + "loss": 0.1826, + "step": 14480 + }, + { + "epoch": 9.26, + "learning_rate": 0.0009258785942492013, + "loss": 0.1801, + "step": 14490 + }, + { + "epoch": 9.27, + "learning_rate": 0.0009265175718849841, + "loss": 0.217, + "step": 14500 + }, + { + "epoch": 9.27, + "learning_rate": 0.0009271565495207667, + "loss": 0.1575, + "step": 14510 + }, + { + "epoch": 9.28, + "learning_rate": 0.0009277955271565496, + "loss": 0.2204, + "step": 14520 + }, + { + "epoch": 9.28, + "learning_rate": 0.0009284345047923323, + "loss": 0.1904, + "step": 14530 + }, + { + "epoch": 9.29, + "learning_rate": 0.000929073482428115, + "loss": 0.1612, + "step": 14540 + }, + { + "epoch": 9.3, + "learning_rate": 0.0009297124600638978, + "loss": 0.2108, + "step": 14550 + }, + { + "epoch": 9.3, + "learning_rate": 0.0009303514376996806, + "loss": 0.2171, + "step": 14560 + }, + { + "epoch": 9.31, + "learning_rate": 0.0009309904153354632, + "loss": 0.1911, + "step": 14570 + }, + { + "epoch": 9.32, + "learning_rate": 0.000931629392971246, + "loss": 0.2062, + "step": 14580 + }, + { + "epoch": 9.32, + "learning_rate": 0.0009322683706070288, + "loss": 0.2073, + "step": 14590 + }, + { + "epoch": 9.33, + "learning_rate": 0.0009329073482428115, + "loss": 0.2118, + "step": 14600 + }, + { + "epoch": 9.34, + "learning_rate": 0.0009335463258785942, + "loss": 0.1814, + "step": 14610 + }, + { + "epoch": 9.34, + "learning_rate": 0.0009341853035143771, + "loss": 0.1654, + "step": 14620 + }, + { + "epoch": 9.35, + "learning_rate": 0.0009348242811501597, + "loss": 0.1794, + "step": 14630 + }, + { + "epoch": 9.35, + "learning_rate": 0.0009354632587859425, + "loss": 0.199, + "step": 14640 + }, + { + "epoch": 9.36, + "learning_rate": 0.0009361022364217253, + "loss": 0.1785, + "step": 14650 + }, + { + "epoch": 9.37, + "learning_rate": 0.000936741214057508, + "loss": 0.222, + "step": 14660 + }, + { + "epoch": 9.37, + "learning_rate": 0.0009373801916932908, + "loss": 0.2213, + "step": 14670 + }, + { + "epoch": 9.38, + "learning_rate": 0.0009380191693290734, + "loss": 0.1864, + "step": 14680 + }, + { + "epoch": 9.39, + "learning_rate": 0.0009386581469648563, + "loss": 0.1973, + "step": 14690 + }, + { + "epoch": 9.39, + "learning_rate": 0.000939297124600639, + "loss": 0.2027, + "step": 14700 + }, + { + "epoch": 9.4, + "learning_rate": 0.0009399361022364217, + "loss": 0.2115, + "step": 14710 + }, + { + "epoch": 9.41, + "learning_rate": 0.0009405750798722045, + "loss": 0.207, + "step": 14720 + }, + { + "epoch": 9.41, + "learning_rate": 0.0009412140575079873, + "loss": 0.1878, + "step": 14730 + }, + { + "epoch": 9.42, + "learning_rate": 0.0009418530351437699, + "loss": 0.1794, + "step": 14740 + }, + { + "epoch": 9.42, + "learning_rate": 0.0009424920127795528, + "loss": 0.2063, + "step": 14750 + }, + { + "epoch": 9.43, + "learning_rate": 0.0009431309904153355, + "loss": 0.2194, + "step": 14760 + }, + { + "epoch": 9.44, + "learning_rate": 0.0009437699680511182, + "loss": 0.1841, + "step": 14770 + }, + { + "epoch": 9.44, + "learning_rate": 0.000944408945686901, + "loss": 0.2375, + "step": 14780 + }, + { + "epoch": 9.45, + "learning_rate": 0.0009450479233226838, + "loss": 0.1869, + "step": 14790 + }, + { + "epoch": 9.46, + "learning_rate": 0.0009456869009584664, + "loss": 0.1953, + "step": 14800 + }, + { + "epoch": 9.46, + "learning_rate": 0.0009463258785942493, + "loss": 0.202, + "step": 14810 + }, + { + "epoch": 9.47, + "learning_rate": 0.0009469648562300319, + "loss": 0.2095, + "step": 14820 + }, + { + "epoch": 9.48, + "learning_rate": 0.0009476038338658147, + "loss": 0.1894, + "step": 14830 + }, + { + "epoch": 9.48, + "learning_rate": 0.0009482428115015974, + "loss": 0.2249, + "step": 14840 + }, + { + "epoch": 9.49, + "learning_rate": 0.0009488817891373802, + "loss": 0.1649, + "step": 14850 + }, + { + "epoch": 9.5, + "learning_rate": 0.000949520766773163, + "loss": 0.1917, + "step": 14860 + }, + { + "epoch": 9.5, + "learning_rate": 0.0009501597444089457, + "loss": 0.1856, + "step": 14870 + }, + { + "epoch": 9.51, + "learning_rate": 0.0009507987220447285, + "loss": 0.2134, + "step": 14880 + }, + { + "epoch": 9.51, + "learning_rate": 0.0009514376996805112, + "loss": 0.2183, + "step": 14890 + }, + { + "epoch": 9.52, + "learning_rate": 0.000952076677316294, + "loss": 0.2113, + "step": 14900 + }, + { + "epoch": 9.53, + "learning_rate": 0.0009527156549520767, + "loss": 0.1836, + "step": 14910 + }, + { + "epoch": 9.53, + "learning_rate": 0.0009533546325878595, + "loss": 0.1806, + "step": 14920 + }, + { + "epoch": 9.54, + "learning_rate": 0.0009539936102236422, + "loss": 0.2051, + "step": 14930 + }, + { + "epoch": 9.55, + "learning_rate": 0.000954632587859425, + "loss": 0.1987, + "step": 14940 + }, + { + "epoch": 9.55, + "learning_rate": 0.0009552715654952077, + "loss": 0.2183, + "step": 14950 + }, + { + "epoch": 9.56, + "learning_rate": 0.0009559105431309905, + "loss": 0.188, + "step": 14960 + }, + { + "epoch": 9.57, + "learning_rate": 0.0009565495207667731, + "loss": 0.2088, + "step": 14970 + }, + { + "epoch": 9.57, + "learning_rate": 0.000957188498402556, + "loss": 0.2153, + "step": 14980 + }, + { + "epoch": 9.58, + "learning_rate": 0.0009578274760383386, + "loss": 0.2069, + "step": 14990 + }, + { + "epoch": 9.58, + "learning_rate": 0.0009584664536741214, + "loss": 0.1698, + "step": 15000 + }, + { + "epoch": 9.59, + "learning_rate": 0.0009591054313099042, + "loss": 0.205, + "step": 15010 + }, + { + "epoch": 9.6, + "learning_rate": 0.0009597444089456869, + "loss": 0.1753, + "step": 15020 + }, + { + "epoch": 9.6, + "learning_rate": 0.0009603833865814696, + "loss": 0.1908, + "step": 15030 + }, + { + "epoch": 9.61, + "learning_rate": 0.0009610223642172525, + "loss": 0.1935, + "step": 15040 + }, + { + "epoch": 9.62, + "learning_rate": 0.0009616613418530351, + "loss": 0.2243, + "step": 15050 + }, + { + "epoch": 9.62, + "learning_rate": 0.0009623003194888179, + "loss": 0.2018, + "step": 15060 + }, + { + "epoch": 9.63, + "learning_rate": 0.0009629392971246008, + "loss": 0.2062, + "step": 15070 + }, + { + "epoch": 9.64, + "learning_rate": 0.0009635782747603834, + "loss": 0.2033, + "step": 15080 + }, + { + "epoch": 9.64, + "learning_rate": 0.0009642172523961662, + "loss": 0.1993, + "step": 15090 + }, + { + "epoch": 9.65, + "learning_rate": 0.0009648562300319489, + "loss": 0.1946, + "step": 15100 + }, + { + "epoch": 9.65, + "learning_rate": 0.0009654952076677317, + "loss": 0.2113, + "step": 15110 + }, + { + "epoch": 9.66, + "learning_rate": 0.0009661341853035144, + "loss": 0.2004, + "step": 15120 + }, + { + "epoch": 9.67, + "learning_rate": 0.0009667731629392971, + "loss": 0.21, + "step": 15130 + }, + { + "epoch": 9.67, + "learning_rate": 0.0009674121405750799, + "loss": 0.1794, + "step": 15140 + }, + { + "epoch": 9.68, + "learning_rate": 0.0009680511182108627, + "loss": 0.2395, + "step": 15150 + }, + { + "epoch": 9.69, + "learning_rate": 0.0009686900958466453, + "loss": 0.2506, + "step": 15160 + }, + { + "epoch": 9.69, + "learning_rate": 0.0009693290734824282, + "loss": 0.1916, + "step": 15170 + }, + { + "epoch": 9.7, + "learning_rate": 0.0009699680511182109, + "loss": 0.1933, + "step": 15180 + }, + { + "epoch": 9.71, + "learning_rate": 0.0009706070287539936, + "loss": 0.2162, + "step": 15190 + }, + { + "epoch": 9.71, + "learning_rate": 0.0009712460063897763, + "loss": 0.221, + "step": 15200 + }, + { + "epoch": 9.72, + "learning_rate": 0.0009718849840255592, + "loss": 0.1934, + "step": 15210 + }, + { + "epoch": 9.73, + "learning_rate": 0.0009725239616613418, + "loss": 0.1856, + "step": 15220 + }, + { + "epoch": 9.73, + "learning_rate": 0.0009731629392971246, + "loss": 0.1951, + "step": 15230 + }, + { + "epoch": 9.74, + "learning_rate": 0.0009738019169329074, + "loss": 0.1962, + "step": 15240 + }, + { + "epoch": 9.74, + "learning_rate": 0.0009744408945686901, + "loss": 0.1947, + "step": 15250 + }, + { + "epoch": 9.75, + "learning_rate": 0.0009750798722044729, + "loss": 0.217, + "step": 15260 + }, + { + "epoch": 9.76, + "learning_rate": 0.0009757188498402556, + "loss": 0.2092, + "step": 15270 + }, + { + "epoch": 9.76, + "learning_rate": 0.0009763578274760384, + "loss": 0.1695, + "step": 15280 + }, + { + "epoch": 9.77, + "learning_rate": 0.0009769968051118211, + "loss": 0.1863, + "step": 15290 + }, + { + "epoch": 9.78, + "learning_rate": 0.0009776357827476038, + "loss": 0.1947, + "step": 15300 + }, + { + "epoch": 9.78, + "learning_rate": 0.0009782747603833866, + "loss": 0.1878, + "step": 15310 + }, + { + "epoch": 9.79, + "learning_rate": 0.0009789137380191693, + "loss": 0.2087, + "step": 15320 + }, + { + "epoch": 9.8, + "learning_rate": 0.000979552715654952, + "loss": 0.183, + "step": 15330 + }, + { + "epoch": 9.8, + "learning_rate": 0.000980191693290735, + "loss": 0.1836, + "step": 15340 + }, + { + "epoch": 9.81, + "learning_rate": 0.0009808306709265175, + "loss": 0.2152, + "step": 15350 + }, + { + "epoch": 9.81, + "learning_rate": 0.0009814696485623004, + "loss": 0.2007, + "step": 15360 + }, + { + "epoch": 9.82, + "learning_rate": 0.000982108626198083, + "loss": 0.2202, + "step": 15370 + }, + { + "epoch": 9.83, + "learning_rate": 0.000982747603833866, + "loss": 0.2071, + "step": 15380 + }, + { + "epoch": 9.83, + "learning_rate": 0.0009833865814696486, + "loss": 0.212, + "step": 15390 + }, + { + "epoch": 9.84, + "learning_rate": 0.0009840255591054314, + "loss": 0.217, + "step": 15400 + }, + { + "epoch": 9.85, + "learning_rate": 0.000984664536741214, + "loss": 0.1713, + "step": 15410 + }, + { + "epoch": 9.85, + "learning_rate": 0.0009853035143769968, + "loss": 0.1993, + "step": 15420 + }, + { + "epoch": 9.86, + "learning_rate": 0.0009859424920127795, + "loss": 0.1823, + "step": 15430 + }, + { + "epoch": 9.87, + "learning_rate": 0.0009865814696485623, + "loss": 0.179, + "step": 15440 + }, + { + "epoch": 9.87, + "learning_rate": 0.000987220447284345, + "loss": 0.1938, + "step": 15450 + }, + { + "epoch": 9.88, + "learning_rate": 0.0009878594249201277, + "loss": 0.2068, + "step": 15460 + }, + { + "epoch": 9.88, + "learning_rate": 0.0009884984025559107, + "loss": 0.1709, + "step": 15470 + }, + { + "epoch": 9.89, + "learning_rate": 0.0009891373801916934, + "loss": 0.2332, + "step": 15480 + }, + { + "epoch": 9.9, + "learning_rate": 0.0009897763578274761, + "loss": 0.193, + "step": 15490 + }, + { + "epoch": 9.9, + "learning_rate": 0.0009904153354632589, + "loss": 0.212, + "step": 15500 + }, + { + "epoch": 9.91, + "learning_rate": 0.0009910543130990416, + "loss": 0.2095, + "step": 15510 + }, + { + "epoch": 9.92, + "learning_rate": 0.0009916932907348243, + "loss": 0.1849, + "step": 15520 + }, + { + "epoch": 9.92, + "learning_rate": 0.000992332268370607, + "loss": 0.212, + "step": 15530 + }, + { + "epoch": 9.93, + "learning_rate": 0.0009929712460063898, + "loss": 0.1945, + "step": 15540 + }, + { + "epoch": 9.94, + "learning_rate": 0.0009936102236421725, + "loss": 0.2293, + "step": 15550 + }, + { + "epoch": 9.94, + "learning_rate": 0.0009942492012779552, + "loss": 0.2218, + "step": 15560 + }, + { + "epoch": 9.95, + "learning_rate": 0.000994888178913738, + "loss": 0.2198, + "step": 15570 + }, + { + "epoch": 9.95, + "learning_rate": 0.0009955271565495207, + "loss": 0.2099, + "step": 15580 + }, + { + "epoch": 9.96, + "learning_rate": 0.0009961661341853036, + "loss": 0.2159, + "step": 15590 + }, + { + "epoch": 9.97, + "learning_rate": 0.0009968051118210862, + "loss": 0.2077, + "step": 15600 + }, + { + "epoch": 9.97, + "learning_rate": 0.000997444089456869, + "loss": 0.2324, + "step": 15610 + }, + { + "epoch": 9.98, + "learning_rate": 0.0009980830670926518, + "loss": 0.2459, + "step": 15620 + }, + { + "epoch": 9.99, + "learning_rate": 0.0009987220447284346, + "loss": 0.2167, + "step": 15630 + }, + { + "epoch": 9.99, + "learning_rate": 0.0009993610223642173, + "loss": 0.1978, + "step": 15640 + }, + { + "epoch": 10.0, + "learning_rate": 0.001, + "loss": 0.1983, + "step": 15650 + }, + { + "epoch": 10.01, + "learning_rate": 0.000999929002484913, + "loss": 0.1927, + "step": 15660 + }, + { + "epoch": 10.01, + "learning_rate": 0.000999858004969826, + "loss": 0.1822, + "step": 15670 + }, + { + "epoch": 10.02, + "learning_rate": 0.0009997870074547391, + "loss": 0.1596, + "step": 15680 + }, + { + "epoch": 10.03, + "learning_rate": 0.000999716009939652, + "loss": 0.163, + "step": 15690 + }, + { + "epoch": 10.03, + "learning_rate": 0.0009996450124245652, + "loss": 0.1961, + "step": 15700 + }, + { + "epoch": 10.04, + "learning_rate": 0.0009995740149094782, + "loss": 0.1907, + "step": 15710 + }, + { + "epoch": 10.04, + "learning_rate": 0.0009995030173943912, + "loss": 0.1704, + "step": 15720 + }, + { + "epoch": 10.05, + "learning_rate": 0.0009994320198793041, + "loss": 0.1994, + "step": 15730 + }, + { + "epoch": 10.06, + "learning_rate": 0.0009993610223642173, + "loss": 0.1503, + "step": 15740 + }, + { + "epoch": 10.06, + "learning_rate": 0.0009992900248491302, + "loss": 0.151, + "step": 15750 + }, + { + "epoch": 10.07, + "learning_rate": 0.0009992190273340434, + "loss": 0.1875, + "step": 15760 + }, + { + "epoch": 10.08, + "learning_rate": 0.0009991480298189564, + "loss": 0.1754, + "step": 15770 + }, + { + "epoch": 10.08, + "learning_rate": 0.0009990770323038693, + "loss": 0.1899, + "step": 15780 + }, + { + "epoch": 10.09, + "learning_rate": 0.0009990060347887823, + "loss": 0.1809, + "step": 15790 + }, + { + "epoch": 10.1, + "learning_rate": 0.0009989350372736955, + "loss": 0.1691, + "step": 15800 + }, + { + "epoch": 10.1, + "learning_rate": 0.0009988640397586084, + "loss": 0.1784, + "step": 15810 + }, + { + "epoch": 10.11, + "learning_rate": 0.0009987930422435216, + "loss": 0.1872, + "step": 15820 + }, + { + "epoch": 10.11, + "learning_rate": 0.0009987220447284346, + "loss": 0.1759, + "step": 15830 + }, + { + "epoch": 10.12, + "learning_rate": 0.0009986510472133475, + "loss": 0.1688, + "step": 15840 + }, + { + "epoch": 10.13, + "learning_rate": 0.0009985800496982605, + "loss": 0.2019, + "step": 15850 + }, + { + "epoch": 10.13, + "learning_rate": 0.0009985090521831737, + "loss": 0.1546, + "step": 15860 + }, + { + "epoch": 10.14, + "learning_rate": 0.0009984380546680866, + "loss": 0.207, + "step": 15870 + }, + { + "epoch": 10.15, + "learning_rate": 0.0009983670571529998, + "loss": 0.1832, + "step": 15880 + }, + { + "epoch": 10.15, + "learning_rate": 0.0009982960596379127, + "loss": 0.1724, + "step": 15890 + }, + { + "epoch": 10.16, + "learning_rate": 0.0009982250621228257, + "loss": 0.1905, + "step": 15900 + }, + { + "epoch": 10.17, + "learning_rate": 0.0009981540646077387, + "loss": 0.1705, + "step": 15910 + }, + { + "epoch": 10.17, + "learning_rate": 0.0009980830670926518, + "loss": 0.1848, + "step": 15920 + }, + { + "epoch": 10.18, + "learning_rate": 0.0009980120695775648, + "loss": 0.1958, + "step": 15930 + }, + { + "epoch": 10.19, + "learning_rate": 0.000997941072062478, + "loss": 0.1769, + "step": 15940 + }, + { + "epoch": 10.19, + "learning_rate": 0.000997870074547391, + "loss": 0.1765, + "step": 15950 + }, + { + "epoch": 10.2, + "learning_rate": 0.0009977990770323039, + "loss": 0.2068, + "step": 15960 + }, + { + "epoch": 10.2, + "learning_rate": 0.0009977280795172168, + "loss": 0.146, + "step": 15970 + }, + { + "epoch": 10.21, + "learning_rate": 0.0009976570820021298, + "loss": 0.1541, + "step": 15980 + }, + { + "epoch": 10.22, + "learning_rate": 0.000997586084487043, + "loss": 0.188, + "step": 15990 + }, + { + "epoch": 10.22, + "learning_rate": 0.000997515086971956, + "loss": 0.1822, + "step": 16000 + }, + { + "epoch": 10.23, + "learning_rate": 0.000997444089456869, + "loss": 0.1641, + "step": 16010 + }, + { + "epoch": 10.24, + "learning_rate": 0.000997373091941782, + "loss": 0.1893, + "step": 16020 + }, + { + "epoch": 10.24, + "learning_rate": 0.000997302094426695, + "loss": 0.191, + "step": 16030 + }, + { + "epoch": 10.25, + "learning_rate": 0.000997231096911608, + "loss": 0.2111, + "step": 16040 + }, + { + "epoch": 10.26, + "learning_rate": 0.0009971600993965212, + "loss": 0.186, + "step": 16050 + }, + { + "epoch": 10.26, + "learning_rate": 0.0009970891018814341, + "loss": 0.1952, + "step": 16060 + }, + { + "epoch": 10.27, + "learning_rate": 0.0009970181043663473, + "loss": 0.1963, + "step": 16070 + }, + { + "epoch": 10.27, + "learning_rate": 0.0009969471068512602, + "loss": 0.183, + "step": 16080 + }, + { + "epoch": 10.28, + "learning_rate": 0.0009968761093361732, + "loss": 0.1845, + "step": 16090 + }, + { + "epoch": 10.29, + "learning_rate": 0.0009968051118210862, + "loss": 0.2002, + "step": 16100 + }, + { + "epoch": 10.29, + "learning_rate": 0.0009967341143059993, + "loss": 0.1953, + "step": 16110 + }, + { + "epoch": 10.3, + "learning_rate": 0.0009966631167909123, + "loss": 0.1778, + "step": 16120 + }, + { + "epoch": 10.31, + "learning_rate": 0.0009965921192758255, + "loss": 0.2073, + "step": 16130 + }, + { + "epoch": 10.31, + "learning_rate": 0.0009965211217607384, + "loss": 0.1755, + "step": 16140 + }, + { + "epoch": 10.32, + "learning_rate": 0.0009964501242456514, + "loss": 0.2085, + "step": 16150 + }, + { + "epoch": 10.33, + "learning_rate": 0.0009963791267305643, + "loss": 0.1978, + "step": 16160 + }, + { + "epoch": 10.33, + "learning_rate": 0.0009963081292154775, + "loss": 0.2101, + "step": 16170 + }, + { + "epoch": 10.34, + "learning_rate": 0.0009962371317003905, + "loss": 0.2027, + "step": 16180 + }, + { + "epoch": 10.34, + "learning_rate": 0.0009961661341853036, + "loss": 0.1905, + "step": 16190 + }, + { + "epoch": 10.35, + "learning_rate": 0.0009960951366702166, + "loss": 0.175, + "step": 16200 + }, + { + "epoch": 10.36, + "learning_rate": 0.0009960241391551296, + "loss": 0.1816, + "step": 16210 + }, + { + "epoch": 10.36, + "learning_rate": 0.0009959531416400425, + "loss": 0.1917, + "step": 16220 + }, + { + "epoch": 10.37, + "learning_rate": 0.0009958821441249557, + "loss": 0.1956, + "step": 16230 + }, + { + "epoch": 10.38, + "learning_rate": 0.0009958111466098687, + "loss": 0.2037, + "step": 16240 + }, + { + "epoch": 10.38, + "learning_rate": 0.0009957401490947818, + "loss": 0.2194, + "step": 16250 + }, + { + "epoch": 10.39, + "learning_rate": 0.0009956691515796948, + "loss": 0.1633, + "step": 16260 + }, + { + "epoch": 10.4, + "learning_rate": 0.0009955981540646077, + "loss": 0.1803, + "step": 16270 + }, + { + "epoch": 10.4, + "learning_rate": 0.0009955271565495207, + "loss": 0.1885, + "step": 16280 + }, + { + "epoch": 10.41, + "learning_rate": 0.0009954561590344337, + "loss": 0.2004, + "step": 16290 + }, + { + "epoch": 10.42, + "learning_rate": 0.0009953851615193468, + "loss": 0.1961, + "step": 16300 + }, + { + "epoch": 10.42, + "learning_rate": 0.0009953141640042598, + "loss": 0.1827, + "step": 16310 + }, + { + "epoch": 10.43, + "learning_rate": 0.000995243166489173, + "loss": 0.1775, + "step": 16320 + }, + { + "epoch": 10.43, + "learning_rate": 0.000995172168974086, + "loss": 0.1905, + "step": 16330 + }, + { + "epoch": 10.44, + "learning_rate": 0.0009951011714589989, + "loss": 0.2072, + "step": 16340 + }, + { + "epoch": 10.45, + "learning_rate": 0.0009950301739439118, + "loss": 0.1725, + "step": 16350 + }, + { + "epoch": 10.45, + "learning_rate": 0.000994959176428825, + "loss": 0.2023, + "step": 16360 + }, + { + "epoch": 10.46, + "learning_rate": 0.000994888178913738, + "loss": 0.1921, + "step": 16370 + }, + { + "epoch": 10.47, + "learning_rate": 0.0009948171813986511, + "loss": 0.1988, + "step": 16380 + }, + { + "epoch": 10.47, + "learning_rate": 0.000994746183883564, + "loss": 0.1888, + "step": 16390 + }, + { + "epoch": 10.48, + "learning_rate": 0.000994675186368477, + "loss": 0.2047, + "step": 16400 + }, + { + "epoch": 10.49, + "learning_rate": 0.00099460418885339, + "loss": 0.179, + "step": 16410 + }, + { + "epoch": 10.49, + "learning_rate": 0.0009945331913383032, + "loss": 0.1867, + "step": 16420 + }, + { + "epoch": 10.5, + "learning_rate": 0.0009944621938232162, + "loss": 0.1868, + "step": 16430 + }, + { + "epoch": 10.5, + "learning_rate": 0.0009943911963081293, + "loss": 0.212, + "step": 16440 + }, + { + "epoch": 10.51, + "learning_rate": 0.0009943201987930423, + "loss": 0.1937, + "step": 16450 + }, + { + "epoch": 10.52, + "learning_rate": 0.0009942492012779552, + "loss": 0.1915, + "step": 16460 + }, + { + "epoch": 10.52, + "learning_rate": 0.0009941782037628682, + "loss": 0.1712, + "step": 16470 + }, + { + "epoch": 10.53, + "learning_rate": 0.0009941072062477814, + "loss": 0.2135, + "step": 16480 + }, + { + "epoch": 10.54, + "learning_rate": 0.0009940362087326943, + "loss": 0.1716, + "step": 16490 + }, + { + "epoch": 10.54, + "learning_rate": 0.0009939652112176075, + "loss": 0.193, + "step": 16500 + }, + { + "epoch": 10.55, + "learning_rate": 0.0009938942137025205, + "loss": 0.2042, + "step": 16510 + }, + { + "epoch": 10.56, + "learning_rate": 0.0009938232161874334, + "loss": 0.2068, + "step": 16520 + }, + { + "epoch": 10.56, + "learning_rate": 0.0009937522186723464, + "loss": 0.1977, + "step": 16530 + }, + { + "epoch": 10.57, + "learning_rate": 0.0009936812211572596, + "loss": 0.1968, + "step": 16540 + }, + { + "epoch": 10.57, + "learning_rate": 0.0009936102236421725, + "loss": 0.2021, + "step": 16550 + }, + { + "epoch": 10.58, + "learning_rate": 0.0009935392261270857, + "loss": 0.2083, + "step": 16560 + }, + { + "epoch": 10.59, + "learning_rate": 0.0009934682286119986, + "loss": 0.1932, + "step": 16570 + }, + { + "epoch": 10.59, + "learning_rate": 0.0009933972310969116, + "loss": 0.2191, + "step": 16580 + }, + { + "epoch": 10.6, + "learning_rate": 0.0009933262335818246, + "loss": 0.2102, + "step": 16590 + }, + { + "epoch": 10.61, + "learning_rate": 0.0009932552360667377, + "loss": 0.1698, + "step": 16600 + }, + { + "epoch": 10.61, + "learning_rate": 0.0009931842385516507, + "loss": 0.2065, + "step": 16610 + }, + { + "epoch": 10.62, + "learning_rate": 0.0009931132410365637, + "loss": 0.1976, + "step": 16620 + }, + { + "epoch": 10.63, + "learning_rate": 0.0009930422435214768, + "loss": 0.2074, + "step": 16630 + }, + { + "epoch": 10.63, + "learning_rate": 0.0009929712460063898, + "loss": 0.22, + "step": 16640 + }, + { + "epoch": 10.64, + "learning_rate": 0.0009929002484913027, + "loss": 0.1612, + "step": 16650 + }, + { + "epoch": 10.65, + "learning_rate": 0.000992829250976216, + "loss": 0.2161, + "step": 16660 + }, + { + "epoch": 10.65, + "learning_rate": 0.0009927582534611289, + "loss": 0.1898, + "step": 16670 + }, + { + "epoch": 10.66, + "learning_rate": 0.0009926872559460418, + "loss": 0.155, + "step": 16680 + }, + { + "epoch": 10.66, + "learning_rate": 0.000992616258430955, + "loss": 0.1943, + "step": 16690 + }, + { + "epoch": 10.67, + "learning_rate": 0.000992545260915868, + "loss": 0.1812, + "step": 16700 + }, + { + "epoch": 10.68, + "learning_rate": 0.000992474263400781, + "loss": 0.19, + "step": 16710 + }, + { + "epoch": 10.68, + "learning_rate": 0.000992403265885694, + "loss": 0.2032, + "step": 16720 + }, + { + "epoch": 10.69, + "learning_rate": 0.000992332268370607, + "loss": 0.1948, + "step": 16730 + }, + { + "epoch": 10.7, + "learning_rate": 0.00099226127085552, + "loss": 0.2, + "step": 16740 + }, + { + "epoch": 10.7, + "learning_rate": 0.0009921902733404332, + "loss": 0.1899, + "step": 16750 + }, + { + "epoch": 10.71, + "learning_rate": 0.0009921192758253461, + "loss": 0.1937, + "step": 16760 + }, + { + "epoch": 10.72, + "learning_rate": 0.000992048278310259, + "loss": 0.1854, + "step": 16770 + }, + { + "epoch": 10.72, + "learning_rate": 0.0009919772807951723, + "loss": 0.2145, + "step": 16780 + }, + { + "epoch": 10.73, + "learning_rate": 0.0009919062832800852, + "loss": 0.2008, + "step": 16790 + }, + { + "epoch": 10.73, + "learning_rate": 0.0009918352857649982, + "loss": 0.1859, + "step": 16800 + }, + { + "epoch": 10.74, + "learning_rate": 0.0009917642882499114, + "loss": 0.1952, + "step": 16810 + }, + { + "epoch": 10.75, + "learning_rate": 0.0009916932907348243, + "loss": 0.1662, + "step": 16820 + }, + { + "epoch": 10.75, + "learning_rate": 0.0009916222932197373, + "loss": 0.1971, + "step": 16830 + }, + { + "epoch": 10.76, + "learning_rate": 0.0009915512957046502, + "loss": 0.176, + "step": 16840 + }, + { + "epoch": 10.77, + "learning_rate": 0.0009914802981895634, + "loss": 0.1621, + "step": 16850 + }, + { + "epoch": 10.77, + "learning_rate": 0.0009914093006744764, + "loss": 0.2204, + "step": 16860 + }, + { + "epoch": 10.78, + "learning_rate": 0.0009913383031593895, + "loss": 0.1798, + "step": 16870 + }, + { + "epoch": 10.79, + "learning_rate": 0.0009912673056443025, + "loss": 0.1984, + "step": 16880 + }, + { + "epoch": 10.79, + "learning_rate": 0.0009911963081292155, + "loss": 0.1789, + "step": 16890 + }, + { + "epoch": 10.8, + "learning_rate": 0.0009911253106141284, + "loss": 0.1897, + "step": 16900 + }, + { + "epoch": 10.8, + "learning_rate": 0.0009910543130990416, + "loss": 0.1888, + "step": 16910 + }, + { + "epoch": 10.81, + "learning_rate": 0.0009909833155839546, + "loss": 0.1828, + "step": 16920 + }, + { + "epoch": 10.82, + "learning_rate": 0.0009909123180688675, + "loss": 0.1798, + "step": 16930 + }, + { + "epoch": 10.82, + "learning_rate": 0.0009908413205537807, + "loss": 0.1729, + "step": 16940 + }, + { + "epoch": 10.83, + "learning_rate": 0.0009907703230386936, + "loss": 0.2113, + "step": 16950 + }, + { + "epoch": 10.84, + "learning_rate": 0.0009906993255236066, + "loss": 0.2041, + "step": 16960 + }, + { + "epoch": 10.84, + "learning_rate": 0.0009906283280085198, + "loss": 0.1625, + "step": 16970 + }, + { + "epoch": 10.85, + "learning_rate": 0.0009905573304934327, + "loss": 0.1726, + "step": 16980 + }, + { + "epoch": 10.86, + "learning_rate": 0.0009904863329783457, + "loss": 0.1875, + "step": 16990 + }, + { + "epoch": 10.86, + "learning_rate": 0.0009904153354632589, + "loss": 0.2126, + "step": 17000 + }, + { + "epoch": 10.87, + "learning_rate": 0.0009903443379481718, + "loss": 0.1882, + "step": 17010 + }, + { + "epoch": 10.88, + "learning_rate": 0.0009902733404330848, + "loss": 0.2062, + "step": 17020 + }, + { + "epoch": 10.88, + "learning_rate": 0.000990202342917998, + "loss": 0.1705, + "step": 17030 + }, + { + "epoch": 10.89, + "learning_rate": 0.000990131345402911, + "loss": 0.2103, + "step": 17040 + }, + { + "epoch": 10.89, + "learning_rate": 0.0009900603478878239, + "loss": 0.1766, + "step": 17050 + }, + { + "epoch": 10.9, + "learning_rate": 0.000989989350372737, + "loss": 0.189, + "step": 17060 + }, + { + "epoch": 10.91, + "learning_rate": 0.00098991835285765, + "loss": 0.1797, + "step": 17070 + }, + { + "epoch": 10.91, + "learning_rate": 0.000989847355342563, + "loss": 0.2013, + "step": 17080 + }, + { + "epoch": 10.92, + "learning_rate": 0.0009897763578274761, + "loss": 0.2034, + "step": 17090 + }, + { + "epoch": 10.93, + "learning_rate": 0.000989705360312389, + "loss": 0.1947, + "step": 17100 + }, + { + "epoch": 10.93, + "learning_rate": 0.000989634362797302, + "loss": 0.2248, + "step": 17110 + }, + { + "epoch": 10.94, + "learning_rate": 0.0009895633652822152, + "loss": 0.2021, + "step": 17120 + }, + { + "epoch": 10.95, + "learning_rate": 0.0009894923677671282, + "loss": 0.1932, + "step": 17130 + }, + { + "epoch": 10.95, + "learning_rate": 0.0009894213702520411, + "loss": 0.1998, + "step": 17140 + }, + { + "epoch": 10.96, + "learning_rate": 0.0009893503727369543, + "loss": 0.2081, + "step": 17150 + }, + { + "epoch": 10.96, + "learning_rate": 0.0009892793752218673, + "loss": 0.2047, + "step": 17160 + }, + { + "epoch": 10.97, + "learning_rate": 0.0009892083777067802, + "loss": 0.1864, + "step": 17170 + }, + { + "epoch": 10.98, + "learning_rate": 0.0009891373801916934, + "loss": 0.2137, + "step": 17180 + }, + { + "epoch": 10.98, + "learning_rate": 0.0009890663826766064, + "loss": 0.1858, + "step": 17190 + }, + { + "epoch": 10.99, + "learning_rate": 0.0009889953851615193, + "loss": 0.2036, + "step": 17200 + }, + { + "epoch": 11.0, + "learning_rate": 0.0009889243876464325, + "loss": 0.1979, + "step": 17210 + }, + { + "epoch": 11.0, + "learning_rate": 0.0009888533901313455, + "loss": 0.1788, + "step": 17220 + }, + { + "epoch": 11.01, + "learning_rate": 0.0009887823926162584, + "loss": 0.1777, + "step": 17230 + }, + { + "epoch": 11.02, + "learning_rate": 0.0009887113951011714, + "loss": 0.1835, + "step": 17240 + }, + { + "epoch": 11.02, + "learning_rate": 0.0009886403975860845, + "loss": 0.1546, + "step": 17250 + }, + { + "epoch": 11.03, + "learning_rate": 0.0009885694000709975, + "loss": 0.1461, + "step": 17260 + }, + { + "epoch": 11.04, + "learning_rate": 0.0009884984025559107, + "loss": 0.1716, + "step": 17270 + }, + { + "epoch": 11.04, + "learning_rate": 0.0009884274050408236, + "loss": 0.1556, + "step": 17280 + }, + { + "epoch": 11.05, + "learning_rate": 0.0009883564075257366, + "loss": 0.1664, + "step": 17290 + }, + { + "epoch": 11.05, + "learning_rate": 0.0009882854100106496, + "loss": 0.1738, + "step": 17300 + }, + { + "epoch": 11.06, + "learning_rate": 0.0009882144124955627, + "loss": 0.1797, + "step": 17310 + }, + { + "epoch": 11.07, + "learning_rate": 0.0009881434149804757, + "loss": 0.2018, + "step": 17320 + }, + { + "epoch": 11.07, + "learning_rate": 0.0009880724174653889, + "loss": 0.1683, + "step": 17330 + }, + { + "epoch": 11.08, + "learning_rate": 0.0009880014199503018, + "loss": 0.1928, + "step": 17340 + }, + { + "epoch": 11.09, + "learning_rate": 0.0009879304224352148, + "loss": 0.1737, + "step": 17350 + }, + { + "epoch": 11.09, + "learning_rate": 0.0009878594249201277, + "loss": 0.1715, + "step": 17360 + }, + { + "epoch": 11.1, + "learning_rate": 0.000987788427405041, + "loss": 0.1691, + "step": 17370 + }, + { + "epoch": 11.11, + "learning_rate": 0.0009877174298899539, + "loss": 0.1462, + "step": 17380 + }, + { + "epoch": 11.11, + "learning_rate": 0.000987646432374867, + "loss": 0.1835, + "step": 17390 + }, + { + "epoch": 11.12, + "learning_rate": 0.00098757543485978, + "loss": 0.1515, + "step": 17400 + }, + { + "epoch": 11.12, + "learning_rate": 0.000987504437344693, + "loss": 0.1819, + "step": 17410 + }, + { + "epoch": 11.13, + "learning_rate": 0.000987433439829606, + "loss": 0.137, + "step": 17420 + }, + { + "epoch": 11.14, + "learning_rate": 0.000987362442314519, + "loss": 0.1742, + "step": 17430 + }, + { + "epoch": 11.14, + "learning_rate": 0.000987291444799432, + "loss": 0.147, + "step": 17440 + }, + { + "epoch": 11.15, + "learning_rate": 0.000987220447284345, + "loss": 0.1645, + "step": 17450 + }, + { + "epoch": 11.16, + "learning_rate": 0.0009871494497692582, + "loss": 0.1775, + "step": 17460 + }, + { + "epoch": 11.16, + "learning_rate": 0.0009870784522541711, + "loss": 0.1766, + "step": 17470 + }, + { + "epoch": 11.17, + "learning_rate": 0.000987007454739084, + "loss": 0.19, + "step": 17480 + }, + { + "epoch": 11.18, + "learning_rate": 0.0009869364572239973, + "loss": 0.1521, + "step": 17490 + }, + { + "epoch": 11.18, + "learning_rate": 0.0009868654597089102, + "loss": 0.1355, + "step": 17500 + }, + { + "epoch": 11.19, + "learning_rate": 0.0009867944621938232, + "loss": 0.169, + "step": 17510 + }, + { + "epoch": 11.19, + "learning_rate": 0.0009867234646787364, + "loss": 0.1842, + "step": 17520 + }, + { + "epoch": 11.2, + "learning_rate": 0.0009866524671636493, + "loss": 0.1588, + "step": 17530 + }, + { + "epoch": 11.21, + "learning_rate": 0.0009865814696485623, + "loss": 0.1587, + "step": 17540 + }, + { + "epoch": 11.21, + "learning_rate": 0.0009865104721334752, + "loss": 0.1792, + "step": 17550 + }, + { + "epoch": 11.22, + "learning_rate": 0.0009864394746183884, + "loss": 0.1832, + "step": 17560 + }, + { + "epoch": 11.23, + "learning_rate": 0.0009863684771033014, + "loss": 0.145, + "step": 17570 + }, + { + "epoch": 11.23, + "learning_rate": 0.0009862974795882145, + "loss": 0.1764, + "step": 17580 + }, + { + "epoch": 11.24, + "learning_rate": 0.0009862264820731275, + "loss": 0.1706, + "step": 17590 + }, + { + "epoch": 11.25, + "learning_rate": 0.0009861554845580405, + "loss": 0.1737, + "step": 17600 + }, + { + "epoch": 11.25, + "learning_rate": 0.0009860844870429534, + "loss": 0.1445, + "step": 17610 + }, + { + "epoch": 11.26, + "learning_rate": 0.0009860134895278666, + "loss": 0.1704, + "step": 17620 + }, + { + "epoch": 11.27, + "learning_rate": 0.0009859424920127795, + "loss": 0.1714, + "step": 17630 + }, + { + "epoch": 11.27, + "learning_rate": 0.0009858714944976927, + "loss": 0.1495, + "step": 17640 + }, + { + "epoch": 11.28, + "learning_rate": 0.0009858004969826057, + "loss": 0.1698, + "step": 17650 + }, + { + "epoch": 11.28, + "learning_rate": 0.0009857294994675186, + "loss": 0.1824, + "step": 17660 + }, + { + "epoch": 11.29, + "learning_rate": 0.0009856585019524316, + "loss": 0.1515, + "step": 17670 + }, + { + "epoch": 11.3, + "learning_rate": 0.0009855875044373448, + "loss": 0.169, + "step": 17680 + }, + { + "epoch": 11.3, + "learning_rate": 0.0009855165069222577, + "loss": 0.1814, + "step": 17690 + }, + { + "epoch": 11.31, + "learning_rate": 0.000985445509407171, + "loss": 0.1548, + "step": 17700 + }, + { + "epoch": 11.32, + "learning_rate": 0.0009853745118920839, + "loss": 0.1615, + "step": 17710 + }, + { + "epoch": 11.32, + "learning_rate": 0.0009853035143769968, + "loss": 0.1975, + "step": 17720 + }, + { + "epoch": 11.33, + "learning_rate": 0.0009852325168619098, + "loss": 0.1402, + "step": 17730 + }, + { + "epoch": 11.34, + "learning_rate": 0.000985161519346823, + "loss": 0.1733, + "step": 17740 + }, + { + "epoch": 11.34, + "learning_rate": 0.000985090521831736, + "loss": 0.1782, + "step": 17750 + }, + { + "epoch": 11.35, + "learning_rate": 0.000985019524316649, + "loss": 0.1664, + "step": 17760 + }, + { + "epoch": 11.35, + "learning_rate": 0.000984948526801562, + "loss": 0.2117, + "step": 17770 + }, + { + "epoch": 11.36, + "learning_rate": 0.000984877529286475, + "loss": 0.1698, + "step": 17780 + }, + { + "epoch": 11.37, + "learning_rate": 0.000984806531771388, + "loss": 0.1607, + "step": 17790 + }, + { + "epoch": 11.37, + "learning_rate": 0.0009847355342563011, + "loss": 0.171, + "step": 17800 + }, + { + "epoch": 11.38, + "learning_rate": 0.000984664536741214, + "loss": 0.1969, + "step": 17810 + }, + { + "epoch": 11.39, + "learning_rate": 0.0009845935392261273, + "loss": 0.1615, + "step": 17820 + }, + { + "epoch": 11.39, + "learning_rate": 0.0009845225417110402, + "loss": 0.1649, + "step": 17830 + }, + { + "epoch": 11.4, + "learning_rate": 0.0009844515441959532, + "loss": 0.1773, + "step": 17840 + }, + { + "epoch": 11.41, + "learning_rate": 0.0009843805466808661, + "loss": 0.19, + "step": 17850 + }, + { + "epoch": 11.41, + "learning_rate": 0.000984309549165779, + "loss": 0.1501, + "step": 17860 + }, + { + "epoch": 11.42, + "learning_rate": 0.0009842385516506923, + "loss": 0.1397, + "step": 17870 + }, + { + "epoch": 11.42, + "learning_rate": 0.0009841675541356052, + "loss": 0.1641, + "step": 17880 + }, + { + "epoch": 11.43, + "learning_rate": 0.0009840965566205184, + "loss": 0.161, + "step": 17890 + }, + { + "epoch": 11.44, + "learning_rate": 0.0009840255591054314, + "loss": 0.1751, + "step": 17900 + }, + { + "epoch": 11.44, + "learning_rate": 0.0009839545615903443, + "loss": 0.1774, + "step": 17910 + }, + { + "epoch": 11.45, + "learning_rate": 0.0009838835640752573, + "loss": 0.2226, + "step": 17920 + }, + { + "epoch": 11.46, + "learning_rate": 0.0009838125665601704, + "loss": 0.1531, + "step": 17930 + }, + { + "epoch": 11.46, + "learning_rate": 0.0009837415690450834, + "loss": 0.1807, + "step": 17940 + }, + { + "epoch": 11.47, + "learning_rate": 0.0009836705715299966, + "loss": 0.1769, + "step": 17950 + }, + { + "epoch": 11.48, + "learning_rate": 0.0009835995740149095, + "loss": 0.1727, + "step": 17960 + }, + { + "epoch": 11.48, + "learning_rate": 0.0009835285764998225, + "loss": 0.1738, + "step": 17970 + }, + { + "epoch": 11.49, + "learning_rate": 0.0009834575789847355, + "loss": 0.1354, + "step": 17980 + }, + { + "epoch": 11.5, + "learning_rate": 0.0009833865814696486, + "loss": 0.1979, + "step": 17990 + }, + { + "epoch": 11.5, + "learning_rate": 0.0009833155839545616, + "loss": 0.1774, + "step": 18000 + }, + { + "epoch": 11.51, + "learning_rate": 0.0009832445864394748, + "loss": 0.1869, + "step": 18010 + }, + { + "epoch": 11.51, + "learning_rate": 0.0009831735889243877, + "loss": 0.1486, + "step": 18020 + }, + { + "epoch": 11.52, + "learning_rate": 0.0009831025914093007, + "loss": 0.2041, + "step": 18030 + }, + { + "epoch": 11.53, + "learning_rate": 0.0009830315938942136, + "loss": 0.188, + "step": 18040 + }, + { + "epoch": 11.53, + "learning_rate": 0.0009829605963791268, + "loss": 0.1349, + "step": 18050 + }, + { + "epoch": 11.54, + "learning_rate": 0.0009828895988640398, + "loss": 0.1786, + "step": 18060 + }, + { + "epoch": 11.55, + "learning_rate": 0.000982818601348953, + "loss": 0.1886, + "step": 18070 + }, + { + "epoch": 11.55, + "learning_rate": 0.000982747603833866, + "loss": 0.1876, + "step": 18080 + }, + { + "epoch": 11.56, + "learning_rate": 0.0009826766063187789, + "loss": 0.189, + "step": 18090 + }, + { + "epoch": 11.57, + "learning_rate": 0.0009826056088036918, + "loss": 0.1677, + "step": 18100 + }, + { + "epoch": 11.57, + "learning_rate": 0.000982534611288605, + "loss": 0.2143, + "step": 18110 + }, + { + "epoch": 11.58, + "learning_rate": 0.000982463613773518, + "loss": 0.1722, + "step": 18120 + }, + { + "epoch": 11.58, + "learning_rate": 0.0009823926162584311, + "loss": 0.1762, + "step": 18130 + }, + { + "epoch": 11.59, + "learning_rate": 0.000982321618743344, + "loss": 0.1833, + "step": 18140 + }, + { + "epoch": 11.6, + "learning_rate": 0.000982250621228257, + "loss": 0.1845, + "step": 18150 + }, + { + "epoch": 11.6, + "learning_rate": 0.00098217962371317, + "loss": 0.1567, + "step": 18160 + }, + { + "epoch": 11.61, + "learning_rate": 0.000982108626198083, + "loss": 0.1642, + "step": 18170 + }, + { + "epoch": 11.62, + "learning_rate": 0.0009820376286829961, + "loss": 0.1576, + "step": 18180 + }, + { + "epoch": 11.62, + "learning_rate": 0.000981966631167909, + "loss": 0.1377, + "step": 18190 + }, + { + "epoch": 11.63, + "learning_rate": 0.0009818956336528223, + "loss": 0.1921, + "step": 18200 + }, + { + "epoch": 11.64, + "learning_rate": 0.0009818246361377352, + "loss": 0.2037, + "step": 18210 + }, + { + "epoch": 11.64, + "learning_rate": 0.0009817536386226482, + "loss": 0.1988, + "step": 18220 + }, + { + "epoch": 11.65, + "learning_rate": 0.0009816826411075611, + "loss": 0.1576, + "step": 18230 + }, + { + "epoch": 11.65, + "learning_rate": 0.0009816116435924743, + "loss": 0.1727, + "step": 18240 + }, + { + "epoch": 11.66, + "learning_rate": 0.0009815406460773873, + "loss": 0.1925, + "step": 18250 + }, + { + "epoch": 11.67, + "learning_rate": 0.0009814696485623004, + "loss": 0.1942, + "step": 18260 + }, + { + "epoch": 11.67, + "learning_rate": 0.0009813986510472134, + "loss": 0.1506, + "step": 18270 + }, + { + "epoch": 11.68, + "learning_rate": 0.0009813276535321264, + "loss": 0.1973, + "step": 18280 + }, + { + "epoch": 11.69, + "learning_rate": 0.0009812566560170393, + "loss": 0.1641, + "step": 18290 + }, + { + "epoch": 11.69, + "learning_rate": 0.0009811856585019525, + "loss": 0.1719, + "step": 18300 + }, + { + "epoch": 11.7, + "learning_rate": 0.0009811146609868654, + "loss": 0.1681, + "step": 18310 + }, + { + "epoch": 11.71, + "learning_rate": 0.0009810436634717786, + "loss": 0.1644, + "step": 18320 + }, + { + "epoch": 11.71, + "learning_rate": 0.0009809726659566916, + "loss": 0.153, + "step": 18330 + }, + { + "epoch": 11.72, + "learning_rate": 0.0009809016684416045, + "loss": 0.1978, + "step": 18340 + }, + { + "epoch": 11.73, + "learning_rate": 0.0009808306709265175, + "loss": 0.1808, + "step": 18350 + }, + { + "epoch": 11.73, + "learning_rate": 0.0009807596734114307, + "loss": 0.1967, + "step": 18360 + }, + { + "epoch": 11.74, + "learning_rate": 0.0009806886758963436, + "loss": 0.1699, + "step": 18370 + }, + { + "epoch": 11.74, + "learning_rate": 0.0009806176783812568, + "loss": 0.1866, + "step": 18380 + }, + { + "epoch": 11.75, + "learning_rate": 0.0009805466808661698, + "loss": 0.1813, + "step": 18390 + }, + { + "epoch": 11.76, + "learning_rate": 0.0009804756833510827, + "loss": 0.1742, + "step": 18400 + }, + { + "epoch": 11.76, + "learning_rate": 0.0009804046858359957, + "loss": 0.2112, + "step": 18410 + }, + { + "epoch": 11.77, + "learning_rate": 0.0009803336883209088, + "loss": 0.1966, + "step": 18420 + }, + { + "epoch": 11.78, + "learning_rate": 0.0009802626908058218, + "loss": 0.1639, + "step": 18430 + }, + { + "epoch": 11.78, + "learning_rate": 0.000980191693290735, + "loss": 0.1614, + "step": 18440 + }, + { + "epoch": 11.79, + "learning_rate": 0.000980120695775648, + "loss": 0.1992, + "step": 18450 + }, + { + "epoch": 11.8, + "learning_rate": 0.000980049698260561, + "loss": 0.1965, + "step": 18460 + }, + { + "epoch": 11.8, + "learning_rate": 0.0009799787007454739, + "loss": 0.1885, + "step": 18470 + }, + { + "epoch": 11.81, + "learning_rate": 0.0009799077032303868, + "loss": 0.1509, + "step": 18480 + }, + { + "epoch": 11.81, + "learning_rate": 0.0009798367057153, + "loss": 0.1952, + "step": 18490 + }, + { + "epoch": 11.82, + "learning_rate": 0.000979765708200213, + "loss": 0.1801, + "step": 18500 + }, + { + "epoch": 11.83, + "learning_rate": 0.0009796947106851261, + "loss": 0.1521, + "step": 18510 + }, + { + "epoch": 11.83, + "learning_rate": 0.000979623713170039, + "loss": 0.1632, + "step": 18520 + }, + { + "epoch": 11.84, + "learning_rate": 0.000979552715654952, + "loss": 0.1945, + "step": 18530 + }, + { + "epoch": 11.85, + "learning_rate": 0.000979481718139865, + "loss": 0.1883, + "step": 18540 + }, + { + "epoch": 11.85, + "learning_rate": 0.0009794107206247782, + "loss": 0.1832, + "step": 18550 + }, + { + "epoch": 11.86, + "learning_rate": 0.0009793397231096911, + "loss": 0.1933, + "step": 18560 + }, + { + "epoch": 11.87, + "learning_rate": 0.0009792687255946043, + "loss": 0.1785, + "step": 18570 + }, + { + "epoch": 11.87, + "learning_rate": 0.0009791977280795173, + "loss": 0.1788, + "step": 18580 + }, + { + "epoch": 11.88, + "learning_rate": 0.0009791267305644302, + "loss": 0.18, + "step": 18590 + }, + { + "epoch": 11.88, + "learning_rate": 0.0009790557330493432, + "loss": 0.1882, + "step": 18600 + }, + { + "epoch": 11.89, + "learning_rate": 0.0009789847355342563, + "loss": 0.1823, + "step": 18610 + }, + { + "epoch": 11.9, + "learning_rate": 0.0009789137380191693, + "loss": 0.1843, + "step": 18620 + }, + { + "epoch": 11.9, + "learning_rate": 0.0009788427405040825, + "loss": 0.1725, + "step": 18630 + }, + { + "epoch": 11.91, + "learning_rate": 0.0009787717429889954, + "loss": 0.1905, + "step": 18640 + }, + { + "epoch": 11.92, + "learning_rate": 0.0009787007454739084, + "loss": 0.1904, + "step": 18650 + }, + { + "epoch": 11.92, + "learning_rate": 0.0009786297479588214, + "loss": 0.1537, + "step": 18660 + }, + { + "epoch": 11.93, + "learning_rate": 0.0009785587504437345, + "loss": 0.1519, + "step": 18670 + }, + { + "epoch": 11.94, + "learning_rate": 0.0009784877529286475, + "loss": 0.1831, + "step": 18680 + }, + { + "epoch": 11.94, + "learning_rate": 0.0009784167554135607, + "loss": 0.1562, + "step": 18690 + }, + { + "epoch": 11.95, + "learning_rate": 0.0009783457578984736, + "loss": 0.2007, + "step": 18700 + }, + { + "epoch": 11.95, + "learning_rate": 0.0009782747603833866, + "loss": 0.1683, + "step": 18710 + }, + { + "epoch": 11.96, + "learning_rate": 0.0009782037628682995, + "loss": 0.1927, + "step": 18720 + }, + { + "epoch": 11.97, + "learning_rate": 0.0009781327653532127, + "loss": 0.178, + "step": 18730 + }, + { + "epoch": 11.97, + "learning_rate": 0.0009780617678381257, + "loss": 0.1629, + "step": 18740 + }, + { + "epoch": 11.98, + "learning_rate": 0.0009779907703230388, + "loss": 0.177, + "step": 18750 + }, + { + "epoch": 11.99, + "learning_rate": 0.0009779197728079518, + "loss": 0.1758, + "step": 18760 + }, + { + "epoch": 11.99, + "learning_rate": 0.0009778487752928648, + "loss": 0.2114, + "step": 18770 + }, + { + "epoch": 12.0, + "learning_rate": 0.0009777777777777777, + "loss": 0.1582, + "step": 18780 + }, + { + "epoch": 12.01, + "learning_rate": 0.0009777067802626907, + "loss": 0.1586, + "step": 18790 + }, + { + "epoch": 12.01, + "learning_rate": 0.0009776357827476038, + "loss": 0.1523, + "step": 18800 + }, + { + "epoch": 12.02, + "learning_rate": 0.0009775647852325168, + "loss": 0.1614, + "step": 18810 + }, + { + "epoch": 12.03, + "learning_rate": 0.00097749378771743, + "loss": 0.169, + "step": 18820 + }, + { + "epoch": 12.03, + "learning_rate": 0.000977422790202343, + "loss": 0.1339, + "step": 18830 + }, + { + "epoch": 12.04, + "learning_rate": 0.000977351792687256, + "loss": 0.1313, + "step": 18840 + }, + { + "epoch": 12.04, + "learning_rate": 0.0009772807951721689, + "loss": 0.1481, + "step": 18850 + }, + { + "epoch": 12.05, + "learning_rate": 0.000977209797657082, + "loss": 0.1294, + "step": 18860 + }, + { + "epoch": 12.06, + "learning_rate": 0.000977138800141995, + "loss": 0.1555, + "step": 18870 + }, + { + "epoch": 12.06, + "learning_rate": 0.0009770678026269082, + "loss": 0.1555, + "step": 18880 + }, + { + "epoch": 12.07, + "learning_rate": 0.0009769968051118211, + "loss": 0.1688, + "step": 18890 + }, + { + "epoch": 12.08, + "learning_rate": 0.000976925807596734, + "loss": 0.1738, + "step": 18900 + }, + { + "epoch": 12.08, + "learning_rate": 0.000976854810081647, + "loss": 0.1463, + "step": 18910 + }, + { + "epoch": 12.09, + "learning_rate": 0.0009767838125665602, + "loss": 0.1241, + "step": 18920 + }, + { + "epoch": 12.1, + "learning_rate": 0.0009767128150514732, + "loss": 0.1401, + "step": 18930 + }, + { + "epoch": 12.1, + "learning_rate": 0.0009766418175363863, + "loss": 0.1685, + "step": 18940 + }, + { + "epoch": 12.11, + "learning_rate": 0.0009765708200212993, + "loss": 0.1761, + "step": 18950 + }, + { + "epoch": 12.11, + "learning_rate": 0.0009764998225062123, + "loss": 0.1738, + "step": 18960 + }, + { + "epoch": 12.12, + "learning_rate": 0.0009764288249911253, + "loss": 0.1195, + "step": 18970 + }, + { + "epoch": 12.13, + "learning_rate": 0.0009763578274760384, + "loss": 0.1416, + "step": 18980 + }, + { + "epoch": 12.13, + "learning_rate": 0.0009762868299609515, + "loss": 0.1504, + "step": 18990 + }, + { + "epoch": 12.14, + "learning_rate": 0.0009762158324458644, + "loss": 0.1515, + "step": 19000 + }, + { + "epoch": 12.15, + "learning_rate": 0.0009761448349307775, + "loss": 0.1773, + "step": 19010 + }, + { + "epoch": 12.15, + "learning_rate": 0.0009760738374156904, + "loss": 0.1548, + "step": 19020 + }, + { + "epoch": 12.16, + "learning_rate": 0.0009760028399006035, + "loss": 0.1606, + "step": 19030 + }, + { + "epoch": 12.17, + "learning_rate": 0.0009759318423855166, + "loss": 0.1607, + "step": 19040 + }, + { + "epoch": 12.17, + "learning_rate": 0.0009758608448704296, + "loss": 0.1473, + "step": 19050 + }, + { + "epoch": 12.18, + "learning_rate": 0.0009757898473553426, + "loss": 0.1539, + "step": 19060 + }, + { + "epoch": 12.19, + "learning_rate": 0.0009757188498402556, + "loss": 0.1577, + "step": 19070 + }, + { + "epoch": 12.19, + "learning_rate": 0.0009756478523251686, + "loss": 0.1825, + "step": 19080 + }, + { + "epoch": 12.2, + "learning_rate": 0.0009755768548100816, + "loss": 0.1687, + "step": 19090 + }, + { + "epoch": 12.2, + "learning_rate": 0.0009755058572949948, + "loss": 0.188, + "step": 19100 + }, + { + "epoch": 12.21, + "learning_rate": 0.0009754348597799077, + "loss": 0.1424, + "step": 19110 + }, + { + "epoch": 12.22, + "learning_rate": 0.0009753638622648208, + "loss": 0.1478, + "step": 19120 + }, + { + "epoch": 12.22, + "learning_rate": 0.0009752928647497337, + "loss": 0.1601, + "step": 19130 + }, + { + "epoch": 12.23, + "learning_rate": 0.0009752218672346468, + "loss": 0.1706, + "step": 19140 + }, + { + "epoch": 12.24, + "learning_rate": 0.0009751508697195598, + "loss": 0.1565, + "step": 19150 + }, + { + "epoch": 12.24, + "learning_rate": 0.0009750798722044729, + "loss": 0.1258, + "step": 19160 + }, + { + "epoch": 12.25, + "learning_rate": 0.0009750088746893859, + "loss": 0.1468, + "step": 19170 + }, + { + "epoch": 12.26, + "learning_rate": 0.000974937877174299, + "loss": 0.1865, + "step": 19180 + }, + { + "epoch": 12.26, + "learning_rate": 0.0009748668796592119, + "loss": 0.1551, + "step": 19190 + }, + { + "epoch": 12.27, + "learning_rate": 0.000974795882144125, + "loss": 0.1688, + "step": 19200 + }, + { + "epoch": 12.27, + "learning_rate": 0.0009747248846290379, + "loss": 0.16, + "step": 19210 + }, + { + "epoch": 12.28, + "learning_rate": 0.000974653887113951, + "loss": 0.1686, + "step": 19220 + }, + { + "epoch": 12.29, + "learning_rate": 0.0009745828895988641, + "loss": 0.1644, + "step": 19230 + }, + { + "epoch": 12.29, + "learning_rate": 0.0009745118920837771, + "loss": 0.1605, + "step": 19240 + }, + { + "epoch": 12.3, + "learning_rate": 0.0009744408945686901, + "loss": 0.1506, + "step": 19250 + }, + { + "epoch": 12.31, + "learning_rate": 0.0009743698970536032, + "loss": 0.1436, + "step": 19260 + }, + { + "epoch": 12.31, + "learning_rate": 0.0009742988995385161, + "loss": 0.1685, + "step": 19270 + }, + { + "epoch": 12.32, + "learning_rate": 0.0009742279020234292, + "loss": 0.1505, + "step": 19280 + }, + { + "epoch": 12.33, + "learning_rate": 0.0009741569045083423, + "loss": 0.1609, + "step": 19290 + }, + { + "epoch": 12.33, + "learning_rate": 0.0009740859069932553, + "loss": 0.1803, + "step": 19300 + }, + { + "epoch": 12.34, + "learning_rate": 0.0009740149094781683, + "loss": 0.1806, + "step": 19310 + }, + { + "epoch": 12.34, + "learning_rate": 0.0009739439119630813, + "loss": 0.1733, + "step": 19320 + }, + { + "epoch": 12.35, + "learning_rate": 0.0009738729144479943, + "loss": 0.1618, + "step": 19330 + }, + { + "epoch": 12.36, + "learning_rate": 0.0009738019169329074, + "loss": 0.157, + "step": 19340 + }, + { + "epoch": 12.36, + "learning_rate": 0.0009737309194178204, + "loss": 0.1374, + "step": 19350 + }, + { + "epoch": 12.37, + "learning_rate": 0.0009736599219027335, + "loss": 0.1689, + "step": 19360 + }, + { + "epoch": 12.38, + "learning_rate": 0.0009735889243876465, + "loss": 0.1813, + "step": 19370 + }, + { + "epoch": 12.38, + "learning_rate": 0.0009735179268725594, + "loss": 0.1313, + "step": 19380 + }, + { + "epoch": 12.39, + "learning_rate": 0.0009734469293574725, + "loss": 0.16, + "step": 19390 + }, + { + "epoch": 12.4, + "learning_rate": 0.0009733759318423854, + "loss": 0.1317, + "step": 19400 + }, + { + "epoch": 12.4, + "learning_rate": 0.0009733049343272986, + "loss": 0.13, + "step": 19410 + }, + { + "epoch": 12.41, + "learning_rate": 0.0009732339368122116, + "loss": 0.15, + "step": 19420 + }, + { + "epoch": 12.42, + "learning_rate": 0.0009731629392971246, + "loss": 0.1567, + "step": 19430 + }, + { + "epoch": 12.42, + "learning_rate": 0.0009730919417820376, + "loss": 0.1477, + "step": 19440 + }, + { + "epoch": 12.43, + "learning_rate": 0.0009730209442669507, + "loss": 0.1409, + "step": 19450 + }, + { + "epoch": 12.43, + "learning_rate": 0.0009729499467518636, + "loss": 0.1793, + "step": 19460 + }, + { + "epoch": 12.44, + "learning_rate": 0.0009728789492367768, + "loss": 0.1322, + "step": 19470 + }, + { + "epoch": 12.45, + "learning_rate": 0.0009728079517216898, + "loss": 0.1645, + "step": 19480 + }, + { + "epoch": 12.45, + "learning_rate": 0.0009727369542066028, + "loss": 0.195, + "step": 19490 + }, + { + "epoch": 12.46, + "learning_rate": 0.0009726659566915158, + "loss": 0.1419, + "step": 19500 + }, + { + "epoch": 12.47, + "learning_rate": 0.0009725949591764288, + "loss": 0.1422, + "step": 19510 + }, + { + "epoch": 12.47, + "learning_rate": 0.0009725239616613418, + "loss": 0.1796, + "step": 19520 + }, + { + "epoch": 12.48, + "learning_rate": 0.000972452964146255, + "loss": 0.1615, + "step": 19530 + }, + { + "epoch": 12.49, + "learning_rate": 0.0009723819666311679, + "loss": 0.173, + "step": 19540 + }, + { + "epoch": 12.49, + "learning_rate": 0.000972310969116081, + "loss": 0.1442, + "step": 19550 + }, + { + "epoch": 12.5, + "learning_rate": 0.000972239971600994, + "loss": 0.1648, + "step": 19560 + }, + { + "epoch": 12.5, + "learning_rate": 0.000972168974085907, + "loss": 0.1543, + "step": 19570 + }, + { + "epoch": 12.51, + "learning_rate": 0.00097209797657082, + "loss": 0.1507, + "step": 19580 + }, + { + "epoch": 12.52, + "learning_rate": 0.0009720269790557332, + "loss": 0.1571, + "step": 19590 + }, + { + "epoch": 12.52, + "learning_rate": 0.0009719559815406461, + "loss": 0.1697, + "step": 19600 + }, + { + "epoch": 12.53, + "learning_rate": 0.0009718849840255592, + "loss": 0.1622, + "step": 19610 + }, + { + "epoch": 12.54, + "learning_rate": 0.0009718139865104721, + "loss": 0.1548, + "step": 19620 + }, + { + "epoch": 12.54, + "learning_rate": 0.0009717429889953852, + "loss": 0.169, + "step": 19630 + }, + { + "epoch": 12.55, + "learning_rate": 0.0009716719914802982, + "loss": 0.1644, + "step": 19640 + }, + { + "epoch": 12.56, + "learning_rate": 0.0009716009939652113, + "loss": 0.1387, + "step": 19650 + }, + { + "epoch": 12.56, + "learning_rate": 0.0009715299964501243, + "loss": 0.177, + "step": 19660 + }, + { + "epoch": 12.57, + "learning_rate": 0.0009714589989350374, + "loss": 0.1627, + "step": 19670 + }, + { + "epoch": 12.57, + "learning_rate": 0.0009713880014199503, + "loss": 0.1451, + "step": 19680 + }, + { + "epoch": 12.58, + "learning_rate": 0.0009713170039048633, + "loss": 0.1827, + "step": 19690 + }, + { + "epoch": 12.59, + "learning_rate": 0.0009712460063897763, + "loss": 0.1593, + "step": 19700 + }, + { + "epoch": 12.59, + "learning_rate": 0.0009711750088746894, + "loss": 0.1521, + "step": 19710 + }, + { + "epoch": 12.6, + "learning_rate": 0.0009711040113596025, + "loss": 0.1759, + "step": 19720 + }, + { + "epoch": 12.61, + "learning_rate": 0.0009710330138445154, + "loss": 0.1868, + "step": 19730 + }, + { + "epoch": 12.61, + "learning_rate": 0.0009709620163294285, + "loss": 0.1327, + "step": 19740 + }, + { + "epoch": 12.62, + "learning_rate": 0.0009708910188143415, + "loss": 0.1349, + "step": 19750 + }, + { + "epoch": 12.63, + "learning_rate": 0.0009708200212992545, + "loss": 0.1645, + "step": 19760 + }, + { + "epoch": 12.63, + "learning_rate": 0.0009707490237841676, + "loss": 0.1315, + "step": 19770 + }, + { + "epoch": 12.64, + "learning_rate": 0.0009706780262690807, + "loss": 0.184, + "step": 19780 + }, + { + "epoch": 12.65, + "learning_rate": 0.0009706070287539936, + "loss": 0.1557, + "step": 19790 + }, + { + "epoch": 12.65, + "learning_rate": 0.0009705360312389067, + "loss": 0.1622, + "step": 19800 + }, + { + "epoch": 12.66, + "learning_rate": 0.0009704650337238196, + "loss": 0.1598, + "step": 19810 + }, + { + "epoch": 12.66, + "learning_rate": 0.0009703940362087327, + "loss": 0.1914, + "step": 19820 + }, + { + "epoch": 12.67, + "learning_rate": 0.0009703230386936457, + "loss": 0.1757, + "step": 19830 + }, + { + "epoch": 12.68, + "learning_rate": 0.0009702520411785588, + "loss": 0.1613, + "step": 19840 + }, + { + "epoch": 12.68, + "learning_rate": 0.0009701810436634718, + "loss": 0.175, + "step": 19850 + }, + { + "epoch": 12.69, + "learning_rate": 0.0009701100461483849, + "loss": 0.1778, + "step": 19860 + }, + { + "epoch": 12.7, + "learning_rate": 0.0009700390486332978, + "loss": 0.14, + "step": 19870 + }, + { + "epoch": 12.7, + "learning_rate": 0.0009699680511182109, + "loss": 0.148, + "step": 19880 + }, + { + "epoch": 12.71, + "learning_rate": 0.0009698970536031238, + "loss": 0.1899, + "step": 19890 + }, + { + "epoch": 12.72, + "learning_rate": 0.000969826056088037, + "loss": 0.143, + "step": 19900 + }, + { + "epoch": 12.72, + "learning_rate": 0.00096975505857295, + "loss": 0.1668, + "step": 19910 + }, + { + "epoch": 12.73, + "learning_rate": 0.000969684061057863, + "loss": 0.1607, + "step": 19920 + }, + { + "epoch": 12.73, + "learning_rate": 0.000969613063542776, + "loss": 0.1357, + "step": 19930 + }, + { + "epoch": 12.74, + "learning_rate": 0.0009695420660276891, + "loss": 0.18, + "step": 19940 + }, + { + "epoch": 12.75, + "learning_rate": 0.000969471068512602, + "loss": 0.1766, + "step": 19950 + }, + { + "epoch": 12.75, + "learning_rate": 0.0009694000709975152, + "loss": 0.1852, + "step": 19960 + }, + { + "epoch": 12.76, + "learning_rate": 0.0009693290734824282, + "loss": 0.1969, + "step": 19970 + }, + { + "epoch": 12.77, + "learning_rate": 0.0009692580759673412, + "loss": 0.1483, + "step": 19980 + }, + { + "epoch": 12.77, + "learning_rate": 0.0009691870784522542, + "loss": 0.1823, + "step": 19990 + }, + { + "epoch": 12.78, + "learning_rate": 0.0009691160809371671, + "loss": 0.1495, + "step": 20000 + }, + { + "epoch": 12.79, + "learning_rate": 0.0009690450834220802, + "loss": 0.144, + "step": 20010 + }, + { + "epoch": 12.79, + "learning_rate": 0.0009689740859069933, + "loss": 0.1633, + "step": 20020 + }, + { + "epoch": 12.8, + "learning_rate": 0.0009689030883919063, + "loss": 0.1736, + "step": 20030 + }, + { + "epoch": 12.8, + "learning_rate": 0.0009688320908768193, + "loss": 0.1848, + "step": 20040 + }, + { + "epoch": 12.81, + "learning_rate": 0.0009687610933617324, + "loss": 0.1701, + "step": 20050 + }, + { + "epoch": 12.82, + "learning_rate": 0.0009686900958466453, + "loss": 0.1647, + "step": 20060 + }, + { + "epoch": 12.82, + "learning_rate": 0.0009686190983315584, + "loss": 0.1801, + "step": 20070 + }, + { + "epoch": 12.83, + "learning_rate": 0.0009685481008164714, + "loss": 0.1595, + "step": 20080 + }, + { + "epoch": 12.84, + "learning_rate": 0.0009684771033013845, + "loss": 0.1618, + "step": 20090 + }, + { + "epoch": 12.84, + "learning_rate": 0.0009684061057862975, + "loss": 0.1559, + "step": 20100 + }, + { + "epoch": 12.85, + "learning_rate": 0.0009683351082712105, + "loss": 0.173, + "step": 20110 + }, + { + "epoch": 12.86, + "learning_rate": 0.0009682641107561235, + "loss": 0.15, + "step": 20120 + }, + { + "epoch": 12.86, + "learning_rate": 0.0009681931132410366, + "loss": 0.1707, + "step": 20130 + }, + { + "epoch": 12.87, + "learning_rate": 0.0009681221157259496, + "loss": 0.1403, + "step": 20140 + }, + { + "epoch": 12.88, + "learning_rate": 0.0009680511182108627, + "loss": 0.1787, + "step": 20150 + }, + { + "epoch": 12.88, + "learning_rate": 0.0009679801206957757, + "loss": 0.1708, + "step": 20160 + }, + { + "epoch": 12.89, + "learning_rate": 0.0009679091231806887, + "loss": 0.1595, + "step": 20170 + }, + { + "epoch": 12.89, + "learning_rate": 0.0009678381256656017, + "loss": 0.1536, + "step": 20180 + }, + { + "epoch": 12.9, + "learning_rate": 0.0009677671281505147, + "loss": 0.1783, + "step": 20190 + }, + { + "epoch": 12.91, + "learning_rate": 0.0009676961306354278, + "loss": 0.1694, + "step": 20200 + }, + { + "epoch": 12.91, + "learning_rate": 0.0009676251331203409, + "loss": 0.1593, + "step": 20210 + }, + { + "epoch": 12.92, + "learning_rate": 0.0009675541356052538, + "loss": 0.1378, + "step": 20220 + }, + { + "epoch": 12.93, + "learning_rate": 0.0009674831380901669, + "loss": 0.174, + "step": 20230 + }, + { + "epoch": 12.93, + "learning_rate": 0.0009674121405750799, + "loss": 0.1588, + "step": 20240 + }, + { + "epoch": 12.94, + "learning_rate": 0.0009673411430599929, + "loss": 0.1726, + "step": 20250 + }, + { + "epoch": 12.95, + "learning_rate": 0.000967270145544906, + "loss": 0.1696, + "step": 20260 + }, + { + "epoch": 12.95, + "learning_rate": 0.0009671991480298191, + "loss": 0.1529, + "step": 20270 + }, + { + "epoch": 12.96, + "learning_rate": 0.000967128150514732, + "loss": 0.1869, + "step": 20280 + }, + { + "epoch": 12.96, + "learning_rate": 0.0009670571529996451, + "loss": 0.1673, + "step": 20290 + }, + { + "epoch": 12.97, + "learning_rate": 0.000966986155484558, + "loss": 0.1682, + "step": 20300 + }, + { + "epoch": 12.98, + "learning_rate": 0.000966915157969471, + "loss": 0.1937, + "step": 20310 + }, + { + "epoch": 12.98, + "learning_rate": 0.0009668441604543842, + "loss": 0.1636, + "step": 20320 + }, + { + "epoch": 12.99, + "learning_rate": 0.0009667731629392971, + "loss": 0.1535, + "step": 20330 + }, + { + "epoch": 13.0, + "learning_rate": 0.0009667021654242102, + "loss": 0.1794, + "step": 20340 + }, + { + "epoch": 13.0, + "learning_rate": 0.0009666311679091232, + "loss": 0.1457, + "step": 20350 + }, + { + "epoch": 13.01, + "learning_rate": 0.0009665601703940362, + "loss": 0.168, + "step": 20360 + }, + { + "epoch": 13.02, + "learning_rate": 0.0009664891728789492, + "loss": 0.1501, + "step": 20370 + }, + { + "epoch": 13.02, + "learning_rate": 0.0009664181753638623, + "loss": 0.1401, + "step": 20380 + }, + { + "epoch": 13.03, + "learning_rate": 0.0009663471778487753, + "loss": 0.1438, + "step": 20390 + }, + { + "epoch": 13.04, + "learning_rate": 0.0009662761803336884, + "loss": 0.1199, + "step": 20400 + }, + { + "epoch": 13.04, + "learning_rate": 0.0009662051828186013, + "loss": 0.1534, + "step": 20410 + }, + { + "epoch": 13.05, + "learning_rate": 0.0009661341853035144, + "loss": 0.1386, + "step": 20420 + }, + { + "epoch": 13.05, + "learning_rate": 0.0009660631877884274, + "loss": 0.1598, + "step": 20430 + }, + { + "epoch": 13.06, + "learning_rate": 0.0009659921902733404, + "loss": 0.1612, + "step": 20440 + }, + { + "epoch": 13.07, + "learning_rate": 0.0009659211927582535, + "loss": 0.1319, + "step": 20450 + }, + { + "epoch": 13.07, + "learning_rate": 0.0009658501952431666, + "loss": 0.157, + "step": 20460 + }, + { + "epoch": 13.08, + "learning_rate": 0.0009657791977280795, + "loss": 0.1411, + "step": 20470 + }, + { + "epoch": 13.09, + "learning_rate": 0.0009657082002129926, + "loss": 0.1287, + "step": 20480 + }, + { + "epoch": 13.09, + "learning_rate": 0.0009656372026979055, + "loss": 0.1241, + "step": 20490 + }, + { + "epoch": 13.1, + "learning_rate": 0.0009655662051828186, + "loss": 0.168, + "step": 20500 + }, + { + "epoch": 13.11, + "learning_rate": 0.0009654952076677317, + "loss": 0.1583, + "step": 20510 + }, + { + "epoch": 13.11, + "learning_rate": 0.0009654242101526447, + "loss": 0.1267, + "step": 20520 + }, + { + "epoch": 13.12, + "learning_rate": 0.0009653532126375577, + "loss": 0.1357, + "step": 20530 + }, + { + "epoch": 13.12, + "learning_rate": 0.0009652822151224708, + "loss": 0.126, + "step": 20540 + }, + { + "epoch": 13.13, + "learning_rate": 0.0009652112176073837, + "loss": 0.1379, + "step": 20550 + }, + { + "epoch": 13.14, + "learning_rate": 0.0009651402200922968, + "loss": 0.1572, + "step": 20560 + }, + { + "epoch": 13.14, + "learning_rate": 0.0009650692225772098, + "loss": 0.1551, + "step": 20570 + }, + { + "epoch": 13.15, + "learning_rate": 0.0009649982250621229, + "loss": 0.1277, + "step": 20580 + }, + { + "epoch": 13.16, + "learning_rate": 0.0009649272275470359, + "loss": 0.1609, + "step": 20590 + }, + { + "epoch": 13.16, + "learning_rate": 0.0009648562300319489, + "loss": 0.1459, + "step": 20600 + }, + { + "epoch": 13.17, + "learning_rate": 0.0009647852325168619, + "loss": 0.1602, + "step": 20610 + }, + { + "epoch": 13.18, + "learning_rate": 0.0009647142350017749, + "loss": 0.1472, + "step": 20620 + }, + { + "epoch": 13.18, + "learning_rate": 0.000964643237486688, + "loss": 0.128, + "step": 20630 + }, + { + "epoch": 13.19, + "learning_rate": 0.000964572239971601, + "loss": 0.1444, + "step": 20640 + }, + { + "epoch": 13.19, + "learning_rate": 0.000964501242456514, + "loss": 0.1627, + "step": 20650 + }, + { + "epoch": 13.2, + "learning_rate": 0.000964430244941427, + "loss": 0.1418, + "step": 20660 + }, + { + "epoch": 13.21, + "learning_rate": 0.0009643592474263401, + "loss": 0.135, + "step": 20670 + }, + { + "epoch": 13.21, + "learning_rate": 0.000964288249911253, + "loss": 0.1337, + "step": 20680 + }, + { + "epoch": 13.22, + "learning_rate": 0.0009642172523961662, + "loss": 0.1353, + "step": 20690 + }, + { + "epoch": 13.23, + "learning_rate": 0.0009641462548810792, + "loss": 0.1586, + "step": 20700 + }, + { + "epoch": 13.23, + "learning_rate": 0.0009640752573659922, + "loss": 0.1386, + "step": 20710 + }, + { + "epoch": 13.24, + "learning_rate": 0.0009640042598509052, + "loss": 0.1369, + "step": 20720 + }, + { + "epoch": 13.25, + "learning_rate": 0.0009639332623358183, + "loss": 0.1357, + "step": 20730 + }, + { + "epoch": 13.25, + "learning_rate": 0.0009638622648207312, + "loss": 0.1472, + "step": 20740 + }, + { + "epoch": 13.26, + "learning_rate": 0.0009637912673056444, + "loss": 0.1533, + "step": 20750 + }, + { + "epoch": 13.27, + "learning_rate": 0.0009637202697905573, + "loss": 0.1584, + "step": 20760 + }, + { + "epoch": 13.27, + "learning_rate": 0.0009636492722754704, + "loss": 0.126, + "step": 20770 + }, + { + "epoch": 13.28, + "learning_rate": 0.0009635782747603834, + "loss": 0.1642, + "step": 20780 + }, + { + "epoch": 13.28, + "learning_rate": 0.0009635072772452964, + "loss": 0.1284, + "step": 20790 + }, + { + "epoch": 13.29, + "learning_rate": 0.0009634362797302094, + "loss": 0.1457, + "step": 20800 + }, + { + "epoch": 13.3, + "learning_rate": 0.0009633652822151226, + "loss": 0.1481, + "step": 20810 + }, + { + "epoch": 13.3, + "learning_rate": 0.0009632942847000355, + "loss": 0.1639, + "step": 20820 + }, + { + "epoch": 13.31, + "learning_rate": 0.0009632232871849486, + "loss": 0.1459, + "step": 20830 + }, + { + "epoch": 13.32, + "learning_rate": 0.0009631522896698616, + "loss": 0.1742, + "step": 20840 + }, + { + "epoch": 13.32, + "learning_rate": 0.0009630812921547746, + "loss": 0.1388, + "step": 20850 + }, + { + "epoch": 13.33, + "learning_rate": 0.0009630102946396876, + "loss": 0.1769, + "step": 20860 + }, + { + "epoch": 13.34, + "learning_rate": 0.0009629392971246008, + "loss": 0.1517, + "step": 20870 + }, + { + "epoch": 13.34, + "learning_rate": 0.0009628682996095137, + "loss": 0.1463, + "step": 20880 + }, + { + "epoch": 13.35, + "learning_rate": 0.0009627973020944268, + "loss": 0.1135, + "step": 20890 + }, + { + "epoch": 13.35, + "learning_rate": 0.0009627263045793397, + "loss": 0.1543, + "step": 20900 + }, + { + "epoch": 13.36, + "learning_rate": 0.0009626553070642528, + "loss": 0.1598, + "step": 20910 + }, + { + "epoch": 13.37, + "learning_rate": 0.0009625843095491658, + "loss": 0.1745, + "step": 20920 + }, + { + "epoch": 13.37, + "learning_rate": 0.0009625133120340789, + "loss": 0.1595, + "step": 20930 + }, + { + "epoch": 13.38, + "learning_rate": 0.0009624423145189919, + "loss": 0.1329, + "step": 20940 + }, + { + "epoch": 13.39, + "learning_rate": 0.0009623713170039048, + "loss": 0.1612, + "step": 20950 + }, + { + "epoch": 13.39, + "learning_rate": 0.0009623003194888179, + "loss": 0.1528, + "step": 20960 + }, + { + "epoch": 13.4, + "learning_rate": 0.0009622293219737309, + "loss": 0.1518, + "step": 20970 + }, + { + "epoch": 13.41, + "learning_rate": 0.0009621583244586439, + "loss": 0.1635, + "step": 20980 + }, + { + "epoch": 13.41, + "learning_rate": 0.000962087326943557, + "loss": 0.1446, + "step": 20990 + }, + { + "epoch": 13.42, + "learning_rate": 0.0009620163294284701, + "loss": 0.1471, + "step": 21000 + }, + { + "epoch": 13.42, + "learning_rate": 0.000961945331913383, + "loss": 0.1476, + "step": 21010 + }, + { + "epoch": 13.43, + "learning_rate": 0.0009618743343982961, + "loss": 0.1907, + "step": 21020 + }, + { + "epoch": 13.44, + "learning_rate": 0.000961803336883209, + "loss": 0.1495, + "step": 21030 + }, + { + "epoch": 13.44, + "learning_rate": 0.0009617323393681221, + "loss": 0.1817, + "step": 21040 + }, + { + "epoch": 13.45, + "learning_rate": 0.0009616613418530351, + "loss": 0.1571, + "step": 21050 + }, + { + "epoch": 13.46, + "learning_rate": 0.0009615903443379483, + "loss": 0.1746, + "step": 21060 + }, + { + "epoch": 13.46, + "learning_rate": 0.0009615193468228612, + "loss": 0.1451, + "step": 21070 + }, + { + "epoch": 13.47, + "learning_rate": 0.0009614483493077743, + "loss": 0.1545, + "step": 21080 + }, + { + "epoch": 13.48, + "learning_rate": 0.0009613773517926872, + "loss": 0.148, + "step": 21090 + }, + { + "epoch": 13.48, + "learning_rate": 0.0009613063542776003, + "loss": 0.1368, + "step": 21100 + }, + { + "epoch": 13.49, + "learning_rate": 0.0009612353567625133, + "loss": 0.1401, + "step": 21110 + }, + { + "epoch": 13.5, + "learning_rate": 0.0009611643592474264, + "loss": 0.1375, + "step": 21120 + }, + { + "epoch": 13.5, + "learning_rate": 0.0009610933617323394, + "loss": 0.1525, + "step": 21130 + }, + { + "epoch": 13.51, + "learning_rate": 0.0009610223642172525, + "loss": 0.1509, + "step": 21140 + }, + { + "epoch": 13.51, + "learning_rate": 0.0009609513667021654, + "loss": 0.1531, + "step": 21150 + }, + { + "epoch": 13.52, + "learning_rate": 0.0009608803691870785, + "loss": 0.1096, + "step": 21160 + }, + { + "epoch": 13.53, + "learning_rate": 0.0009608093716719914, + "loss": 0.1678, + "step": 21170 + }, + { + "epoch": 13.53, + "learning_rate": 0.0009607383741569046, + "loss": 0.1366, + "step": 21180 + }, + { + "epoch": 13.54, + "learning_rate": 0.0009606673766418176, + "loss": 0.1431, + "step": 21190 + }, + { + "epoch": 13.55, + "learning_rate": 0.0009605963791267306, + "loss": 0.1709, + "step": 21200 + }, + { + "epoch": 13.55, + "learning_rate": 0.0009605253816116436, + "loss": 0.1812, + "step": 21210 + }, + { + "epoch": 13.56, + "learning_rate": 0.0009604543840965567, + "loss": 0.1601, + "step": 21220 + }, + { + "epoch": 13.57, + "learning_rate": 0.0009603833865814696, + "loss": 0.1491, + "step": 21230 + }, + { + "epoch": 13.57, + "learning_rate": 0.0009603123890663828, + "loss": 0.128, + "step": 21240 + }, + { + "epoch": 13.58, + "learning_rate": 0.0009602413915512958, + "loss": 0.1746, + "step": 21250 + }, + { + "epoch": 13.58, + "learning_rate": 0.0009601703940362087, + "loss": 0.1417, + "step": 21260 + }, + { + "epoch": 13.59, + "learning_rate": 0.0009600993965211218, + "loss": 0.1661, + "step": 21270 + }, + { + "epoch": 13.6, + "learning_rate": 0.0009600283990060347, + "loss": 0.1624, + "step": 21280 + }, + { + "epoch": 13.6, + "learning_rate": 0.0009599574014909478, + "loss": 0.1768, + "step": 21290 + }, + { + "epoch": 13.61, + "learning_rate": 0.0009598864039758609, + "loss": 0.1553, + "step": 21300 + }, + { + "epoch": 13.62, + "learning_rate": 0.0009598154064607739, + "loss": 0.1604, + "step": 21310 + }, + { + "epoch": 13.62, + "learning_rate": 0.0009597444089456869, + "loss": 0.1621, + "step": 21320 + }, + { + "epoch": 13.63, + "learning_rate": 0.0009596734114306, + "loss": 0.1551, + "step": 21330 + }, + { + "epoch": 13.64, + "learning_rate": 0.0009596024139155129, + "loss": 0.1578, + "step": 21340 + }, + { + "epoch": 13.64, + "learning_rate": 0.000959531416400426, + "loss": 0.1668, + "step": 21350 + }, + { + "epoch": 13.65, + "learning_rate": 0.000959460418885339, + "loss": 0.1563, + "step": 21360 + }, + { + "epoch": 13.65, + "learning_rate": 0.0009593894213702521, + "loss": 0.1717, + "step": 21370 + }, + { + "epoch": 13.66, + "learning_rate": 0.0009593184238551651, + "loss": 0.1706, + "step": 21380 + }, + { + "epoch": 13.67, + "learning_rate": 0.0009592474263400781, + "loss": 0.1705, + "step": 21390 + }, + { + "epoch": 13.67, + "learning_rate": 0.0009591764288249911, + "loss": 0.1619, + "step": 21400 + }, + { + "epoch": 13.68, + "learning_rate": 0.0009591054313099042, + "loss": 0.1597, + "step": 21410 + }, + { + "epoch": 13.69, + "learning_rate": 0.0009590344337948172, + "loss": 0.1756, + "step": 21420 + }, + { + "epoch": 13.69, + "learning_rate": 0.0009589634362797303, + "loss": 0.1706, + "step": 21430 + }, + { + "epoch": 13.7, + "learning_rate": 0.0009588924387646433, + "loss": 0.1657, + "step": 21440 + }, + { + "epoch": 13.71, + "learning_rate": 0.0009588214412495563, + "loss": 0.1659, + "step": 21450 + }, + { + "epoch": 13.71, + "learning_rate": 0.0009587504437344693, + "loss": 0.1977, + "step": 21460 + }, + { + "epoch": 13.72, + "learning_rate": 0.0009586794462193823, + "loss": 0.1391, + "step": 21470 + }, + { + "epoch": 13.73, + "learning_rate": 0.0009586084487042954, + "loss": 0.1594, + "step": 21480 + }, + { + "epoch": 13.73, + "learning_rate": 0.0009585374511892085, + "loss": 0.1668, + "step": 21490 + }, + { + "epoch": 13.74, + "learning_rate": 0.0009584664536741214, + "loss": 0.1544, + "step": 21500 + }, + { + "epoch": 13.74, + "learning_rate": 0.0009583954561590345, + "loss": 0.1808, + "step": 21510 + }, + { + "epoch": 13.75, + "learning_rate": 0.0009583244586439475, + "loss": 0.1779, + "step": 21520 + }, + { + "epoch": 13.76, + "learning_rate": 0.0009582534611288605, + "loss": 0.1392, + "step": 21530 + }, + { + "epoch": 13.76, + "learning_rate": 0.0009581824636137736, + "loss": 0.165, + "step": 21540 + }, + { + "epoch": 13.77, + "learning_rate": 0.0009581114660986867, + "loss": 0.1705, + "step": 21550 + }, + { + "epoch": 13.78, + "learning_rate": 0.0009580404685835996, + "loss": 0.1484, + "step": 21560 + }, + { + "epoch": 13.78, + "learning_rate": 0.0009579694710685126, + "loss": 0.1389, + "step": 21570 + }, + { + "epoch": 13.79, + "learning_rate": 0.0009578984735534256, + "loss": 0.1566, + "step": 21580 + }, + { + "epoch": 13.8, + "learning_rate": 0.0009578274760383386, + "loss": 0.133, + "step": 21590 + }, + { + "epoch": 13.8, + "learning_rate": 0.0009577564785232517, + "loss": 0.1441, + "step": 21600 + }, + { + "epoch": 13.81, + "learning_rate": 0.0009576854810081647, + "loss": 0.1644, + "step": 21610 + }, + { + "epoch": 13.81, + "learning_rate": 0.0009576144834930778, + "loss": 0.1805, + "step": 21620 + }, + { + "epoch": 13.82, + "learning_rate": 0.0009575434859779907, + "loss": 0.1491, + "step": 21630 + }, + { + "epoch": 13.83, + "learning_rate": 0.0009574724884629038, + "loss": 0.1472, + "step": 21640 + }, + { + "epoch": 13.83, + "learning_rate": 0.0009574014909478168, + "loss": 0.173, + "step": 21650 + }, + { + "epoch": 13.84, + "learning_rate": 0.0009573304934327298, + "loss": 0.1336, + "step": 21660 + }, + { + "epoch": 13.85, + "learning_rate": 0.0009572594959176429, + "loss": 0.1522, + "step": 21670 + }, + { + "epoch": 13.85, + "learning_rate": 0.000957188498402556, + "loss": 0.1586, + "step": 21680 + }, + { + "epoch": 13.86, + "learning_rate": 0.0009571175008874689, + "loss": 0.1364, + "step": 21690 + }, + { + "epoch": 13.87, + "learning_rate": 0.000957046503372382, + "loss": 0.1443, + "step": 21700 + }, + { + "epoch": 13.87, + "learning_rate": 0.000956975505857295, + "loss": 0.182, + "step": 21710 + }, + { + "epoch": 13.88, + "learning_rate": 0.000956904508342208, + "loss": 0.1481, + "step": 21720 + }, + { + "epoch": 13.88, + "learning_rate": 0.0009568335108271211, + "loss": 0.1608, + "step": 21730 + }, + { + "epoch": 13.89, + "learning_rate": 0.0009567625133120342, + "loss": 0.1596, + "step": 21740 + }, + { + "epoch": 13.9, + "learning_rate": 0.0009566915157969471, + "loss": 0.1546, + "step": 21750 + }, + { + "epoch": 13.9, + "learning_rate": 0.0009566205182818602, + "loss": 0.1417, + "step": 21760 + }, + { + "epoch": 13.91, + "learning_rate": 0.0009565495207667731, + "loss": 0.1342, + "step": 21770 + }, + { + "epoch": 13.92, + "learning_rate": 0.0009564785232516862, + "loss": 0.1615, + "step": 21780 + }, + { + "epoch": 13.92, + "learning_rate": 0.0009564075257365993, + "loss": 0.1452, + "step": 21790 + }, + { + "epoch": 13.93, + "learning_rate": 0.0009563365282215123, + "loss": 0.1705, + "step": 21800 + }, + { + "epoch": 13.94, + "learning_rate": 0.0009562655307064253, + "loss": 0.1404, + "step": 21810 + }, + { + "epoch": 13.94, + "learning_rate": 0.0009561945331913384, + "loss": 0.141, + "step": 21820 + }, + { + "epoch": 13.95, + "learning_rate": 0.0009561235356762513, + "loss": 0.1642, + "step": 21830 + }, + { + "epoch": 13.95, + "learning_rate": 0.0009560525381611644, + "loss": 0.1654, + "step": 21840 + }, + { + "epoch": 13.96, + "learning_rate": 0.0009559815406460774, + "loss": 0.1722, + "step": 21850 + }, + { + "epoch": 13.97, + "learning_rate": 0.0009559105431309905, + "loss": 0.1509, + "step": 21860 + }, + { + "epoch": 13.97, + "learning_rate": 0.0009558395456159035, + "loss": 0.1557, + "step": 21870 + }, + { + "epoch": 13.98, + "learning_rate": 0.0009557685481008164, + "loss": 0.1645, + "step": 21880 + }, + { + "epoch": 13.99, + "learning_rate": 0.0009556975505857295, + "loss": 0.1729, + "step": 21890 + }, + { + "epoch": 13.99, + "learning_rate": 0.0009556265530706425, + "loss": 0.1471, + "step": 21900 + }, + { + "epoch": 14.0, + "learning_rate": 0.0009555555555555556, + "loss": 0.1493, + "step": 21910 + }, + { + "epoch": 14.01, + "learning_rate": 0.0009554845580404686, + "loss": 0.1662, + "step": 21920 + }, + { + "epoch": 14.01, + "learning_rate": 0.0009554135605253817, + "loss": 0.1034, + "step": 21930 + }, + { + "epoch": 14.02, + "learning_rate": 0.0009553425630102946, + "loss": 0.1537, + "step": 21940 + }, + { + "epoch": 14.03, + "learning_rate": 0.0009552715654952077, + "loss": 0.1495, + "step": 21950 + }, + { + "epoch": 14.03, + "learning_rate": 0.0009552005679801206, + "loss": 0.1488, + "step": 21960 + }, + { + "epoch": 14.04, + "learning_rate": 0.0009551295704650338, + "loss": 0.1242, + "step": 21970 + }, + { + "epoch": 14.04, + "learning_rate": 0.0009550585729499468, + "loss": 0.1232, + "step": 21980 + }, + { + "epoch": 14.05, + "learning_rate": 0.0009549875754348598, + "loss": 0.126, + "step": 21990 + }, + { + "epoch": 14.06, + "learning_rate": 0.0009549165779197728, + "loss": 0.1493, + "step": 22000 + }, + { + "epoch": 14.06, + "learning_rate": 0.0009548455804046859, + "loss": 0.145, + "step": 22010 + }, + { + "epoch": 14.07, + "learning_rate": 0.0009547745828895988, + "loss": 0.1363, + "step": 22020 + }, + { + "epoch": 14.08, + "learning_rate": 0.000954703585374512, + "loss": 0.1487, + "step": 22030 + }, + { + "epoch": 14.08, + "learning_rate": 0.000954632587859425, + "loss": 0.15, + "step": 22040 + }, + { + "epoch": 14.09, + "learning_rate": 0.000954561590344338, + "loss": 0.1205, + "step": 22050 + }, + { + "epoch": 14.1, + "learning_rate": 0.000954490592829251, + "loss": 0.1431, + "step": 22060 + }, + { + "epoch": 14.1, + "learning_rate": 0.000954419595314164, + "loss": 0.1134, + "step": 22070 + }, + { + "epoch": 14.11, + "learning_rate": 0.000954348597799077, + "loss": 0.1431, + "step": 22080 + }, + { + "epoch": 14.11, + "learning_rate": 0.0009542776002839902, + "loss": 0.1484, + "step": 22090 + }, + { + "epoch": 14.12, + "learning_rate": 0.0009542066027689031, + "loss": 0.1435, + "step": 22100 + }, + { + "epoch": 14.13, + "learning_rate": 0.0009541356052538162, + "loss": 0.1595, + "step": 22110 + }, + { + "epoch": 14.13, + "learning_rate": 0.0009540646077387292, + "loss": 0.1407, + "step": 22120 + }, + { + "epoch": 14.14, + "learning_rate": 0.0009539936102236422, + "loss": 0.1514, + "step": 22130 + }, + { + "epoch": 14.15, + "learning_rate": 0.0009539226127085552, + "loss": 0.1391, + "step": 22140 + }, + { + "epoch": 14.15, + "learning_rate": 0.0009538516151934684, + "loss": 0.1266, + "step": 22150 + }, + { + "epoch": 14.16, + "learning_rate": 0.0009537806176783813, + "loss": 0.1598, + "step": 22160 + }, + { + "epoch": 14.17, + "learning_rate": 0.0009537096201632944, + "loss": 0.1071, + "step": 22170 + }, + { + "epoch": 14.17, + "learning_rate": 0.0009536386226482073, + "loss": 0.1529, + "step": 22180 + }, + { + "epoch": 14.18, + "learning_rate": 0.0009535676251331203, + "loss": 0.1626, + "step": 22190 + }, + { + "epoch": 14.19, + "learning_rate": 0.0009534966276180334, + "loss": 0.0999, + "step": 22200 + }, + { + "epoch": 14.19, + "learning_rate": 0.0009534256301029463, + "loss": 0.1244, + "step": 22210 + }, + { + "epoch": 14.2, + "learning_rate": 0.0009533546325878595, + "loss": 0.1518, + "step": 22220 + }, + { + "epoch": 14.2, + "learning_rate": 0.0009532836350727724, + "loss": 0.1589, + "step": 22230 + }, + { + "epoch": 14.21, + "learning_rate": 0.0009532126375576855, + "loss": 0.128, + "step": 22240 + }, + { + "epoch": 14.22, + "learning_rate": 0.0009531416400425985, + "loss": 0.155, + "step": 22250 + }, + { + "epoch": 14.22, + "learning_rate": 0.0009530706425275115, + "loss": 0.1533, + "step": 22260 + }, + { + "epoch": 14.23, + "learning_rate": 0.0009529996450124245, + "loss": 0.1312, + "step": 22270 + }, + { + "epoch": 14.24, + "learning_rate": 0.0009529286474973377, + "loss": 0.1448, + "step": 22280 + }, + { + "epoch": 14.24, + "learning_rate": 0.0009528576499822506, + "loss": 0.1474, + "step": 22290 + }, + { + "epoch": 14.25, + "learning_rate": 0.0009527866524671637, + "loss": 0.1018, + "step": 22300 + }, + { + "epoch": 14.26, + "learning_rate": 0.0009527156549520767, + "loss": 0.1606, + "step": 22310 + }, + { + "epoch": 14.26, + "learning_rate": 0.0009526446574369897, + "loss": 0.1233, + "step": 22320 + }, + { + "epoch": 14.27, + "learning_rate": 0.0009525736599219027, + "loss": 0.1554, + "step": 22330 + }, + { + "epoch": 14.27, + "learning_rate": 0.0009525026624068159, + "loss": 0.1245, + "step": 22340 + }, + { + "epoch": 14.28, + "learning_rate": 0.0009524316648917288, + "loss": 0.1178, + "step": 22350 + }, + { + "epoch": 14.29, + "learning_rate": 0.0009523606673766419, + "loss": 0.1328, + "step": 22360 + }, + { + "epoch": 14.29, + "learning_rate": 0.0009522896698615548, + "loss": 0.1562, + "step": 22370 + }, + { + "epoch": 14.3, + "learning_rate": 0.0009522186723464679, + "loss": 0.1414, + "step": 22380 + }, + { + "epoch": 14.31, + "learning_rate": 0.0009521476748313809, + "loss": 0.1537, + "step": 22390 + }, + { + "epoch": 14.31, + "learning_rate": 0.000952076677316294, + "loss": 0.1343, + "step": 22400 + }, + { + "epoch": 14.32, + "learning_rate": 0.000952005679801207, + "loss": 0.1536, + "step": 22410 + }, + { + "epoch": 14.33, + "learning_rate": 0.0009519346822861201, + "loss": 0.1516, + "step": 22420 + }, + { + "epoch": 14.33, + "learning_rate": 0.000951863684771033, + "loss": 0.1512, + "step": 22430 + }, + { + "epoch": 14.34, + "learning_rate": 0.0009517926872559461, + "loss": 0.1475, + "step": 22440 + }, + { + "epoch": 14.34, + "learning_rate": 0.000951721689740859, + "loss": 0.1431, + "step": 22450 + }, + { + "epoch": 14.35, + "learning_rate": 0.0009516506922257722, + "loss": 0.1415, + "step": 22460 + }, + { + "epoch": 14.36, + "learning_rate": 0.0009515796947106852, + "loss": 0.1334, + "step": 22470 + }, + { + "epoch": 14.36, + "learning_rate": 0.0009515086971955982, + "loss": 0.1613, + "step": 22480 + }, + { + "epoch": 14.37, + "learning_rate": 0.0009514376996805112, + "loss": 0.1442, + "step": 22490 + }, + { + "epoch": 14.38, + "learning_rate": 0.0009513667021654242, + "loss": 0.1368, + "step": 22500 + }, + { + "epoch": 14.38, + "learning_rate": 0.0009512957046503372, + "loss": 0.1551, + "step": 22510 + }, + { + "epoch": 14.39, + "learning_rate": 0.0009512247071352503, + "loss": 0.1346, + "step": 22520 + }, + { + "epoch": 14.4, + "learning_rate": 0.0009511537096201633, + "loss": 0.1497, + "step": 22530 + }, + { + "epoch": 14.4, + "learning_rate": 0.0009510827121050763, + "loss": 0.1537, + "step": 22540 + }, + { + "epoch": 14.41, + "learning_rate": 0.0009510117145899894, + "loss": 0.1407, + "step": 22550 + }, + { + "epoch": 14.42, + "learning_rate": 0.0009509407170749023, + "loss": 0.1513, + "step": 22560 + }, + { + "epoch": 14.42, + "learning_rate": 0.0009508697195598154, + "loss": 0.1479, + "step": 22570 + }, + { + "epoch": 14.43, + "learning_rate": 0.0009507987220447285, + "loss": 0.1532, + "step": 22580 + }, + { + "epoch": 14.43, + "learning_rate": 0.0009507277245296415, + "loss": 0.1189, + "step": 22590 + }, + { + "epoch": 14.44, + "learning_rate": 0.0009506567270145545, + "loss": 0.1349, + "step": 22600 + }, + { + "epoch": 14.45, + "learning_rate": 0.0009505857294994676, + "loss": 0.1508, + "step": 22610 + }, + { + "epoch": 14.45, + "learning_rate": 0.0009505147319843805, + "loss": 0.1711, + "step": 22620 + }, + { + "epoch": 14.46, + "learning_rate": 0.0009504437344692936, + "loss": 0.1591, + "step": 22630 + }, + { + "epoch": 14.47, + "learning_rate": 0.0009503727369542066, + "loss": 0.1572, + "step": 22640 + }, + { + "epoch": 14.47, + "learning_rate": 0.0009503017394391197, + "loss": 0.1153, + "step": 22650 + }, + { + "epoch": 14.48, + "learning_rate": 0.0009502307419240327, + "loss": 0.1261, + "step": 22660 + }, + { + "epoch": 14.49, + "learning_rate": 0.0009501597444089457, + "loss": 0.1392, + "step": 22670 + }, + { + "epoch": 14.49, + "learning_rate": 0.0009500887468938587, + "loss": 0.1275, + "step": 22680 + }, + { + "epoch": 14.5, + "learning_rate": 0.0009500177493787718, + "loss": 0.1472, + "step": 22690 + }, + { + "epoch": 14.5, + "learning_rate": 0.0009499467518636848, + "loss": 0.1568, + "step": 22700 + }, + { + "epoch": 14.51, + "learning_rate": 0.0009498757543485979, + "loss": 0.1386, + "step": 22710 + }, + { + "epoch": 14.52, + "learning_rate": 0.0009498047568335108, + "loss": 0.1265, + "step": 22720 + }, + { + "epoch": 14.52, + "learning_rate": 0.0009497337593184239, + "loss": 0.1428, + "step": 22730 + }, + { + "epoch": 14.53, + "learning_rate": 0.0009496627618033369, + "loss": 0.175, + "step": 22740 + }, + { + "epoch": 14.54, + "learning_rate": 0.0009495917642882499, + "loss": 0.1614, + "step": 22750 + }, + { + "epoch": 14.54, + "learning_rate": 0.000949520766773163, + "loss": 0.1728, + "step": 22760 + }, + { + "epoch": 14.55, + "learning_rate": 0.0009494497692580761, + "loss": 0.1589, + "step": 22770 + }, + { + "epoch": 14.56, + "learning_rate": 0.000949378771742989, + "loss": 0.1652, + "step": 22780 + }, + { + "epoch": 14.56, + "learning_rate": 0.0009493077742279021, + "loss": 0.1545, + "step": 22790 + }, + { + "epoch": 14.57, + "learning_rate": 0.000949236776712815, + "loss": 0.1602, + "step": 22800 + }, + { + "epoch": 14.57, + "learning_rate": 0.000949165779197728, + "loss": 0.1615, + "step": 22810 + }, + { + "epoch": 14.58, + "learning_rate": 0.0009490947816826411, + "loss": 0.167, + "step": 22820 + }, + { + "epoch": 14.59, + "learning_rate": 0.0009490237841675541, + "loss": 0.145, + "step": 22830 + }, + { + "epoch": 14.59, + "learning_rate": 0.0009489527866524672, + "loss": 0.15, + "step": 22840 + }, + { + "epoch": 14.6, + "learning_rate": 0.0009488817891373802, + "loss": 0.141, + "step": 22850 + }, + { + "epoch": 14.61, + "learning_rate": 0.0009488107916222932, + "loss": 0.1568, + "step": 22860 + }, + { + "epoch": 14.61, + "learning_rate": 0.0009487397941072062, + "loss": 0.1405, + "step": 22870 + }, + { + "epoch": 14.62, + "learning_rate": 0.0009486687965921193, + "loss": 0.1496, + "step": 22880 + }, + { + "epoch": 14.63, + "learning_rate": 0.0009485977990770323, + "loss": 0.1532, + "step": 22890 + }, + { + "epoch": 14.63, + "learning_rate": 0.0009485268015619454, + "loss": 0.1574, + "step": 22900 + }, + { + "epoch": 14.64, + "learning_rate": 0.0009484558040468583, + "loss": 0.144, + "step": 22910 + }, + { + "epoch": 14.65, + "learning_rate": 0.0009483848065317714, + "loss": 0.1783, + "step": 22920 + }, + { + "epoch": 14.65, + "learning_rate": 0.0009483138090166844, + "loss": 0.1661, + "step": 22930 + }, + { + "epoch": 14.66, + "learning_rate": 0.0009482428115015974, + "loss": 0.142, + "step": 22940 + }, + { + "epoch": 14.66, + "learning_rate": 0.0009481718139865105, + "loss": 0.1407, + "step": 22950 + }, + { + "epoch": 14.67, + "learning_rate": 0.0009481008164714236, + "loss": 0.146, + "step": 22960 + }, + { + "epoch": 14.68, + "learning_rate": 0.0009480298189563365, + "loss": 0.1423, + "step": 22970 + }, + { + "epoch": 14.68, + "learning_rate": 0.0009479588214412496, + "loss": 0.1735, + "step": 22980 + }, + { + "epoch": 14.69, + "learning_rate": 0.0009478878239261626, + "loss": 0.1355, + "step": 22990 + }, + { + "epoch": 14.7, + "learning_rate": 0.0009478168264110756, + "loss": 0.1344, + "step": 23000 + }, + { + "epoch": 14.7, + "learning_rate": 0.0009477458288959887, + "loss": 0.1427, + "step": 23010 + }, + { + "epoch": 14.71, + "learning_rate": 0.0009476748313809018, + "loss": 0.1657, + "step": 23020 + }, + { + "epoch": 14.72, + "learning_rate": 0.0009476038338658147, + "loss": 0.1527, + "step": 23030 + }, + { + "epoch": 14.72, + "learning_rate": 0.0009475328363507278, + "loss": 0.1503, + "step": 23040 + }, + { + "epoch": 14.73, + "learning_rate": 0.0009474618388356407, + "loss": 0.1619, + "step": 23050 + }, + { + "epoch": 14.73, + "learning_rate": 0.0009473908413205538, + "loss": 0.1645, + "step": 23060 + }, + { + "epoch": 14.74, + "learning_rate": 0.0009473198438054669, + "loss": 0.1143, + "step": 23070 + }, + { + "epoch": 14.75, + "learning_rate": 0.0009472488462903799, + "loss": 0.1702, + "step": 23080 + }, + { + "epoch": 14.75, + "learning_rate": 0.0009471778487752929, + "loss": 0.1313, + "step": 23090 + }, + { + "epoch": 14.76, + "learning_rate": 0.000947106851260206, + "loss": 0.1716, + "step": 23100 + }, + { + "epoch": 14.77, + "learning_rate": 0.0009470358537451189, + "loss": 0.1421, + "step": 23110 + }, + { + "epoch": 14.77, + "learning_rate": 0.0009469648562300319, + "loss": 0.1407, + "step": 23120 + }, + { + "epoch": 14.78, + "learning_rate": 0.000946893858714945, + "loss": 0.1345, + "step": 23130 + }, + { + "epoch": 14.79, + "learning_rate": 0.000946822861199858, + "loss": 0.162, + "step": 23140 + }, + { + "epoch": 14.79, + "learning_rate": 0.0009467518636847711, + "loss": 0.1601, + "step": 23150 + }, + { + "epoch": 14.8, + "learning_rate": 0.000946680866169684, + "loss": 0.1415, + "step": 23160 + }, + { + "epoch": 14.8, + "learning_rate": 0.0009466098686545971, + "loss": 0.182, + "step": 23170 + }, + { + "epoch": 14.81, + "learning_rate": 0.00094653887113951, + "loss": 0.1519, + "step": 23180 + }, + { + "epoch": 14.82, + "learning_rate": 0.0009464678736244232, + "loss": 0.1856, + "step": 23190 + }, + { + "epoch": 14.82, + "learning_rate": 0.0009463968761093362, + "loss": 0.1641, + "step": 23200 + }, + { + "epoch": 14.83, + "learning_rate": 0.0009463258785942493, + "loss": 0.1401, + "step": 23210 + }, + { + "epoch": 14.84, + "learning_rate": 0.0009462548810791622, + "loss": 0.1536, + "step": 23220 + }, + { + "epoch": 14.84, + "learning_rate": 0.0009461838835640753, + "loss": 0.1346, + "step": 23230 + }, + { + "epoch": 14.85, + "learning_rate": 0.0009461128860489882, + "loss": 0.1536, + "step": 23240 + }, + { + "epoch": 14.86, + "learning_rate": 0.0009460418885339014, + "loss": 0.1416, + "step": 23250 + }, + { + "epoch": 14.86, + "learning_rate": 0.0009459708910188144, + "loss": 0.1659, + "step": 23260 + }, + { + "epoch": 14.87, + "learning_rate": 0.0009458998935037274, + "loss": 0.1343, + "step": 23270 + }, + { + "epoch": 14.88, + "learning_rate": 0.0009458288959886404, + "loss": 0.1592, + "step": 23280 + }, + { + "epoch": 14.88, + "learning_rate": 0.0009457578984735535, + "loss": 0.1342, + "step": 23290 + }, + { + "epoch": 14.89, + "learning_rate": 0.0009456869009584664, + "loss": 0.1514, + "step": 23300 + }, + { + "epoch": 14.89, + "learning_rate": 0.0009456159034433796, + "loss": 0.1519, + "step": 23310 + }, + { + "epoch": 14.9, + "learning_rate": 0.0009455449059282925, + "loss": 0.1524, + "step": 23320 + }, + { + "epoch": 14.91, + "learning_rate": 0.0009454739084132056, + "loss": 0.1608, + "step": 23330 + }, + { + "epoch": 14.91, + "learning_rate": 0.0009454029108981186, + "loss": 0.1324, + "step": 23340 + }, + { + "epoch": 14.92, + "learning_rate": 0.0009453319133830316, + "loss": 0.1542, + "step": 23350 + }, + { + "epoch": 14.93, + "learning_rate": 0.0009452609158679446, + "loss": 0.1339, + "step": 23360 + }, + { + "epoch": 14.93, + "learning_rate": 0.0009451899183528578, + "loss": 0.1496, + "step": 23370 + }, + { + "epoch": 14.94, + "learning_rate": 0.0009451189208377707, + "loss": 0.1244, + "step": 23380 + }, + { + "epoch": 14.95, + "learning_rate": 0.0009450479233226838, + "loss": 0.1439, + "step": 23390 + }, + { + "epoch": 14.95, + "learning_rate": 0.0009449769258075968, + "loss": 0.1165, + "step": 23400 + }, + { + "epoch": 14.96, + "learning_rate": 0.0009449059282925098, + "loss": 0.1267, + "step": 23410 + }, + { + "epoch": 14.96, + "learning_rate": 0.0009448349307774228, + "loss": 0.1383, + "step": 23420 + }, + { + "epoch": 14.97, + "learning_rate": 0.0009447639332623357, + "loss": 0.1468, + "step": 23430 + }, + { + "epoch": 14.98, + "learning_rate": 0.0009446929357472489, + "loss": 0.1508, + "step": 23440 + }, + { + "epoch": 14.98, + "learning_rate": 0.0009446219382321619, + "loss": 0.1566, + "step": 23450 + }, + { + "epoch": 14.99, + "learning_rate": 0.0009445509407170749, + "loss": 0.1399, + "step": 23460 + }, + { + "epoch": 15.0, + "learning_rate": 0.0009444799432019879, + "loss": 0.1656, + "step": 23470 + }, + { + "epoch": 15.0, + "learning_rate": 0.000944408945686901, + "loss": 0.1503, + "step": 23480 + }, + { + "epoch": 15.01, + "learning_rate": 0.0009443379481718139, + "loss": 0.1385, + "step": 23490 + }, + { + "epoch": 15.02, + "learning_rate": 0.0009442669506567271, + "loss": 0.1249, + "step": 23500 + }, + { + "epoch": 15.02, + "learning_rate": 0.00094419595314164, + "loss": 0.1565, + "step": 23510 + }, + { + "epoch": 15.03, + "learning_rate": 0.0009441249556265531, + "loss": 0.1236, + "step": 23520 + }, + { + "epoch": 15.04, + "learning_rate": 0.0009440539581114661, + "loss": 0.1381, + "step": 23530 + }, + { + "epoch": 15.04, + "learning_rate": 0.0009439829605963791, + "loss": 0.1496, + "step": 23540 + }, + { + "epoch": 15.05, + "learning_rate": 0.0009439119630812921, + "loss": 0.1309, + "step": 23550 + }, + { + "epoch": 15.05, + "learning_rate": 0.0009438409655662053, + "loss": 0.1029, + "step": 23560 + }, + { + "epoch": 15.06, + "learning_rate": 0.0009437699680511182, + "loss": 0.1542, + "step": 23570 + }, + { + "epoch": 15.07, + "learning_rate": 0.0009436989705360313, + "loss": 0.1186, + "step": 23580 + }, + { + "epoch": 15.07, + "learning_rate": 0.0009436279730209442, + "loss": 0.1395, + "step": 23590 + }, + { + "epoch": 15.08, + "learning_rate": 0.0009435569755058573, + "loss": 0.1368, + "step": 23600 + }, + { + "epoch": 15.09, + "learning_rate": 0.0009434859779907703, + "loss": 0.1187, + "step": 23610 + }, + { + "epoch": 15.09, + "learning_rate": 0.0009434149804756834, + "loss": 0.1232, + "step": 23620 + }, + { + "epoch": 15.1, + "learning_rate": 0.0009433439829605964, + "loss": 0.1449, + "step": 23630 + }, + { + "epoch": 15.11, + "learning_rate": 0.0009432729854455095, + "loss": 0.1134, + "step": 23640 + }, + { + "epoch": 15.11, + "learning_rate": 0.0009432019879304224, + "loss": 0.1189, + "step": 23650 + }, + { + "epoch": 15.12, + "learning_rate": 0.0009431309904153355, + "loss": 0.1534, + "step": 23660 + }, + { + "epoch": 15.12, + "learning_rate": 0.0009430599929002485, + "loss": 0.1376, + "step": 23670 + }, + { + "epoch": 15.13, + "learning_rate": 0.0009429889953851616, + "loss": 0.1442, + "step": 23680 + }, + { + "epoch": 15.14, + "learning_rate": 0.0009429179978700746, + "loss": 0.1275, + "step": 23690 + }, + { + "epoch": 15.14, + "learning_rate": 0.0009428470003549877, + "loss": 0.1406, + "step": 23700 + }, + { + "epoch": 15.15, + "learning_rate": 0.0009427760028399006, + "loss": 0.1367, + "step": 23710 + }, + { + "epoch": 15.16, + "learning_rate": 0.0009427050053248137, + "loss": 0.149, + "step": 23720 + }, + { + "epoch": 15.16, + "learning_rate": 0.0009426340078097266, + "loss": 0.1376, + "step": 23730 + }, + { + "epoch": 15.17, + "learning_rate": 0.0009425630102946398, + "loss": 0.1431, + "step": 23740 + }, + { + "epoch": 15.18, + "learning_rate": 0.0009424920127795528, + "loss": 0.1326, + "step": 23750 + }, + { + "epoch": 15.18, + "learning_rate": 0.0009424210152644657, + "loss": 0.1056, + "step": 23760 + }, + { + "epoch": 15.19, + "learning_rate": 0.0009423500177493788, + "loss": 0.1506, + "step": 23770 + }, + { + "epoch": 15.19, + "learning_rate": 0.0009422790202342917, + "loss": 0.153, + "step": 23780 + }, + { + "epoch": 15.2, + "learning_rate": 0.0009422080227192048, + "loss": 0.1368, + "step": 23790 + }, + { + "epoch": 15.21, + "learning_rate": 0.0009421370252041179, + "loss": 0.1508, + "step": 23800 + }, + { + "epoch": 15.21, + "learning_rate": 0.000942066027689031, + "loss": 0.1398, + "step": 23810 + }, + { + "epoch": 15.22, + "learning_rate": 0.0009419950301739439, + "loss": 0.1396, + "step": 23820 + }, + { + "epoch": 15.23, + "learning_rate": 0.000941924032658857, + "loss": 0.1394, + "step": 23830 + }, + { + "epoch": 15.23, + "learning_rate": 0.0009418530351437699, + "loss": 0.1578, + "step": 23840 + }, + { + "epoch": 15.24, + "learning_rate": 0.000941782037628683, + "loss": 0.1626, + "step": 23850 + }, + { + "epoch": 15.25, + "learning_rate": 0.0009417110401135961, + "loss": 0.1428, + "step": 23860 + }, + { + "epoch": 15.25, + "learning_rate": 0.0009416400425985091, + "loss": 0.1478, + "step": 23870 + }, + { + "epoch": 15.26, + "learning_rate": 0.0009415690450834221, + "loss": 0.1482, + "step": 23880 + }, + { + "epoch": 15.27, + "learning_rate": 0.0009414980475683352, + "loss": 0.1329, + "step": 23890 + }, + { + "epoch": 15.27, + "learning_rate": 0.0009414270500532481, + "loss": 0.1287, + "step": 23900 + }, + { + "epoch": 15.28, + "learning_rate": 0.0009413560525381612, + "loss": 0.1443, + "step": 23910 + }, + { + "epoch": 15.28, + "learning_rate": 0.0009412850550230742, + "loss": 0.1398, + "step": 23920 + }, + { + "epoch": 15.29, + "learning_rate": 0.0009412140575079873, + "loss": 0.1211, + "step": 23930 + }, + { + "epoch": 15.3, + "learning_rate": 0.0009411430599929003, + "loss": 0.137, + "step": 23940 + }, + { + "epoch": 15.3, + "learning_rate": 0.0009410720624778133, + "loss": 0.1539, + "step": 23950 + }, + { + "epoch": 15.31, + "learning_rate": 0.0009410010649627263, + "loss": 0.1305, + "step": 23960 + }, + { + "epoch": 15.32, + "learning_rate": 0.0009409300674476394, + "loss": 0.1501, + "step": 23970 + }, + { + "epoch": 15.32, + "learning_rate": 0.0009408590699325523, + "loss": 0.1517, + "step": 23980 + }, + { + "epoch": 15.33, + "learning_rate": 0.0009407880724174655, + "loss": 0.1436, + "step": 23990 + }, + { + "epoch": 15.34, + "learning_rate": 0.0009407170749023784, + "loss": 0.1175, + "step": 24000 + }, + { + "epoch": 15.34, + "learning_rate": 0.0009406460773872915, + "loss": 0.1333, + "step": 24010 + }, + { + "epoch": 15.35, + "learning_rate": 0.0009405750798722045, + "loss": 0.1091, + "step": 24020 + }, + { + "epoch": 15.35, + "learning_rate": 0.0009405040823571175, + "loss": 0.143, + "step": 24030 + }, + { + "epoch": 15.36, + "learning_rate": 0.0009404330848420305, + "loss": 0.1213, + "step": 24040 + }, + { + "epoch": 15.37, + "learning_rate": 0.0009403620873269437, + "loss": 0.1295, + "step": 24050 + }, + { + "epoch": 15.37, + "learning_rate": 0.0009402910898118566, + "loss": 0.1312, + "step": 24060 + }, + { + "epoch": 15.38, + "learning_rate": 0.0009402200922967696, + "loss": 0.1402, + "step": 24070 + }, + { + "epoch": 15.39, + "learning_rate": 0.0009401490947816827, + "loss": 0.1299, + "step": 24080 + }, + { + "epoch": 15.39, + "learning_rate": 0.0009400780972665956, + "loss": 0.1399, + "step": 24090 + }, + { + "epoch": 15.4, + "learning_rate": 0.0009400070997515087, + "loss": 0.1339, + "step": 24100 + }, + { + "epoch": 15.41, + "learning_rate": 0.0009399361022364217, + "loss": 0.1145, + "step": 24110 + }, + { + "epoch": 15.41, + "learning_rate": 0.0009398651047213348, + "loss": 0.1567, + "step": 24120 + }, + { + "epoch": 15.42, + "learning_rate": 0.0009397941072062478, + "loss": 0.1406, + "step": 24130 + }, + { + "epoch": 15.42, + "learning_rate": 0.0009397231096911608, + "loss": 0.1522, + "step": 24140 + }, + { + "epoch": 15.43, + "learning_rate": 0.0009396521121760738, + "loss": 0.1274, + "step": 24150 + }, + { + "epoch": 15.44, + "learning_rate": 0.0009395811146609869, + "loss": 0.1309, + "step": 24160 + }, + { + "epoch": 15.44, + "learning_rate": 0.0009395101171458999, + "loss": 0.1333, + "step": 24170 + }, + { + "epoch": 15.45, + "learning_rate": 0.000939439119630813, + "loss": 0.1439, + "step": 24180 + }, + { + "epoch": 15.46, + "learning_rate": 0.000939368122115726, + "loss": 0.1288, + "step": 24190 + }, + { + "epoch": 15.46, + "learning_rate": 0.000939297124600639, + "loss": 0.1154, + "step": 24200 + }, + { + "epoch": 15.47, + "learning_rate": 0.000939226127085552, + "loss": 0.1208, + "step": 24210 + }, + { + "epoch": 15.48, + "learning_rate": 0.000939155129570465, + "loss": 0.1275, + "step": 24220 + }, + { + "epoch": 15.48, + "learning_rate": 0.0009390841320553781, + "loss": 0.136, + "step": 24230 + }, + { + "epoch": 15.49, + "learning_rate": 0.0009390131345402912, + "loss": 0.1182, + "step": 24240 + }, + { + "epoch": 15.5, + "learning_rate": 0.0009389421370252041, + "loss": 0.136, + "step": 24250 + }, + { + "epoch": 15.5, + "learning_rate": 0.0009388711395101172, + "loss": 0.1294, + "step": 24260 + }, + { + "epoch": 15.51, + "learning_rate": 0.0009388001419950302, + "loss": 0.1553, + "step": 24270 + }, + { + "epoch": 15.51, + "learning_rate": 0.0009387291444799432, + "loss": 0.1494, + "step": 24280 + }, + { + "epoch": 15.52, + "learning_rate": 0.0009386581469648563, + "loss": 0.1119, + "step": 24290 + }, + { + "epoch": 15.53, + "learning_rate": 0.0009385871494497694, + "loss": 0.1552, + "step": 24300 + }, + { + "epoch": 15.53, + "learning_rate": 0.0009385161519346823, + "loss": 0.1289, + "step": 24310 + }, + { + "epoch": 15.54, + "learning_rate": 0.0009384451544195954, + "loss": 0.1435, + "step": 24320 + }, + { + "epoch": 15.55, + "learning_rate": 0.0009383741569045083, + "loss": 0.1491, + "step": 24330 + }, + { + "epoch": 15.55, + "learning_rate": 0.0009383031593894214, + "loss": 0.1536, + "step": 24340 + }, + { + "epoch": 15.56, + "learning_rate": 0.0009382321618743345, + "loss": 0.1418, + "step": 24350 + }, + { + "epoch": 15.57, + "learning_rate": 0.0009381611643592475, + "loss": 0.1373, + "step": 24360 + }, + { + "epoch": 15.57, + "learning_rate": 0.0009380901668441605, + "loss": 0.1431, + "step": 24370 + }, + { + "epoch": 15.58, + "learning_rate": 0.0009380191693290734, + "loss": 0.1286, + "step": 24380 + }, + { + "epoch": 15.58, + "learning_rate": 0.0009379481718139865, + "loss": 0.1437, + "step": 24390 + }, + { + "epoch": 15.59, + "learning_rate": 0.0009378771742988995, + "loss": 0.184, + "step": 24400 + }, + { + "epoch": 15.6, + "learning_rate": 0.0009378061767838126, + "loss": 0.1034, + "step": 24410 + }, + { + "epoch": 15.6, + "learning_rate": 0.0009377351792687256, + "loss": 0.1458, + "step": 24420 + }, + { + "epoch": 15.61, + "learning_rate": 0.0009376641817536387, + "loss": 0.1409, + "step": 24430 + }, + { + "epoch": 15.62, + "learning_rate": 0.0009375931842385516, + "loss": 0.1224, + "step": 24440 + }, + { + "epoch": 15.62, + "learning_rate": 0.0009375221867234647, + "loss": 0.1531, + "step": 24450 + }, + { + "epoch": 15.63, + "learning_rate": 0.0009374511892083777, + "loss": 0.1554, + "step": 24460 + }, + { + "epoch": 15.64, + "learning_rate": 0.0009373801916932908, + "loss": 0.145, + "step": 24470 + }, + { + "epoch": 15.64, + "learning_rate": 0.0009373091941782038, + "loss": 0.1375, + "step": 24480 + }, + { + "epoch": 15.65, + "learning_rate": 0.0009372381966631168, + "loss": 0.1538, + "step": 24490 + }, + { + "epoch": 15.65, + "learning_rate": 0.0009371671991480298, + "loss": 0.1313, + "step": 24500 + }, + { + "epoch": 15.66, + "learning_rate": 0.0009370962016329429, + "loss": 0.1241, + "step": 24510 + }, + { + "epoch": 15.67, + "learning_rate": 0.0009370252041178558, + "loss": 0.1408, + "step": 24520 + }, + { + "epoch": 15.67, + "learning_rate": 0.000936954206602769, + "loss": 0.1285, + "step": 24530 + }, + { + "epoch": 15.68, + "learning_rate": 0.000936883209087682, + "loss": 0.1444, + "step": 24540 + }, + { + "epoch": 15.69, + "learning_rate": 0.000936812211572595, + "loss": 0.1534, + "step": 24550 + }, + { + "epoch": 15.69, + "learning_rate": 0.000936741214057508, + "loss": 0.1407, + "step": 24560 + }, + { + "epoch": 15.7, + "learning_rate": 0.000936670216542421, + "loss": 0.1189, + "step": 24570 + }, + { + "epoch": 15.71, + "learning_rate": 0.000936599219027334, + "loss": 0.1567, + "step": 24580 + }, + { + "epoch": 15.71, + "learning_rate": 0.0009365282215122471, + "loss": 0.149, + "step": 24590 + }, + { + "epoch": 15.72, + "learning_rate": 0.0009364572239971601, + "loss": 0.1743, + "step": 24600 + }, + { + "epoch": 15.73, + "learning_rate": 0.0009363862264820732, + "loss": 0.158, + "step": 24610 + }, + { + "epoch": 15.73, + "learning_rate": 0.0009363152289669862, + "loss": 0.1391, + "step": 24620 + }, + { + "epoch": 15.74, + "learning_rate": 0.0009362442314518992, + "loss": 0.147, + "step": 24630 + }, + { + "epoch": 15.74, + "learning_rate": 0.0009361732339368122, + "loss": 0.1432, + "step": 24640 + }, + { + "epoch": 15.75, + "learning_rate": 0.0009361022364217253, + "loss": 0.1382, + "step": 24650 + }, + { + "epoch": 15.76, + "learning_rate": 0.0009360312389066383, + "loss": 0.14, + "step": 24660 + }, + { + "epoch": 15.76, + "learning_rate": 0.0009359602413915513, + "loss": 0.1489, + "step": 24670 + }, + { + "epoch": 15.77, + "learning_rate": 0.0009358892438764643, + "loss": 0.1443, + "step": 24680 + }, + { + "epoch": 15.78, + "learning_rate": 0.0009358182463613773, + "loss": 0.1329, + "step": 24690 + }, + { + "epoch": 15.78, + "learning_rate": 0.0009357472488462904, + "loss": 0.1445, + "step": 24700 + }, + { + "epoch": 15.79, + "learning_rate": 0.0009356762513312033, + "loss": 0.159, + "step": 24710 + }, + { + "epoch": 15.8, + "learning_rate": 0.0009356052538161165, + "loss": 0.1628, + "step": 24720 + }, + { + "epoch": 15.8, + "learning_rate": 0.0009355342563010295, + "loss": 0.1636, + "step": 24730 + }, + { + "epoch": 15.81, + "learning_rate": 0.0009354632587859425, + "loss": 0.1332, + "step": 24740 + }, + { + "epoch": 15.81, + "learning_rate": 0.0009353922612708555, + "loss": 0.1685, + "step": 24750 + }, + { + "epoch": 15.82, + "learning_rate": 0.0009353212637557686, + "loss": 0.1439, + "step": 24760 + }, + { + "epoch": 15.83, + "learning_rate": 0.0009352502662406815, + "loss": 0.1545, + "step": 24770 + }, + { + "epoch": 15.83, + "learning_rate": 0.0009351792687255947, + "loss": 0.173, + "step": 24780 + }, + { + "epoch": 15.84, + "learning_rate": 0.0009351082712105076, + "loss": 0.1309, + "step": 24790 + }, + { + "epoch": 15.85, + "learning_rate": 0.0009350372736954207, + "loss": 0.1452, + "step": 24800 + }, + { + "epoch": 15.85, + "learning_rate": 0.0009349662761803337, + "loss": 0.1527, + "step": 24810 + }, + { + "epoch": 15.86, + "learning_rate": 0.0009348952786652467, + "loss": 0.1403, + "step": 24820 + }, + { + "epoch": 15.87, + "learning_rate": 0.0009348242811501597, + "loss": 0.1439, + "step": 24830 + }, + { + "epoch": 15.87, + "learning_rate": 0.0009347532836350729, + "loss": 0.1238, + "step": 24840 + }, + { + "epoch": 15.88, + "learning_rate": 0.0009346822861199858, + "loss": 0.1268, + "step": 24850 + }, + { + "epoch": 15.88, + "learning_rate": 0.0009346112886048989, + "loss": 0.1617, + "step": 24860 + }, + { + "epoch": 15.89, + "learning_rate": 0.0009345402910898118, + "loss": 0.1181, + "step": 24870 + }, + { + "epoch": 15.9, + "learning_rate": 0.0009344692935747249, + "loss": 0.1561, + "step": 24880 + }, + { + "epoch": 15.9, + "learning_rate": 0.0009343982960596379, + "loss": 0.1353, + "step": 24890 + }, + { + "epoch": 15.91, + "learning_rate": 0.000934327298544551, + "loss": 0.1284, + "step": 24900 + }, + { + "epoch": 15.92, + "learning_rate": 0.000934256301029464, + "loss": 0.1271, + "step": 24910 + }, + { + "epoch": 15.92, + "learning_rate": 0.0009341853035143771, + "loss": 0.1071, + "step": 24920 + }, + { + "epoch": 15.93, + "learning_rate": 0.00093411430599929, + "loss": 0.1475, + "step": 24930 + }, + { + "epoch": 15.94, + "learning_rate": 0.0009340433084842031, + "loss": 0.1313, + "step": 24940 + }, + { + "epoch": 15.94, + "learning_rate": 0.000933972310969116, + "loss": 0.1228, + "step": 24950 + }, + { + "epoch": 15.95, + "learning_rate": 0.0009339013134540292, + "loss": 0.137, + "step": 24960 + }, + { + "epoch": 15.95, + "learning_rate": 0.0009338303159389422, + "loss": 0.1521, + "step": 24970 + }, + { + "epoch": 15.96, + "learning_rate": 0.0009337593184238551, + "loss": 0.1228, + "step": 24980 + }, + { + "epoch": 15.97, + "learning_rate": 0.0009336883209087682, + "loss": 0.1769, + "step": 24990 + }, + { + "epoch": 15.97, + "learning_rate": 0.0009336173233936812, + "loss": 0.1282, + "step": 25000 + }, + { + "epoch": 15.98, + "learning_rate": 0.0009335463258785942, + "loss": 0.1443, + "step": 25010 + }, + { + "epoch": 15.99, + "learning_rate": 0.0009334753283635073, + "loss": 0.1371, + "step": 25020 + }, + { + "epoch": 15.99, + "learning_rate": 0.0009334043308484204, + "loss": 0.1388, + "step": 25030 + }, + { + "epoch": 16.0, + "learning_rate": 0.0009333333333333333, + "loss": 0.1664, + "step": 25040 + }, + { + "epoch": 16.01, + "learning_rate": 0.0009332623358182464, + "loss": 0.1286, + "step": 25050 + }, + { + "epoch": 16.01, + "learning_rate": 0.0009331913383031593, + "loss": 0.1375, + "step": 25060 + }, + { + "epoch": 16.02, + "learning_rate": 0.0009331203407880724, + "loss": 0.1182, + "step": 25070 + }, + { + "epoch": 16.03, + "learning_rate": 0.0009330493432729855, + "loss": 0.1052, + "step": 25080 + }, + { + "epoch": 16.03, + "learning_rate": 0.0009329783457578985, + "loss": 0.1171, + "step": 25090 + }, + { + "epoch": 16.04, + "learning_rate": 0.0009329073482428115, + "loss": 0.1204, + "step": 25100 + }, + { + "epoch": 16.04, + "learning_rate": 0.0009328363507277246, + "loss": 0.1288, + "step": 25110 + }, + { + "epoch": 16.05, + "learning_rate": 0.0009327653532126375, + "loss": 0.1363, + "step": 25120 + }, + { + "epoch": 16.06, + "learning_rate": 0.0009326943556975506, + "loss": 0.1242, + "step": 25130 + }, + { + "epoch": 16.06, + "learning_rate": 0.0009326233581824637, + "loss": 0.0983, + "step": 25140 + }, + { + "epoch": 16.07, + "learning_rate": 0.0009325523606673767, + "loss": 0.1232, + "step": 25150 + }, + { + "epoch": 16.08, + "learning_rate": 0.0009324813631522897, + "loss": 0.133, + "step": 25160 + }, + { + "epoch": 16.08, + "learning_rate": 0.0009324103656372028, + "loss": 0.1143, + "step": 25170 + }, + { + "epoch": 16.09, + "learning_rate": 0.0009323393681221157, + "loss": 0.118, + "step": 25180 + }, + { + "epoch": 16.1, + "learning_rate": 0.0009322683706070288, + "loss": 0.1199, + "step": 25190 + }, + { + "epoch": 16.1, + "learning_rate": 0.0009321973730919417, + "loss": 0.1186, + "step": 25200 + }, + { + "epoch": 16.11, + "learning_rate": 0.0009321263755768549, + "loss": 0.1374, + "step": 25210 + }, + { + "epoch": 16.11, + "learning_rate": 0.0009320553780617679, + "loss": 0.1354, + "step": 25220 + }, + { + "epoch": 16.12, + "learning_rate": 0.0009319843805466809, + "loss": 0.1587, + "step": 25230 + }, + { + "epoch": 16.13, + "learning_rate": 0.0009319133830315939, + "loss": 0.1165, + "step": 25240 + }, + { + "epoch": 16.13, + "learning_rate": 0.000931842385516507, + "loss": 0.1248, + "step": 25250 + }, + { + "epoch": 16.14, + "learning_rate": 0.0009317713880014199, + "loss": 0.1411, + "step": 25260 + }, + { + "epoch": 16.15, + "learning_rate": 0.0009317003904863331, + "loss": 0.1265, + "step": 25270 + }, + { + "epoch": 16.15, + "learning_rate": 0.000931629392971246, + "loss": 0.1404, + "step": 25280 + }, + { + "epoch": 16.16, + "learning_rate": 0.000931558395456159, + "loss": 0.1166, + "step": 25290 + }, + { + "epoch": 16.17, + "learning_rate": 0.0009314873979410721, + "loss": 0.1257, + "step": 25300 + }, + { + "epoch": 16.17, + "learning_rate": 0.000931416400425985, + "loss": 0.1293, + "step": 25310 + }, + { + "epoch": 16.18, + "learning_rate": 0.0009313454029108981, + "loss": 0.1311, + "step": 25320 + }, + { + "epoch": 16.19, + "learning_rate": 0.0009312744053958112, + "loss": 0.1424, + "step": 25330 + }, + { + "epoch": 16.19, + "learning_rate": 0.0009312034078807242, + "loss": 0.123, + "step": 25340 + }, + { + "epoch": 16.2, + "learning_rate": 0.0009311324103656372, + "loss": 0.1401, + "step": 25350 + }, + { + "epoch": 16.2, + "learning_rate": 0.0009310614128505503, + "loss": 0.1472, + "step": 25360 + }, + { + "epoch": 16.21, + "learning_rate": 0.0009309904153354632, + "loss": 0.1571, + "step": 25370 + }, + { + "epoch": 16.22, + "learning_rate": 0.0009309194178203763, + "loss": 0.1117, + "step": 25380 + }, + { + "epoch": 16.22, + "learning_rate": 0.0009308484203052893, + "loss": 0.1385, + "step": 25390 + }, + { + "epoch": 16.23, + "learning_rate": 0.0009307774227902024, + "loss": 0.1311, + "step": 25400 + }, + { + "epoch": 16.24, + "learning_rate": 0.0009307064252751154, + "loss": 0.122, + "step": 25410 + }, + { + "epoch": 16.24, + "learning_rate": 0.0009306354277600284, + "loss": 0.141, + "step": 25420 + }, + { + "epoch": 16.25, + "learning_rate": 0.0009305644302449414, + "loss": 0.1267, + "step": 25430 + }, + { + "epoch": 16.26, + "learning_rate": 0.0009304934327298545, + "loss": 0.1027, + "step": 25440 + }, + { + "epoch": 16.26, + "learning_rate": 0.0009304224352147675, + "loss": 0.1542, + "step": 25450 + }, + { + "epoch": 16.27, + "learning_rate": 0.0009303514376996806, + "loss": 0.1143, + "step": 25460 + }, + { + "epoch": 16.27, + "learning_rate": 0.0009302804401845935, + "loss": 0.1454, + "step": 25470 + }, + { + "epoch": 16.28, + "learning_rate": 0.0009302094426695066, + "loss": 0.1523, + "step": 25480 + }, + { + "epoch": 16.29, + "learning_rate": 0.0009301384451544196, + "loss": 0.1088, + "step": 25490 + }, + { + "epoch": 16.29, + "learning_rate": 0.0009300674476393326, + "loss": 0.1545, + "step": 25500 + }, + { + "epoch": 16.3, + "learning_rate": 0.0009299964501242457, + "loss": 0.1418, + "step": 25510 + }, + { + "epoch": 16.31, + "learning_rate": 0.0009299254526091588, + "loss": 0.1418, + "step": 25520 + }, + { + "epoch": 16.31, + "learning_rate": 0.0009298544550940717, + "loss": 0.1424, + "step": 25530 + }, + { + "epoch": 16.32, + "learning_rate": 0.0009297834575789848, + "loss": 0.1374, + "step": 25540 + }, + { + "epoch": 16.33, + "learning_rate": 0.0009297124600638978, + "loss": 0.148, + "step": 25550 + }, + { + "epoch": 16.33, + "learning_rate": 0.0009296414625488108, + "loss": 0.1299, + "step": 25560 + }, + { + "epoch": 16.34, + "learning_rate": 0.0009295704650337239, + "loss": 0.1407, + "step": 25570 + }, + { + "epoch": 16.34, + "learning_rate": 0.000929499467518637, + "loss": 0.1308, + "step": 25580 + }, + { + "epoch": 16.35, + "learning_rate": 0.0009294284700035499, + "loss": 0.1344, + "step": 25590 + }, + { + "epoch": 16.36, + "learning_rate": 0.0009293574724884629, + "loss": 0.1084, + "step": 25600 + }, + { + "epoch": 16.36, + "learning_rate": 0.0009292864749733759, + "loss": 0.1325, + "step": 25610 + }, + { + "epoch": 16.37, + "learning_rate": 0.0009292154774582889, + "loss": 0.133, + "step": 25620 + }, + { + "epoch": 16.38, + "learning_rate": 0.0009291444799432021, + "loss": 0.107, + "step": 25630 + }, + { + "epoch": 16.38, + "learning_rate": 0.000929073482428115, + "loss": 0.1486, + "step": 25640 + }, + { + "epoch": 16.39, + "learning_rate": 0.0009290024849130281, + "loss": 0.1332, + "step": 25650 + }, + { + "epoch": 16.4, + "learning_rate": 0.000928931487397941, + "loss": 0.1404, + "step": 25660 + }, + { + "epoch": 16.4, + "learning_rate": 0.0009288604898828541, + "loss": 0.1448, + "step": 25670 + }, + { + "epoch": 16.41, + "learning_rate": 0.0009287894923677671, + "loss": 0.1351, + "step": 25680 + }, + { + "epoch": 16.42, + "learning_rate": 0.0009287184948526802, + "loss": 0.1293, + "step": 25690 + }, + { + "epoch": 16.42, + "learning_rate": 0.0009286474973375932, + "loss": 0.1368, + "step": 25700 + }, + { + "epoch": 16.43, + "learning_rate": 0.0009285764998225063, + "loss": 0.1199, + "step": 25710 + }, + { + "epoch": 16.43, + "learning_rate": 0.0009285055023074192, + "loss": 0.1127, + "step": 25720 + }, + { + "epoch": 16.44, + "learning_rate": 0.0009284345047923323, + "loss": 0.146, + "step": 25730 + }, + { + "epoch": 16.45, + "learning_rate": 0.0009283635072772452, + "loss": 0.1567, + "step": 25740 + }, + { + "epoch": 16.45, + "learning_rate": 0.0009282925097621584, + "loss": 0.1227, + "step": 25750 + }, + { + "epoch": 16.46, + "learning_rate": 0.0009282215122470714, + "loss": 0.1116, + "step": 25760 + }, + { + "epoch": 16.47, + "learning_rate": 0.0009281505147319844, + "loss": 0.1299, + "step": 25770 + }, + { + "epoch": 16.47, + "learning_rate": 0.0009280795172168974, + "loss": 0.1208, + "step": 25780 + }, + { + "epoch": 16.48, + "learning_rate": 0.0009280085197018105, + "loss": 0.1362, + "step": 25790 + }, + { + "epoch": 16.49, + "learning_rate": 0.0009279375221867234, + "loss": 0.1251, + "step": 25800 + }, + { + "epoch": 16.49, + "learning_rate": 0.0009278665246716365, + "loss": 0.1411, + "step": 25810 + }, + { + "epoch": 16.5, + "learning_rate": 0.0009277955271565496, + "loss": 0.1121, + "step": 25820 + }, + { + "epoch": 16.5, + "learning_rate": 0.0009277245296414626, + "loss": 0.1616, + "step": 25830 + }, + { + "epoch": 16.51, + "learning_rate": 0.0009276535321263756, + "loss": 0.1454, + "step": 25840 + }, + { + "epoch": 16.52, + "learning_rate": 0.0009275825346112887, + "loss": 0.122, + "step": 25850 + }, + { + "epoch": 16.52, + "learning_rate": 0.0009275115370962016, + "loss": 0.1452, + "step": 25860 + }, + { + "epoch": 16.53, + "learning_rate": 0.0009274405395811147, + "loss": 0.1088, + "step": 25870 + }, + { + "epoch": 16.54, + "learning_rate": 0.0009273695420660277, + "loss": 0.1377, + "step": 25880 + }, + { + "epoch": 16.54, + "learning_rate": 0.0009272985445509408, + "loss": 0.1186, + "step": 25890 + }, + { + "epoch": 16.55, + "learning_rate": 0.0009272275470358538, + "loss": 0.1247, + "step": 25900 + }, + { + "epoch": 16.56, + "learning_rate": 0.0009271565495207667, + "loss": 0.0938, + "step": 25910 + }, + { + "epoch": 16.56, + "learning_rate": 0.0009270855520056798, + "loss": 0.1685, + "step": 25920 + }, + { + "epoch": 16.57, + "learning_rate": 0.0009270145544905927, + "loss": 0.1359, + "step": 25930 + }, + { + "epoch": 16.57, + "learning_rate": 0.0009269435569755059, + "loss": 0.1519, + "step": 25940 + }, + { + "epoch": 16.58, + "learning_rate": 0.0009268725594604189, + "loss": 0.1279, + "step": 25950 + }, + { + "epoch": 16.59, + "learning_rate": 0.000926801561945332, + "loss": 0.1069, + "step": 25960 + }, + { + "epoch": 16.59, + "learning_rate": 0.0009267305644302449, + "loss": 0.1241, + "step": 25970 + }, + { + "epoch": 16.6, + "learning_rate": 0.000926659566915158, + "loss": 0.1594, + "step": 25980 + }, + { + "epoch": 16.61, + "learning_rate": 0.0009265885694000709, + "loss": 0.1272, + "step": 25990 + }, + { + "epoch": 16.61, + "learning_rate": 0.0009265175718849841, + "loss": 0.1267, + "step": 26000 + }, + { + "epoch": 16.62, + "learning_rate": 0.0009264465743698971, + "loss": 0.1224, + "step": 26010 + }, + { + "epoch": 16.63, + "learning_rate": 0.0009263755768548101, + "loss": 0.1352, + "step": 26020 + }, + { + "epoch": 16.63, + "learning_rate": 0.0009263045793397231, + "loss": 0.1276, + "step": 26030 + }, + { + "epoch": 16.64, + "learning_rate": 0.0009262335818246362, + "loss": 0.123, + "step": 26040 + }, + { + "epoch": 16.65, + "learning_rate": 0.0009261625843095491, + "loss": 0.1321, + "step": 26050 + }, + { + "epoch": 16.65, + "learning_rate": 0.0009260915867944623, + "loss": 0.1449, + "step": 26060 + }, + { + "epoch": 16.66, + "learning_rate": 0.0009260205892793752, + "loss": 0.1492, + "step": 26070 + }, + { + "epoch": 16.66, + "learning_rate": 0.0009259495917642883, + "loss": 0.1182, + "step": 26080 + }, + { + "epoch": 16.67, + "learning_rate": 0.0009258785942492013, + "loss": 0.1407, + "step": 26090 + }, + { + "epoch": 16.68, + "learning_rate": 0.0009258075967341143, + "loss": 0.1472, + "step": 26100 + }, + { + "epoch": 16.68, + "learning_rate": 0.0009257365992190273, + "loss": 0.1404, + "step": 26110 + }, + { + "epoch": 16.69, + "learning_rate": 0.0009256656017039405, + "loss": 0.1511, + "step": 26120 + }, + { + "epoch": 16.7, + "learning_rate": 0.0009255946041888534, + "loss": 0.1255, + "step": 26130 + }, + { + "epoch": 16.7, + "learning_rate": 0.0009255236066737665, + "loss": 0.1221, + "step": 26140 + }, + { + "epoch": 16.71, + "learning_rate": 0.0009254526091586794, + "loss": 0.1251, + "step": 26150 + }, + { + "epoch": 16.72, + "learning_rate": 0.0009253816116435925, + "loss": 0.1232, + "step": 26160 + }, + { + "epoch": 16.72, + "learning_rate": 0.0009253106141285055, + "loss": 0.1382, + "step": 26170 + }, + { + "epoch": 16.73, + "learning_rate": 0.0009252396166134186, + "loss": 0.135, + "step": 26180 + }, + { + "epoch": 16.73, + "learning_rate": 0.0009251686190983316, + "loss": 0.1372, + "step": 26190 + }, + { + "epoch": 16.74, + "learning_rate": 0.0009250976215832447, + "loss": 0.1124, + "step": 26200 + }, + { + "epoch": 16.75, + "learning_rate": 0.0009250266240681576, + "loss": 0.1464, + "step": 26210 + }, + { + "epoch": 16.75, + "learning_rate": 0.0009249556265530706, + "loss": 0.1388, + "step": 26220 + }, + { + "epoch": 16.76, + "learning_rate": 0.0009248846290379837, + "loss": 0.1212, + "step": 26230 + }, + { + "epoch": 16.77, + "learning_rate": 0.0009248136315228967, + "loss": 0.1517, + "step": 26240 + }, + { + "epoch": 16.77, + "learning_rate": 0.0009247426340078098, + "loss": 0.1009, + "step": 26250 + }, + { + "epoch": 16.78, + "learning_rate": 0.0009246716364927227, + "loss": 0.1296, + "step": 26260 + }, + { + "epoch": 16.79, + "learning_rate": 0.0009246006389776358, + "loss": 0.1443, + "step": 26270 + }, + { + "epoch": 16.79, + "learning_rate": 0.0009245296414625488, + "loss": 0.1436, + "step": 26280 + }, + { + "epoch": 16.8, + "learning_rate": 0.0009244586439474618, + "loss": 0.1104, + "step": 26290 + }, + { + "epoch": 16.8, + "learning_rate": 0.0009243876464323749, + "loss": 0.1109, + "step": 26300 + }, + { + "epoch": 16.81, + "learning_rate": 0.000924316648917288, + "loss": 0.1459, + "step": 26310 + }, + { + "epoch": 16.82, + "learning_rate": 0.0009242456514022009, + "loss": 0.131, + "step": 26320 + }, + { + "epoch": 16.82, + "learning_rate": 0.000924174653887114, + "loss": 0.1423, + "step": 26330 + }, + { + "epoch": 16.83, + "learning_rate": 0.000924103656372027, + "loss": 0.1342, + "step": 26340 + }, + { + "epoch": 16.84, + "learning_rate": 0.00092403265885694, + "loss": 0.1241, + "step": 26350 + }, + { + "epoch": 16.84, + "learning_rate": 0.000923961661341853, + "loss": 0.1781, + "step": 26360 + }, + { + "epoch": 16.85, + "learning_rate": 0.0009238906638267661, + "loss": 0.116, + "step": 26370 + }, + { + "epoch": 16.86, + "learning_rate": 0.0009238196663116791, + "loss": 0.1799, + "step": 26380 + }, + { + "epoch": 16.86, + "learning_rate": 0.0009237486687965922, + "loss": 0.1317, + "step": 26390 + }, + { + "epoch": 16.87, + "learning_rate": 0.0009236776712815051, + "loss": 0.0983, + "step": 26400 + }, + { + "epoch": 16.88, + "learning_rate": 0.0009236066737664182, + "loss": 0.1497, + "step": 26410 + }, + { + "epoch": 16.88, + "learning_rate": 0.0009235356762513312, + "loss": 0.1571, + "step": 26420 + }, + { + "epoch": 16.89, + "learning_rate": 0.0009234646787362443, + "loss": 0.1443, + "step": 26430 + }, + { + "epoch": 16.89, + "learning_rate": 0.0009233936812211573, + "loss": 0.1457, + "step": 26440 + }, + { + "epoch": 16.9, + "learning_rate": 0.0009233226837060703, + "loss": 0.1255, + "step": 26450 + }, + { + "epoch": 16.91, + "learning_rate": 0.0009232516861909833, + "loss": 0.1181, + "step": 26460 + }, + { + "epoch": 16.91, + "learning_rate": 0.0009231806886758964, + "loss": 0.1382, + "step": 26470 + }, + { + "epoch": 16.92, + "learning_rate": 0.0009231096911608093, + "loss": 0.1318, + "step": 26480 + }, + { + "epoch": 16.93, + "learning_rate": 0.0009230386936457225, + "loss": 0.1427, + "step": 26490 + }, + { + "epoch": 16.93, + "learning_rate": 0.0009229676961306355, + "loss": 0.1359, + "step": 26500 + }, + { + "epoch": 16.94, + "learning_rate": 0.0009228966986155485, + "loss": 0.1099, + "step": 26510 + }, + { + "epoch": 16.95, + "learning_rate": 0.0009228257011004615, + "loss": 0.1713, + "step": 26520 + }, + { + "epoch": 16.95, + "learning_rate": 0.0009227547035853744, + "loss": 0.1392, + "step": 26530 + }, + { + "epoch": 16.96, + "learning_rate": 0.0009226837060702875, + "loss": 0.123, + "step": 26540 + }, + { + "epoch": 16.96, + "learning_rate": 0.0009226127085552006, + "loss": 0.1638, + "step": 26550 + }, + { + "epoch": 16.97, + "learning_rate": 0.0009225417110401136, + "loss": 0.152, + "step": 26560 + }, + { + "epoch": 16.98, + "learning_rate": 0.0009224707135250266, + "loss": 0.1197, + "step": 26570 + }, + { + "epoch": 16.98, + "learning_rate": 0.0009223997160099397, + "loss": 0.1322, + "step": 26580 + }, + { + "epoch": 16.99, + "learning_rate": 0.0009223287184948526, + "loss": 0.1272, + "step": 26590 + }, + { + "epoch": 17.0, + "learning_rate": 0.0009222577209797657, + "loss": 0.1141, + "step": 26600 + }, + { + "epoch": 17.0, + "learning_rate": 0.0009221867234646788, + "loss": 0.122, + "step": 26610 + }, + { + "epoch": 17.01, + "learning_rate": 0.0009221157259495918, + "loss": 0.1188, + "step": 26620 + }, + { + "epoch": 17.02, + "learning_rate": 0.0009220447284345048, + "loss": 0.1244, + "step": 26630 + }, + { + "epoch": 17.02, + "learning_rate": 0.0009219737309194178, + "loss": 0.1568, + "step": 26640 + }, + { + "epoch": 17.03, + "learning_rate": 0.0009219027334043308, + "loss": 0.1023, + "step": 26650 + }, + { + "epoch": 17.04, + "learning_rate": 0.0009218317358892439, + "loss": 0.1231, + "step": 26660 + }, + { + "epoch": 17.04, + "learning_rate": 0.0009217607383741569, + "loss": 0.1095, + "step": 26670 + }, + { + "epoch": 17.05, + "learning_rate": 0.00092168974085907, + "loss": 0.123, + "step": 26680 + }, + { + "epoch": 17.05, + "learning_rate": 0.000921618743343983, + "loss": 0.1071, + "step": 26690 + }, + { + "epoch": 17.06, + "learning_rate": 0.000921547745828896, + "loss": 0.1295, + "step": 26700 + }, + { + "epoch": 17.07, + "learning_rate": 0.000921476748313809, + "loss": 0.1227, + "step": 26710 + }, + { + "epoch": 17.07, + "learning_rate": 0.000921405750798722, + "loss": 0.1109, + "step": 26720 + }, + { + "epoch": 17.08, + "learning_rate": 0.0009213347532836351, + "loss": 0.1039, + "step": 26730 + }, + { + "epoch": 17.09, + "learning_rate": 0.0009212637557685482, + "loss": 0.1291, + "step": 26740 + }, + { + "epoch": 17.09, + "learning_rate": 0.0009211927582534611, + "loss": 0.11, + "step": 26750 + }, + { + "epoch": 17.1, + "learning_rate": 0.0009211217607383742, + "loss": 0.1387, + "step": 26760 + }, + { + "epoch": 17.11, + "learning_rate": 0.0009210507632232872, + "loss": 0.1014, + "step": 26770 + }, + { + "epoch": 17.11, + "learning_rate": 0.0009209797657082002, + "loss": 0.1332, + "step": 26780 + }, + { + "epoch": 17.12, + "learning_rate": 0.0009209087681931133, + "loss": 0.1355, + "step": 26790 + }, + { + "epoch": 17.12, + "learning_rate": 0.0009208377706780264, + "loss": 0.1203, + "step": 26800 + }, + { + "epoch": 17.13, + "learning_rate": 0.0009207667731629393, + "loss": 0.1282, + "step": 26810 + }, + { + "epoch": 17.14, + "learning_rate": 0.0009206957756478524, + "loss": 0.1236, + "step": 26820 + }, + { + "epoch": 17.14, + "learning_rate": 0.0009206247781327653, + "loss": 0.1301, + "step": 26830 + }, + { + "epoch": 17.15, + "learning_rate": 0.0009205537806176783, + "loss": 0.1248, + "step": 26840 + }, + { + "epoch": 17.16, + "learning_rate": 0.0009204827831025915, + "loss": 0.1393, + "step": 26850 + }, + { + "epoch": 17.16, + "learning_rate": 0.0009204117855875044, + "loss": 0.1158, + "step": 26860 + }, + { + "epoch": 17.17, + "learning_rate": 0.0009203407880724175, + "loss": 0.1415, + "step": 26870 + }, + { + "epoch": 17.18, + "learning_rate": 0.0009202697905573305, + "loss": 0.1166, + "step": 26880 + }, + { + "epoch": 17.18, + "learning_rate": 0.0009201987930422435, + "loss": 0.1273, + "step": 26890 + }, + { + "epoch": 17.19, + "learning_rate": 0.0009201277955271565, + "loss": 0.1322, + "step": 26900 + }, + { + "epoch": 17.19, + "learning_rate": 0.0009200567980120697, + "loss": 0.1042, + "step": 26910 + }, + { + "epoch": 17.2, + "learning_rate": 0.0009199858004969826, + "loss": 0.1692, + "step": 26920 + }, + { + "epoch": 17.21, + "learning_rate": 0.0009199148029818957, + "loss": 0.0913, + "step": 26930 + }, + { + "epoch": 17.21, + "learning_rate": 0.0009198438054668086, + "loss": 0.1293, + "step": 26940 + }, + { + "epoch": 17.22, + "learning_rate": 0.0009197728079517217, + "loss": 0.1275, + "step": 26950 + }, + { + "epoch": 17.23, + "learning_rate": 0.0009197018104366347, + "loss": 0.1086, + "step": 26960 + }, + { + "epoch": 17.23, + "learning_rate": 0.0009196308129215477, + "loss": 0.1151, + "step": 26970 + }, + { + "epoch": 17.24, + "learning_rate": 0.0009195598154064608, + "loss": 0.1449, + "step": 26980 + }, + { + "epoch": 17.25, + "learning_rate": 0.0009194888178913739, + "loss": 0.1332, + "step": 26990 + }, + { + "epoch": 17.25, + "learning_rate": 0.0009194178203762868, + "loss": 0.1148, + "step": 27000 + }, + { + "epoch": 17.26, + "learning_rate": 0.0009193468228611999, + "loss": 0.1344, + "step": 27010 + }, + { + "epoch": 17.27, + "learning_rate": 0.0009192758253461128, + "loss": 0.1061, + "step": 27020 + }, + { + "epoch": 17.27, + "learning_rate": 0.0009192048278310259, + "loss": 0.1154, + "step": 27030 + }, + { + "epoch": 17.28, + "learning_rate": 0.000919133830315939, + "loss": 0.0995, + "step": 27040 + }, + { + "epoch": 17.28, + "learning_rate": 0.000919062832800852, + "loss": 0.1339, + "step": 27050 + }, + { + "epoch": 17.29, + "learning_rate": 0.000918991835285765, + "loss": 0.1048, + "step": 27060 + }, + { + "epoch": 17.3, + "learning_rate": 0.0009189208377706781, + "loss": 0.1519, + "step": 27070 + }, + { + "epoch": 17.3, + "learning_rate": 0.000918849840255591, + "loss": 0.078, + "step": 27080 + }, + { + "epoch": 17.31, + "learning_rate": 0.0009187788427405041, + "loss": 0.1348, + "step": 27090 + }, + { + "epoch": 17.32, + "learning_rate": 0.0009187078452254172, + "loss": 0.1258, + "step": 27100 + }, + { + "epoch": 17.32, + "learning_rate": 0.0009186368477103302, + "loss": 0.1221, + "step": 27110 + }, + { + "epoch": 17.33, + "learning_rate": 0.0009185658501952432, + "loss": 0.1198, + "step": 27120 + }, + { + "epoch": 17.34, + "learning_rate": 0.0009184948526801563, + "loss": 0.0938, + "step": 27130 + }, + { + "epoch": 17.34, + "learning_rate": 0.0009184238551650692, + "loss": 0.1246, + "step": 27140 + }, + { + "epoch": 17.35, + "learning_rate": 0.0009183528576499822, + "loss": 0.132, + "step": 27150 + }, + { + "epoch": 17.35, + "learning_rate": 0.0009182818601348953, + "loss": 0.1265, + "step": 27160 + }, + { + "epoch": 17.36, + "learning_rate": 0.0009182108626198083, + "loss": 0.1348, + "step": 27170 + }, + { + "epoch": 17.37, + "learning_rate": 0.0009181398651047214, + "loss": 0.1407, + "step": 27180 + }, + { + "epoch": 17.37, + "learning_rate": 0.0009180688675896343, + "loss": 0.1409, + "step": 27190 + }, + { + "epoch": 17.38, + "learning_rate": 0.0009179978700745474, + "loss": 0.0996, + "step": 27200 + }, + { + "epoch": 17.39, + "learning_rate": 0.0009179268725594603, + "loss": 0.1391, + "step": 27210 + }, + { + "epoch": 17.39, + "learning_rate": 0.0009178558750443735, + "loss": 0.1252, + "step": 27220 + }, + { + "epoch": 17.4, + "learning_rate": 0.0009177848775292865, + "loss": 0.1404, + "step": 27230 + }, + { + "epoch": 17.41, + "learning_rate": 0.0009177138800141995, + "loss": 0.1238, + "step": 27240 + }, + { + "epoch": 17.41, + "learning_rate": 0.0009176428824991125, + "loss": 0.126, + "step": 27250 + }, + { + "epoch": 17.42, + "learning_rate": 0.0009175718849840256, + "loss": 0.1264, + "step": 27260 + }, + { + "epoch": 17.42, + "learning_rate": 0.0009175008874689385, + "loss": 0.1355, + "step": 27270 + }, + { + "epoch": 17.43, + "learning_rate": 0.0009174298899538517, + "loss": 0.1192, + "step": 27280 + }, + { + "epoch": 17.44, + "learning_rate": 0.0009173588924387647, + "loss": 0.1355, + "step": 27290 + }, + { + "epoch": 17.44, + "learning_rate": 0.0009172878949236777, + "loss": 0.1259, + "step": 27300 + }, + { + "epoch": 17.45, + "learning_rate": 0.0009172168974085907, + "loss": 0.1207, + "step": 27310 + }, + { + "epoch": 17.46, + "learning_rate": 0.0009171458998935038, + "loss": 0.111, + "step": 27320 + }, + { + "epoch": 17.46, + "learning_rate": 0.0009170749023784167, + "loss": 0.1248, + "step": 27330 + }, + { + "epoch": 17.47, + "learning_rate": 0.0009170039048633299, + "loss": 0.1155, + "step": 27340 + }, + { + "epoch": 17.48, + "learning_rate": 0.0009169329073482428, + "loss": 0.1202, + "step": 27350 + }, + { + "epoch": 17.48, + "learning_rate": 0.0009168619098331559, + "loss": 0.1468, + "step": 27360 + }, + { + "epoch": 17.49, + "learning_rate": 0.0009167909123180689, + "loss": 0.1278, + "step": 27370 + }, + { + "epoch": 17.5, + "learning_rate": 0.0009167199148029819, + "loss": 0.1361, + "step": 27380 + }, + { + "epoch": 17.5, + "learning_rate": 0.0009166489172878949, + "loss": 0.1249, + "step": 27390 + }, + { + "epoch": 17.51, + "learning_rate": 0.0009165779197728081, + "loss": 0.1392, + "step": 27400 + }, + { + "epoch": 17.51, + "learning_rate": 0.000916506922257721, + "loss": 0.1001, + "step": 27410 + }, + { + "epoch": 17.52, + "learning_rate": 0.0009164359247426341, + "loss": 0.1354, + "step": 27420 + }, + { + "epoch": 17.53, + "learning_rate": 0.000916364927227547, + "loss": 0.1063, + "step": 27430 + }, + { + "epoch": 17.53, + "learning_rate": 0.0009162939297124601, + "loss": 0.1433, + "step": 27440 + }, + { + "epoch": 17.54, + "learning_rate": 0.0009162229321973731, + "loss": 0.1294, + "step": 27450 + }, + { + "epoch": 17.55, + "learning_rate": 0.0009161519346822862, + "loss": 0.1183, + "step": 27460 + }, + { + "epoch": 17.55, + "learning_rate": 0.0009160809371671992, + "loss": 0.1353, + "step": 27470 + }, + { + "epoch": 17.56, + "learning_rate": 0.0009160099396521122, + "loss": 0.1012, + "step": 27480 + }, + { + "epoch": 17.57, + "learning_rate": 0.0009159389421370252, + "loss": 0.1388, + "step": 27490 + }, + { + "epoch": 17.57, + "learning_rate": 0.0009158679446219382, + "loss": 0.1329, + "step": 27500 + }, + { + "epoch": 17.58, + "learning_rate": 0.0009157969471068513, + "loss": 0.1465, + "step": 27510 + }, + { + "epoch": 17.58, + "learning_rate": 0.0009157259495917643, + "loss": 0.1405, + "step": 27520 + }, + { + "epoch": 17.59, + "learning_rate": 0.0009156549520766774, + "loss": 0.1375, + "step": 27530 + }, + { + "epoch": 17.6, + "learning_rate": 0.0009155839545615903, + "loss": 0.1431, + "step": 27540 + }, + { + "epoch": 17.6, + "learning_rate": 0.0009155129570465034, + "loss": 0.1102, + "step": 27550 + }, + { + "epoch": 17.61, + "learning_rate": 0.0009154419595314164, + "loss": 0.1477, + "step": 27560 + }, + { + "epoch": 17.62, + "learning_rate": 0.0009153709620163294, + "loss": 0.1203, + "step": 27570 + }, + { + "epoch": 17.62, + "learning_rate": 0.0009152999645012424, + "loss": 0.142, + "step": 27580 + }, + { + "epoch": 17.63, + "learning_rate": 0.0009152289669861556, + "loss": 0.1107, + "step": 27590 + }, + { + "epoch": 17.64, + "learning_rate": 0.0009151579694710685, + "loss": 0.0862, + "step": 27600 + }, + { + "epoch": 17.64, + "learning_rate": 0.0009150869719559816, + "loss": 0.1198, + "step": 27610 + }, + { + "epoch": 17.65, + "learning_rate": 0.0009150159744408945, + "loss": 0.1224, + "step": 27620 + }, + { + "epoch": 17.65, + "learning_rate": 0.0009149449769258076, + "loss": 0.1223, + "step": 27630 + }, + { + "epoch": 17.66, + "learning_rate": 0.0009148739794107206, + "loss": 0.1656, + "step": 27640 + }, + { + "epoch": 17.67, + "learning_rate": 0.0009148029818956337, + "loss": 0.1391, + "step": 27650 + }, + { + "epoch": 17.67, + "learning_rate": 0.0009147319843805467, + "loss": 0.1391, + "step": 27660 + }, + { + "epoch": 17.68, + "learning_rate": 0.0009146609868654598, + "loss": 0.1292, + "step": 27670 + }, + { + "epoch": 17.69, + "learning_rate": 0.0009145899893503727, + "loss": 0.1195, + "step": 27680 + }, + { + "epoch": 17.69, + "learning_rate": 0.0009145189918352858, + "loss": 0.1563, + "step": 27690 + }, + { + "epoch": 17.7, + "learning_rate": 0.0009144479943201987, + "loss": 0.1435, + "step": 27700 + }, + { + "epoch": 17.71, + "learning_rate": 0.0009143769968051119, + "loss": 0.1365, + "step": 27710 + }, + { + "epoch": 17.71, + "learning_rate": 0.0009143059992900249, + "loss": 0.1296, + "step": 27720 + }, + { + "epoch": 17.72, + "learning_rate": 0.000914235001774938, + "loss": 0.1118, + "step": 27730 + }, + { + "epoch": 17.73, + "learning_rate": 0.0009141640042598509, + "loss": 0.115, + "step": 27740 + }, + { + "epoch": 17.73, + "learning_rate": 0.000914093006744764, + "loss": 0.1173, + "step": 27750 + }, + { + "epoch": 17.74, + "learning_rate": 0.0009140220092296769, + "loss": 0.1318, + "step": 27760 + }, + { + "epoch": 17.74, + "learning_rate": 0.0009139510117145901, + "loss": 0.1547, + "step": 27770 + }, + { + "epoch": 17.75, + "learning_rate": 0.0009138800141995031, + "loss": 0.1173, + "step": 27780 + }, + { + "epoch": 17.76, + "learning_rate": 0.000913809016684416, + "loss": 0.1173, + "step": 27790 + }, + { + "epoch": 17.76, + "learning_rate": 0.0009137380191693291, + "loss": 0.1154, + "step": 27800 + }, + { + "epoch": 17.77, + "learning_rate": 0.000913667021654242, + "loss": 0.1305, + "step": 27810 + }, + { + "epoch": 17.78, + "learning_rate": 0.0009135960241391551, + "loss": 0.1241, + "step": 27820 + }, + { + "epoch": 17.78, + "learning_rate": 0.0009135250266240682, + "loss": 0.1159, + "step": 27830 + }, + { + "epoch": 17.79, + "learning_rate": 0.0009134540291089812, + "loss": 0.1501, + "step": 27840 + }, + { + "epoch": 17.8, + "learning_rate": 0.0009133830315938942, + "loss": 0.1342, + "step": 27850 + }, + { + "epoch": 17.8, + "learning_rate": 0.0009133120340788073, + "loss": 0.1278, + "step": 27860 + }, + { + "epoch": 17.81, + "learning_rate": 0.0009132410365637202, + "loss": 0.134, + "step": 27870 + }, + { + "epoch": 17.81, + "learning_rate": 0.0009131700390486333, + "loss": 0.1363, + "step": 27880 + }, + { + "epoch": 17.82, + "learning_rate": 0.0009130990415335464, + "loss": 0.1314, + "step": 27890 + }, + { + "epoch": 17.83, + "learning_rate": 0.0009130280440184594, + "loss": 0.1324, + "step": 27900 + }, + { + "epoch": 17.83, + "learning_rate": 0.0009129570465033724, + "loss": 0.1197, + "step": 27910 + }, + { + "epoch": 17.84, + "learning_rate": 0.0009128860489882854, + "loss": 0.1307, + "step": 27920 + }, + { + "epoch": 17.85, + "learning_rate": 0.0009128150514731984, + "loss": 0.1197, + "step": 27930 + }, + { + "epoch": 17.85, + "learning_rate": 0.0009127440539581115, + "loss": 0.1454, + "step": 27940 + }, + { + "epoch": 17.86, + "learning_rate": 0.0009126730564430245, + "loss": 0.1155, + "step": 27950 + }, + { + "epoch": 17.87, + "learning_rate": 0.0009126020589279376, + "loss": 0.1238, + "step": 27960 + }, + { + "epoch": 17.87, + "learning_rate": 0.0009125310614128506, + "loss": 0.102, + "step": 27970 + }, + { + "epoch": 17.88, + "learning_rate": 0.0009124600638977636, + "loss": 0.1501, + "step": 27980 + }, + { + "epoch": 17.88, + "learning_rate": 0.0009123890663826766, + "loss": 0.1435, + "step": 27990 + }, + { + "epoch": 17.89, + "learning_rate": 0.0009123180688675897, + "loss": 0.1381, + "step": 28000 + }, + { + "epoch": 17.9, + "learning_rate": 0.0009122470713525027, + "loss": 0.1478, + "step": 28010 + }, + { + "epoch": 17.9, + "learning_rate": 0.0009121760738374158, + "loss": 0.1608, + "step": 28020 + }, + { + "epoch": 17.91, + "learning_rate": 0.0009121050763223287, + "loss": 0.16, + "step": 28030 + }, + { + "epoch": 17.92, + "learning_rate": 0.0009120340788072418, + "loss": 0.1383, + "step": 28040 + }, + { + "epoch": 17.92, + "learning_rate": 0.0009119630812921548, + "loss": 0.1365, + "step": 28050 + }, + { + "epoch": 17.93, + "learning_rate": 0.0009118920837770678, + "loss": 0.1274, + "step": 28060 + }, + { + "epoch": 17.94, + "learning_rate": 0.0009118210862619809, + "loss": 0.1176, + "step": 28070 + }, + { + "epoch": 17.94, + "learning_rate": 0.000911750088746894, + "loss": 0.1474, + "step": 28080 + }, + { + "epoch": 17.95, + "learning_rate": 0.0009116790912318069, + "loss": 0.1065, + "step": 28090 + }, + { + "epoch": 17.95, + "learning_rate": 0.0009116080937167199, + "loss": 0.1241, + "step": 28100 + }, + { + "epoch": 17.96, + "learning_rate": 0.000911537096201633, + "loss": 0.123, + "step": 28110 + }, + { + "epoch": 17.97, + "learning_rate": 0.0009114660986865459, + "loss": 0.1231, + "step": 28120 + }, + { + "epoch": 17.97, + "learning_rate": 0.0009113951011714591, + "loss": 0.1273, + "step": 28130 + }, + { + "epoch": 17.98, + "learning_rate": 0.000911324103656372, + "loss": 0.136, + "step": 28140 + }, + { + "epoch": 17.99, + "learning_rate": 0.0009112531061412851, + "loss": 0.1333, + "step": 28150 + }, + { + "epoch": 17.99, + "learning_rate": 0.0009111821086261981, + "loss": 0.1464, + "step": 28160 + }, + { + "epoch": 18.0, + "learning_rate": 0.0009111111111111111, + "loss": 0.1176, + "step": 28170 + }, + { + "epoch": 18.01, + "learning_rate": 0.0009110401135960241, + "loss": 0.1254, + "step": 28180 + }, + { + "epoch": 18.01, + "learning_rate": 0.0009109691160809372, + "loss": 0.111, + "step": 28190 + }, + { + "epoch": 18.02, + "learning_rate": 0.0009108981185658502, + "loss": 0.1208, + "step": 28200 + }, + { + "epoch": 18.03, + "learning_rate": 0.0009108271210507633, + "loss": 0.1233, + "step": 28210 + }, + { + "epoch": 18.03, + "learning_rate": 0.0009107561235356762, + "loss": 0.097, + "step": 28220 + }, + { + "epoch": 18.04, + "learning_rate": 0.0009106851260205893, + "loss": 0.1101, + "step": 28230 + }, + { + "epoch": 18.04, + "learning_rate": 0.0009106141285055023, + "loss": 0.0998, + "step": 28240 + }, + { + "epoch": 18.05, + "learning_rate": 0.0009105431309904153, + "loss": 0.116, + "step": 28250 + }, + { + "epoch": 18.06, + "learning_rate": 0.0009104721334753284, + "loss": 0.1068, + "step": 28260 + }, + { + "epoch": 18.06, + "learning_rate": 0.0009104011359602415, + "loss": 0.1056, + "step": 28270 + }, + { + "epoch": 18.07, + "learning_rate": 0.0009103301384451544, + "loss": 0.1343, + "step": 28280 + }, + { + "epoch": 18.08, + "learning_rate": 0.0009102591409300675, + "loss": 0.1656, + "step": 28290 + }, + { + "epoch": 18.08, + "learning_rate": 0.0009101881434149804, + "loss": 0.1124, + "step": 28300 + }, + { + "epoch": 18.09, + "learning_rate": 0.0009101171458998935, + "loss": 0.1164, + "step": 28310 + }, + { + "epoch": 18.1, + "learning_rate": 0.0009100461483848066, + "loss": 0.0916, + "step": 28320 + }, + { + "epoch": 18.1, + "learning_rate": 0.0009099751508697196, + "loss": 0.1054, + "step": 28330 + }, + { + "epoch": 18.11, + "learning_rate": 0.0009099041533546326, + "loss": 0.1102, + "step": 28340 + }, + { + "epoch": 18.11, + "learning_rate": 0.0009098331558395457, + "loss": 0.1293, + "step": 28350 + }, + { + "epoch": 18.12, + "learning_rate": 0.0009097621583244586, + "loss": 0.1283, + "step": 28360 + }, + { + "epoch": 18.13, + "learning_rate": 0.0009096911608093717, + "loss": 0.1168, + "step": 28370 + }, + { + "epoch": 18.13, + "learning_rate": 0.0009096201632942848, + "loss": 0.0809, + "step": 28380 + }, + { + "epoch": 18.14, + "learning_rate": 0.0009095491657791978, + "loss": 0.1197, + "step": 28390 + }, + { + "epoch": 18.15, + "learning_rate": 0.0009094781682641108, + "loss": 0.1149, + "step": 28400 + }, + { + "epoch": 18.15, + "learning_rate": 0.0009094071707490237, + "loss": 0.1162, + "step": 28410 + }, + { + "epoch": 18.16, + "learning_rate": 0.0009093361732339368, + "loss": 0.1258, + "step": 28420 + }, + { + "epoch": 18.17, + "learning_rate": 0.0009092651757188498, + "loss": 0.1266, + "step": 28430 + }, + { + "epoch": 18.17, + "learning_rate": 0.0009091941782037629, + "loss": 0.1502, + "step": 28440 + }, + { + "epoch": 18.18, + "learning_rate": 0.0009091231806886759, + "loss": 0.1195, + "step": 28450 + }, + { + "epoch": 18.19, + "learning_rate": 0.000909052183173589, + "loss": 0.1035, + "step": 28460 + }, + { + "epoch": 18.19, + "learning_rate": 0.0009089811856585019, + "loss": 0.1482, + "step": 28470 + }, + { + "epoch": 18.2, + "learning_rate": 0.000908910188143415, + "loss": 0.1187, + "step": 28480 + }, + { + "epoch": 18.2, + "learning_rate": 0.000908839190628328, + "loss": 0.1204, + "step": 28490 + }, + { + "epoch": 18.21, + "learning_rate": 0.0009087681931132411, + "loss": 0.1059, + "step": 28500 + }, + { + "epoch": 18.22, + "learning_rate": 0.0009086971955981541, + "loss": 0.1164, + "step": 28510 + }, + { + "epoch": 18.22, + "learning_rate": 0.0009086261980830671, + "loss": 0.1168, + "step": 28520 + }, + { + "epoch": 18.23, + "learning_rate": 0.0009085552005679801, + "loss": 0.1324, + "step": 28530 + }, + { + "epoch": 18.24, + "learning_rate": 0.0009084842030528932, + "loss": 0.124, + "step": 28540 + }, + { + "epoch": 18.24, + "learning_rate": 0.0009084132055378061, + "loss": 0.1028, + "step": 28550 + }, + { + "epoch": 18.25, + "learning_rate": 0.0009083422080227193, + "loss": 0.1057, + "step": 28560 + }, + { + "epoch": 18.26, + "learning_rate": 0.0009082712105076323, + "loss": 0.1132, + "step": 28570 + }, + { + "epoch": 18.26, + "learning_rate": 0.0009082002129925453, + "loss": 0.1164, + "step": 28580 + }, + { + "epoch": 18.27, + "learning_rate": 0.0009081292154774583, + "loss": 0.1129, + "step": 28590 + }, + { + "epoch": 18.27, + "learning_rate": 0.0009080582179623713, + "loss": 0.1185, + "step": 28600 + }, + { + "epoch": 18.28, + "learning_rate": 0.0009079872204472843, + "loss": 0.1268, + "step": 28610 + }, + { + "epoch": 18.29, + "learning_rate": 0.0009079162229321975, + "loss": 0.1184, + "step": 28620 + }, + { + "epoch": 18.29, + "learning_rate": 0.0009078452254171104, + "loss": 0.0942, + "step": 28630 + }, + { + "epoch": 18.3, + "learning_rate": 0.0009077742279020235, + "loss": 0.109, + "step": 28640 + }, + { + "epoch": 18.31, + "learning_rate": 0.0009077032303869365, + "loss": 0.0995, + "step": 28650 + }, + { + "epoch": 18.31, + "learning_rate": 0.0009076322328718495, + "loss": 0.1432, + "step": 28660 + }, + { + "epoch": 18.32, + "learning_rate": 0.0009075612353567625, + "loss": 0.1517, + "step": 28670 + }, + { + "epoch": 18.33, + "learning_rate": 0.0009074902378416757, + "loss": 0.1044, + "step": 28680 + }, + { + "epoch": 18.33, + "learning_rate": 0.0009074192403265886, + "loss": 0.0846, + "step": 28690 + }, + { + "epoch": 18.34, + "learning_rate": 0.0009073482428115017, + "loss": 0.1275, + "step": 28700 + }, + { + "epoch": 18.34, + "learning_rate": 0.0009072772452964146, + "loss": 0.0935, + "step": 28710 + }, + { + "epoch": 18.35, + "learning_rate": 0.0009072062477813276, + "loss": 0.0898, + "step": 28720 + }, + { + "epoch": 18.36, + "learning_rate": 0.0009071352502662407, + "loss": 0.127, + "step": 28730 + }, + { + "epoch": 18.36, + "learning_rate": 0.0009070642527511536, + "loss": 0.1172, + "step": 28740 + }, + { + "epoch": 18.37, + "learning_rate": 0.0009069932552360668, + "loss": 0.1069, + "step": 28750 + }, + { + "epoch": 18.38, + "learning_rate": 0.0009069222577209798, + "loss": 0.1378, + "step": 28760 + }, + { + "epoch": 18.38, + "learning_rate": 0.0009068512602058928, + "loss": 0.1268, + "step": 28770 + }, + { + "epoch": 18.39, + "learning_rate": 0.0009067802626908058, + "loss": 0.0926, + "step": 28780 + }, + { + "epoch": 18.4, + "learning_rate": 0.0009067092651757188, + "loss": 0.1208, + "step": 28790 + }, + { + "epoch": 18.4, + "learning_rate": 0.0009066382676606318, + "loss": 0.1234, + "step": 28800 + }, + { + "epoch": 18.41, + "learning_rate": 0.000906567270145545, + "loss": 0.1548, + "step": 28810 + }, + { + "epoch": 18.42, + "learning_rate": 0.0009064962726304579, + "loss": 0.1121, + "step": 28820 + }, + { + "epoch": 18.42, + "learning_rate": 0.000906425275115371, + "loss": 0.1457, + "step": 28830 + }, + { + "epoch": 18.43, + "learning_rate": 0.000906354277600284, + "loss": 0.1252, + "step": 28840 + }, + { + "epoch": 18.43, + "learning_rate": 0.000906283280085197, + "loss": 0.1345, + "step": 28850 + }, + { + "epoch": 18.44, + "learning_rate": 0.00090621228257011, + "loss": 0.1101, + "step": 28860 + }, + { + "epoch": 18.45, + "learning_rate": 0.0009061412850550232, + "loss": 0.1146, + "step": 28870 + }, + { + "epoch": 18.45, + "learning_rate": 0.0009060702875399361, + "loss": 0.1232, + "step": 28880 + }, + { + "epoch": 18.46, + "learning_rate": 0.0009059992900248492, + "loss": 0.1239, + "step": 28890 + }, + { + "epoch": 18.47, + "learning_rate": 0.0009059282925097621, + "loss": 0.1202, + "step": 28900 + }, + { + "epoch": 18.47, + "learning_rate": 0.0009058572949946752, + "loss": 0.1124, + "step": 28910 + }, + { + "epoch": 18.48, + "learning_rate": 0.0009057862974795882, + "loss": 0.1148, + "step": 28920 + }, + { + "epoch": 18.49, + "learning_rate": 0.0009057152999645013, + "loss": 0.1111, + "step": 28930 + }, + { + "epoch": 18.49, + "learning_rate": 0.0009056443024494143, + "loss": 0.1243, + "step": 28940 + }, + { + "epoch": 18.5, + "learning_rate": 0.0009055733049343274, + "loss": 0.1227, + "step": 28950 + }, + { + "epoch": 18.5, + "learning_rate": 0.0009055023074192403, + "loss": 0.0993, + "step": 28960 + }, + { + "epoch": 18.51, + "learning_rate": 0.0009054313099041534, + "loss": 0.1286, + "step": 28970 + }, + { + "epoch": 18.52, + "learning_rate": 0.0009053603123890663, + "loss": 0.1054, + "step": 28980 + }, + { + "epoch": 18.52, + "learning_rate": 0.0009052893148739795, + "loss": 0.1196, + "step": 28990 + }, + { + "epoch": 18.53, + "learning_rate": 0.0009052183173588925, + "loss": 0.1255, + "step": 29000 + }, + { + "epoch": 18.54, + "learning_rate": 0.0009051473198438055, + "loss": 0.1325, + "step": 29010 + }, + { + "epoch": 18.54, + "learning_rate": 0.0009050763223287185, + "loss": 0.1161, + "step": 29020 + }, + { + "epoch": 18.55, + "learning_rate": 0.0009050053248136315, + "loss": 0.1144, + "step": 29030 + }, + { + "epoch": 18.56, + "learning_rate": 0.0009049343272985445, + "loss": 0.1293, + "step": 29040 + }, + { + "epoch": 18.56, + "learning_rate": 0.0009048633297834576, + "loss": 0.1284, + "step": 29050 + }, + { + "epoch": 18.57, + "learning_rate": 0.0009047923322683707, + "loss": 0.1086, + "step": 29060 + }, + { + "epoch": 18.57, + "learning_rate": 0.0009047213347532836, + "loss": 0.1142, + "step": 29070 + }, + { + "epoch": 18.58, + "learning_rate": 0.0009046503372381967, + "loss": 0.1223, + "step": 29080 + }, + { + "epoch": 18.59, + "learning_rate": 0.0009045793397231096, + "loss": 0.137, + "step": 29090 + }, + { + "epoch": 18.59, + "learning_rate": 0.0009045083422080227, + "loss": 0.1287, + "step": 29100 + }, + { + "epoch": 18.6, + "learning_rate": 0.0009044373446929358, + "loss": 0.1208, + "step": 29110 + }, + { + "epoch": 18.61, + "learning_rate": 0.0009043663471778488, + "loss": 0.1086, + "step": 29120 + }, + { + "epoch": 18.61, + "learning_rate": 0.0009042953496627618, + "loss": 0.125, + "step": 29130 + }, + { + "epoch": 18.62, + "learning_rate": 0.0009042243521476749, + "loss": 0.1193, + "step": 29140 + }, + { + "epoch": 18.63, + "learning_rate": 0.0009041533546325878, + "loss": 0.1054, + "step": 29150 + }, + { + "epoch": 18.63, + "learning_rate": 0.0009040823571175009, + "loss": 0.1231, + "step": 29160 + }, + { + "epoch": 18.64, + "learning_rate": 0.000904011359602414, + "loss": 0.1086, + "step": 29170 + }, + { + "epoch": 18.65, + "learning_rate": 0.000903940362087327, + "loss": 0.1391, + "step": 29180 + }, + { + "epoch": 18.65, + "learning_rate": 0.00090386936457224, + "loss": 0.1039, + "step": 29190 + }, + { + "epoch": 18.66, + "learning_rate": 0.000903798367057153, + "loss": 0.14, + "step": 29200 + }, + { + "epoch": 18.66, + "learning_rate": 0.000903727369542066, + "loss": 0.1216, + "step": 29210 + }, + { + "epoch": 18.67, + "learning_rate": 0.0009036563720269791, + "loss": 0.1337, + "step": 29220 + }, + { + "epoch": 18.68, + "learning_rate": 0.0009035853745118921, + "loss": 0.1436, + "step": 29230 + }, + { + "epoch": 18.68, + "learning_rate": 0.0009035143769968052, + "loss": 0.1275, + "step": 29240 + }, + { + "epoch": 18.69, + "learning_rate": 0.0009034433794817182, + "loss": 0.1191, + "step": 29250 + }, + { + "epoch": 18.7, + "learning_rate": 0.0009033723819666312, + "loss": 0.1228, + "step": 29260 + }, + { + "epoch": 18.7, + "learning_rate": 0.0009033013844515442, + "loss": 0.1387, + "step": 29270 + }, + { + "epoch": 18.71, + "learning_rate": 0.0009032303869364573, + "loss": 0.146, + "step": 29280 + }, + { + "epoch": 18.72, + "learning_rate": 0.0009031593894213703, + "loss": 0.1387, + "step": 29290 + }, + { + "epoch": 18.72, + "learning_rate": 0.0009030883919062834, + "loss": 0.0893, + "step": 29300 + }, + { + "epoch": 18.73, + "learning_rate": 0.0009030173943911963, + "loss": 0.1221, + "step": 29310 + }, + { + "epoch": 18.73, + "learning_rate": 0.0009029463968761094, + "loss": 0.1158, + "step": 29320 + }, + { + "epoch": 18.74, + "learning_rate": 0.0009028753993610224, + "loss": 0.1393, + "step": 29330 + }, + { + "epoch": 18.75, + "learning_rate": 0.0009028044018459353, + "loss": 0.1195, + "step": 29340 + }, + { + "epoch": 18.75, + "learning_rate": 0.0009027334043308484, + "loss": 0.1365, + "step": 29350 + }, + { + "epoch": 18.76, + "learning_rate": 0.0009026624068157615, + "loss": 0.1153, + "step": 29360 + }, + { + "epoch": 18.77, + "learning_rate": 0.0009025914093006745, + "loss": 0.1396, + "step": 29370 + }, + { + "epoch": 18.77, + "learning_rate": 0.0009025204117855875, + "loss": 0.1326, + "step": 29380 + }, + { + "epoch": 18.78, + "learning_rate": 0.0009024494142705005, + "loss": 0.1302, + "step": 29390 + }, + { + "epoch": 18.79, + "learning_rate": 0.0009023784167554135, + "loss": 0.142, + "step": 29400 + }, + { + "epoch": 18.79, + "learning_rate": 0.0009023074192403266, + "loss": 0.1146, + "step": 29410 + }, + { + "epoch": 18.8, + "learning_rate": 0.0009022364217252396, + "loss": 0.1048, + "step": 29420 + }, + { + "epoch": 18.8, + "learning_rate": 0.0009021654242101527, + "loss": 0.1186, + "step": 29430 + }, + { + "epoch": 18.81, + "learning_rate": 0.0009020944266950657, + "loss": 0.1196, + "step": 29440 + }, + { + "epoch": 18.82, + "learning_rate": 0.0009020234291799787, + "loss": 0.1468, + "step": 29450 + }, + { + "epoch": 18.82, + "learning_rate": 0.0009019524316648917, + "loss": 0.1145, + "step": 29460 + }, + { + "epoch": 18.83, + "learning_rate": 0.0009018814341498048, + "loss": 0.112, + "step": 29470 + }, + { + "epoch": 18.84, + "learning_rate": 0.0009018104366347178, + "loss": 0.1304, + "step": 29480 + }, + { + "epoch": 18.84, + "learning_rate": 0.0009017394391196309, + "loss": 0.1395, + "step": 29490 + }, + { + "epoch": 18.85, + "learning_rate": 0.0009016684416045438, + "loss": 0.1276, + "step": 29500 + }, + { + "epoch": 18.86, + "learning_rate": 0.0009015974440894569, + "loss": 0.097, + "step": 29510 + }, + { + "epoch": 18.86, + "learning_rate": 0.0009015264465743699, + "loss": 0.1287, + "step": 29520 + }, + { + "epoch": 18.87, + "learning_rate": 0.0009014554490592829, + "loss": 0.1234, + "step": 29530 + }, + { + "epoch": 18.88, + "learning_rate": 0.000901384451544196, + "loss": 0.1509, + "step": 29540 + }, + { + "epoch": 18.88, + "learning_rate": 0.0009013134540291091, + "loss": 0.1317, + "step": 29550 + }, + { + "epoch": 18.89, + "learning_rate": 0.000901242456514022, + "loss": 0.1198, + "step": 29560 + }, + { + "epoch": 18.89, + "learning_rate": 0.0009011714589989351, + "loss": 0.1212, + "step": 29570 + }, + { + "epoch": 18.9, + "learning_rate": 0.000901100461483848, + "loss": 0.1124, + "step": 29580 + }, + { + "epoch": 18.91, + "learning_rate": 0.0009010294639687611, + "loss": 0.1289, + "step": 29590 + }, + { + "epoch": 18.91, + "learning_rate": 0.0009009584664536742, + "loss": 0.0943, + "step": 29600 + }, + { + "epoch": 18.92, + "learning_rate": 0.0009008874689385872, + "loss": 0.1308, + "step": 29610 + }, + { + "epoch": 18.93, + "learning_rate": 0.0009008164714235002, + "loss": 0.1405, + "step": 29620 + }, + { + "epoch": 18.93, + "learning_rate": 0.0009007454739084133, + "loss": 0.1067, + "step": 29630 + }, + { + "epoch": 18.94, + "learning_rate": 0.0009006744763933262, + "loss": 0.1219, + "step": 29640 + }, + { + "epoch": 18.95, + "learning_rate": 0.0009006034788782392, + "loss": 0.1205, + "step": 29650 + }, + { + "epoch": 18.95, + "learning_rate": 0.0009005324813631524, + "loss": 0.1326, + "step": 29660 + }, + { + "epoch": 18.96, + "learning_rate": 0.0009004614838480653, + "loss": 0.1112, + "step": 29670 + }, + { + "epoch": 18.96, + "learning_rate": 0.0009003904863329784, + "loss": 0.1184, + "step": 29680 + }, + { + "epoch": 18.97, + "learning_rate": 0.0009003194888178913, + "loss": 0.1426, + "step": 29690 + }, + { + "epoch": 18.98, + "learning_rate": 0.0009002484913028044, + "loss": 0.1358, + "step": 29700 + }, + { + "epoch": 18.98, + "learning_rate": 0.0009001774937877174, + "loss": 0.1562, + "step": 29710 + }, + { + "epoch": 18.99, + "learning_rate": 0.0009001064962726305, + "loss": 0.1396, + "step": 29720 + }, + { + "epoch": 19.0, + "learning_rate": 0.0009000354987575435, + "loss": 0.1173, + "step": 29730 + }, + { + "epoch": 19.0, + "learning_rate": 0.0008999645012424566, + "loss": 0.1017, + "step": 29740 + }, + { + "epoch": 19.01, + "learning_rate": 0.0008998935037273695, + "loss": 0.1158, + "step": 29750 + }, + { + "epoch": 19.02, + "learning_rate": 0.0008998225062122826, + "loss": 0.109, + "step": 29760 + }, + { + "epoch": 19.02, + "learning_rate": 0.0008997515086971955, + "loss": 0.1313, + "step": 29770 + }, + { + "epoch": 19.03, + "learning_rate": 0.0008996805111821087, + "loss": 0.1366, + "step": 29780 + }, + { + "epoch": 19.04, + "learning_rate": 0.0008996095136670217, + "loss": 0.1002, + "step": 29790 + }, + { + "epoch": 19.04, + "learning_rate": 0.0008995385161519347, + "loss": 0.0708, + "step": 29800 + }, + { + "epoch": 19.05, + "learning_rate": 0.0008994675186368477, + "loss": 0.1137, + "step": 29810 + }, + { + "epoch": 19.05, + "learning_rate": 0.0008993965211217608, + "loss": 0.0956, + "step": 29820 + }, + { + "epoch": 19.06, + "learning_rate": 0.0008993255236066737, + "loss": 0.1274, + "step": 29830 + }, + { + "epoch": 19.07, + "learning_rate": 0.0008992545260915869, + "loss": 0.1048, + "step": 29840 + }, + { + "epoch": 19.07, + "learning_rate": 0.0008991835285764999, + "loss": 0.0961, + "step": 29850 + }, + { + "epoch": 19.08, + "learning_rate": 0.0008991125310614129, + "loss": 0.125, + "step": 29860 + }, + { + "epoch": 19.09, + "learning_rate": 0.0008990415335463259, + "loss": 0.1302, + "step": 29870 + }, + { + "epoch": 19.09, + "learning_rate": 0.000898970536031239, + "loss": 0.1347, + "step": 29880 + }, + { + "epoch": 19.1, + "learning_rate": 0.0008988995385161519, + "loss": 0.1157, + "step": 29890 + }, + { + "epoch": 19.11, + "learning_rate": 0.0008988285410010651, + "loss": 0.1186, + "step": 29900 + }, + { + "epoch": 19.11, + "learning_rate": 0.000898757543485978, + "loss": 0.1363, + "step": 29910 + }, + { + "epoch": 19.12, + "learning_rate": 0.0008986865459708911, + "loss": 0.1319, + "step": 29920 + }, + { + "epoch": 19.12, + "learning_rate": 0.0008986155484558041, + "loss": 0.1121, + "step": 29930 + }, + { + "epoch": 19.13, + "learning_rate": 0.0008985445509407171, + "loss": 0.0922, + "step": 29940 + }, + { + "epoch": 19.14, + "learning_rate": 0.0008984735534256301, + "loss": 0.1178, + "step": 29950 + }, + { + "epoch": 19.14, + "learning_rate": 0.000898402555910543, + "loss": 0.1193, + "step": 29960 + }, + { + "epoch": 19.15, + "learning_rate": 0.0008983315583954562, + "loss": 0.111, + "step": 29970 + }, + { + "epoch": 19.16, + "learning_rate": 0.0008982605608803692, + "loss": 0.0945, + "step": 29980 + }, + { + "epoch": 19.16, + "learning_rate": 0.0008981895633652822, + "loss": 0.119, + "step": 29990 + }, + { + "epoch": 19.17, + "learning_rate": 0.0008981185658501952, + "loss": 0.1203, + "step": 30000 + } + ], + "max_steps": 156500, + "num_train_epochs": 100, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +}