{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5, "global_step": 13971, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.8571428571428575e-07, "loss": 10.6891, "step": 10 }, { "epoch": 0.0, "learning_rate": 7.61904761904762e-07, "loss": 10.6539, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.2380952380952382e-06, "loss": 10.6617, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.7142857142857145e-06, "loss": 10.6156, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.1904761904761908e-06, "loss": 10.6289, "step": 50 }, { "epoch": 0.01, "learning_rate": 2.666666666666667e-06, "loss": 10.6148, "step": 60 }, { "epoch": 0.01, "learning_rate": 3.142857142857143e-06, "loss": 10.5672, "step": 70 }, { "epoch": 0.01, "learning_rate": 3.6190476190476194e-06, "loss": 10.5367, "step": 80 }, { "epoch": 0.01, "learning_rate": 4.095238095238096e-06, "loss": 10.5062, "step": 90 }, { "epoch": 0.01, "learning_rate": 4.571428571428572e-06, "loss": 10.4492, "step": 100 }, { "epoch": 0.01, "learning_rate": 5.047619047619048e-06, "loss": 10.4242, "step": 110 }, { "epoch": 0.01, "learning_rate": 5.523809523809525e-06, "loss": 10.3461, "step": 120 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 10.2758, "step": 130 }, { "epoch": 0.02, "learning_rate": 6.476190476190477e-06, "loss": 10.2016, "step": 140 }, { "epoch": 0.02, "learning_rate": 6.952380952380952e-06, "loss": 10.132, "step": 150 }, { "epoch": 0.02, "learning_rate": 7.428571428571429e-06, "loss": 10.0852, "step": 160 }, { "epoch": 0.02, "learning_rate": 7.904761904761904e-06, "loss": 10.0258, "step": 170 }, { "epoch": 0.02, "learning_rate": 8.380952380952382e-06, "loss": 10.0055, "step": 180 }, { "epoch": 0.02, "learning_rate": 8.857142857142858e-06, "loss": 9.9734, "step": 190 }, { "epoch": 0.02, "learning_rate": 9.333333333333334e-06, "loss": 9.9578, "step": 200 }, { "epoch": 0.02, "learning_rate": 9.80952380952381e-06, "loss": 9.9516, "step": 210 }, { "epoch": 0.02, "learning_rate": 1.0285714285714285e-05, "loss": 9.9172, "step": 220 }, { "epoch": 0.02, "learning_rate": 1.0761904761904763e-05, "loss": 9.8992, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.1238095238095239e-05, "loss": 9.8641, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.1714285714285716e-05, "loss": 9.8609, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.2190476190476192e-05, "loss": 9.8406, "step": 260 }, { "epoch": 0.03, "learning_rate": 1.2666666666666667e-05, "loss": 9.7977, "step": 270 }, { "epoch": 0.03, "learning_rate": 1.3142857142857145e-05, "loss": 9.7937, "step": 280 }, { "epoch": 0.03, "learning_rate": 1.361904761904762e-05, "loss": 9.7711, "step": 290 }, { "epoch": 0.03, "learning_rate": 1.4095238095238097e-05, "loss": 9.7352, "step": 300 }, { "epoch": 0.03, "learning_rate": 1.4571428571428573e-05, "loss": 9.7195, "step": 310 }, { "epoch": 0.03, "learning_rate": 1.5047619047619049e-05, "loss": 9.7, "step": 320 }, { "epoch": 0.04, "learning_rate": 1.5523809523809525e-05, "loss": 9.6523, "step": 330 }, { "epoch": 0.04, "learning_rate": 1.6000000000000003e-05, "loss": 9.593, "step": 340 }, { "epoch": 0.04, "learning_rate": 1.6476190476190477e-05, "loss": 9.5703, "step": 350 }, { "epoch": 0.04, "learning_rate": 1.6952380952380955e-05, "loss": 9.5336, "step": 360 }, { "epoch": 0.04, "learning_rate": 1.742857142857143e-05, "loss": 9.5039, "step": 370 }, { "epoch": 0.04, "learning_rate": 1.7904761904761907e-05, "loss": 9.418, "step": 380 }, { "epoch": 0.04, "learning_rate": 1.838095238095238e-05, "loss": 9.3305, "step": 390 }, { "epoch": 0.04, "learning_rate": 1.885714285714286e-05, "loss": 9.3203, "step": 400 }, { "epoch": 0.04, "learning_rate": 1.9333333333333333e-05, "loss": 9.207, "step": 410 }, { "epoch": 0.05, "learning_rate": 1.980952380952381e-05, "loss": 9.1086, "step": 420 }, { "epoch": 0.05, "learning_rate": 1.9999990325478594e-05, "loss": 8.9594, "step": 430 }, { "epoch": 0.05, "learning_rate": 1.9999931203471123e-05, "loss": 8.5867, "step": 440 }, { "epoch": 0.05, "learning_rate": 1.9999818334507674e-05, "loss": 8.1969, "step": 450 }, { "epoch": 0.05, "learning_rate": 1.9999651719194886e-05, "loss": 7.7641, "step": 460 }, { "epoch": 0.05, "learning_rate": 1.9999431358428275e-05, "loss": 7.366, "step": 470 }, { "epoch": 0.05, "learning_rate": 1.999915725339222e-05, "loss": 7.082, "step": 480 }, { "epoch": 0.05, "learning_rate": 1.9998829405559963e-05, "loss": 6.9242, "step": 490 }, { "epoch": 0.05, "learning_rate": 1.9998447816693596e-05, "loss": 6.7367, "step": 500 }, { "epoch": 0.05, "learning_rate": 1.999801248884406e-05, "loss": 6.6734, "step": 510 }, { "epoch": 0.06, "learning_rate": 1.9997523424351122e-05, "loss": 6.5676, "step": 520 }, { "epoch": 0.06, "learning_rate": 1.999698062584338e-05, "loss": 6.4836, "step": 530 }, { "epoch": 0.06, "learning_rate": 1.999638409623822e-05, "loss": 6.4262, "step": 540 }, { "epoch": 0.06, "learning_rate": 1.999573383874184e-05, "loss": 6.4371, "step": 550 }, { "epoch": 0.06, "learning_rate": 1.9995029856849192e-05, "loss": 6.3914, "step": 560 }, { "epoch": 0.06, "learning_rate": 1.9994272154343995e-05, "loss": 6.3473, "step": 570 }, { "epoch": 0.06, "learning_rate": 1.9993460735298695e-05, "loss": 6.3812, "step": 580 }, { "epoch": 0.06, "learning_rate": 1.9992595604074457e-05, "loss": 6.3656, "step": 590 }, { "epoch": 0.06, "learning_rate": 1.9991676765321124e-05, "loss": 6.3387, "step": 600 }, { "epoch": 0.07, "learning_rate": 1.999070422397721e-05, "loss": 6.2977, "step": 610 }, { "epoch": 0.07, "learning_rate": 1.998967798526987e-05, "loss": 6.3191, "step": 620 }, { "epoch": 0.07, "learning_rate": 1.9988598054714854e-05, "loss": 6.2926, "step": 630 }, { "epoch": 0.07, "learning_rate": 1.9987464438116506e-05, "loss": 6.2914, "step": 640 }, { "epoch": 0.07, "learning_rate": 1.998627714156771e-05, "loss": 6.2887, "step": 650 }, { "epoch": 0.07, "learning_rate": 1.9985036171449868e-05, "loss": 6.2414, "step": 660 }, { "epoch": 0.07, "learning_rate": 1.998374153443286e-05, "loss": 6.2742, "step": 670 }, { "epoch": 0.07, "learning_rate": 1.998239323747502e-05, "loss": 6.248, "step": 680 }, { "epoch": 0.07, "learning_rate": 1.9980991287823076e-05, "loss": 6.2629, "step": 690 }, { "epoch": 0.08, "learning_rate": 1.997953569301214e-05, "loss": 6.2496, "step": 700 }, { "epoch": 0.08, "learning_rate": 1.9978026460865634e-05, "loss": 6.2461, "step": 710 }, { "epoch": 0.08, "learning_rate": 1.997646359949529e-05, "loss": 6.2062, "step": 720 }, { "epoch": 0.08, "learning_rate": 1.9974847117301062e-05, "loss": 6.2539, "step": 730 }, { "epoch": 0.08, "learning_rate": 1.997317702297111e-05, "loss": 6.2402, "step": 740 }, { "epoch": 0.08, "learning_rate": 1.997145332548175e-05, "loss": 6.2105, "step": 750 }, { "epoch": 0.08, "learning_rate": 1.9969676034097386e-05, "loss": 6.1992, "step": 760 }, { "epoch": 0.08, "learning_rate": 1.996784515837049e-05, "loss": 6.2246, "step": 770 }, { "epoch": 0.08, "learning_rate": 1.9965960708141532e-05, "loss": 6.2129, "step": 780 }, { "epoch": 0.08, "learning_rate": 1.996402269353892e-05, "loss": 6.2012, "step": 790 }, { "epoch": 0.09, "learning_rate": 1.9962031124978974e-05, "loss": 6.1855, "step": 800 }, { "epoch": 0.09, "learning_rate": 1.995998601316583e-05, "loss": 6.1867, "step": 810 }, { "epoch": 0.09, "learning_rate": 1.9957887369091427e-05, "loss": 6.232, "step": 820 }, { "epoch": 0.09, "learning_rate": 1.9955735204035412e-05, "loss": 6.2332, "step": 830 }, { "epoch": 0.09, "learning_rate": 1.9953529529565098e-05, "loss": 6.1937, "step": 840 }, { "epoch": 0.09, "learning_rate": 1.9951270357535397e-05, "loss": 6.2062, "step": 850 }, { "epoch": 0.09, "learning_rate": 1.9948957700088747e-05, "loss": 6.2254, "step": 860 }, { "epoch": 0.09, "learning_rate": 1.9946591569655073e-05, "loss": 6.2078, "step": 870 }, { "epoch": 0.09, "learning_rate": 1.9944171978951687e-05, "loss": 6.1742, "step": 880 }, { "epoch": 0.1, "learning_rate": 1.9941698940983243e-05, "loss": 6.2, "step": 890 }, { "epoch": 0.1, "learning_rate": 1.993917246904166e-05, "loss": 6.1895, "step": 900 }, { "epoch": 0.1, "learning_rate": 1.9936592576706048e-05, "loss": 6.1953, "step": 910 }, { "epoch": 0.1, "learning_rate": 1.993395927784264e-05, "loss": 6.1914, "step": 920 }, { "epoch": 0.1, "learning_rate": 1.9931272586604712e-05, "loss": 6.1883, "step": 930 }, { "epoch": 0.1, "learning_rate": 1.992853251743251e-05, "loss": 6.202, "step": 940 }, { "epoch": 0.1, "learning_rate": 1.9925739085053175e-05, "loss": 6.193, "step": 950 }, { "epoch": 0.1, "learning_rate": 1.9922892304480657e-05, "loss": 6.177, "step": 960 }, { "epoch": 0.1, "learning_rate": 1.991999219101564e-05, "loss": 6.1566, "step": 970 }, { "epoch": 0.11, "learning_rate": 1.9917038760245463e-05, "loss": 6.1863, "step": 980 }, { "epoch": 0.11, "learning_rate": 1.991403202804402e-05, "loss": 6.1723, "step": 990 }, { "epoch": 0.11, "learning_rate": 1.99109720105717e-05, "loss": 6.1937, "step": 1000 }, { "epoch": 0.11, "learning_rate": 1.9907858724275272e-05, "loss": 6.1742, "step": 1010 }, { "epoch": 0.11, "learning_rate": 1.990469218588782e-05, "loss": 6.1789, "step": 1020 }, { "epoch": 0.11, "learning_rate": 1.990147241242864e-05, "loss": 6.1598, "step": 1030 }, { "epoch": 0.11, "learning_rate": 1.989819942120315e-05, "loss": 6.1691, "step": 1040 }, { "epoch": 0.11, "learning_rate": 1.989487322980281e-05, "loss": 6.1512, "step": 1050 }, { "epoch": 0.11, "learning_rate": 1.9891493856105007e-05, "loss": 6.1652, "step": 1060 }, { "epoch": 0.11, "learning_rate": 1.988806131827297e-05, "loss": 6.1574, "step": 1070 }, { "epoch": 0.12, "learning_rate": 1.9884575634755667e-05, "loss": 6.1645, "step": 1080 }, { "epoch": 0.12, "learning_rate": 1.9881036824287724e-05, "loss": 6.1684, "step": 1090 }, { "epoch": 0.12, "learning_rate": 1.9877444905889293e-05, "loss": 6.1473, "step": 1100 }, { "epoch": 0.12, "learning_rate": 1.987379989886598e-05, "loss": 6.1648, "step": 1110 }, { "epoch": 0.12, "learning_rate": 1.9870101822808717e-05, "loss": 6.1359, "step": 1120 }, { "epoch": 0.12, "learning_rate": 1.9866350697593682e-05, "loss": 6.184, "step": 1130 }, { "epoch": 0.12, "learning_rate": 1.9862546543382163e-05, "loss": 6.1445, "step": 1140 }, { "epoch": 0.12, "learning_rate": 1.985868938062048e-05, "loss": 6.098, "step": 1150 }, { "epoch": 0.12, "learning_rate": 1.9854779230039838e-05, "loss": 6.1434, "step": 1160 }, { "epoch": 0.13, "learning_rate": 1.985081611265626e-05, "loss": 6.157, "step": 1170 }, { "epoch": 0.13, "learning_rate": 1.9846800049770444e-05, "loss": 6.1484, "step": 1180 }, { "epoch": 0.13, "learning_rate": 1.9842731062967647e-05, "loss": 6.148, "step": 1190 }, { "epoch": 0.13, "learning_rate": 1.9838609174117588e-05, "loss": 6.1594, "step": 1200 }, { "epoch": 0.13, "learning_rate": 1.983443440537432e-05, "loss": 6.1492, "step": 1210 }, { "epoch": 0.13, "learning_rate": 1.9830206779176103e-05, "loss": 6.1266, "step": 1220 }, { "epoch": 0.13, "learning_rate": 1.9825926318245302e-05, "loss": 6.1281, "step": 1230 }, { "epoch": 0.13, "learning_rate": 1.982159304558825e-05, "loss": 6.1383, "step": 1240 }, { "epoch": 0.13, "learning_rate": 1.9817206984495123e-05, "loss": 6.1152, "step": 1250 }, { "epoch": 0.14, "learning_rate": 1.981276815853983e-05, "loss": 6.1406, "step": 1260 }, { "epoch": 0.14, "learning_rate": 1.9808276591579875e-05, "loss": 6.1707, "step": 1270 }, { "epoch": 0.14, "learning_rate": 1.980373230775623e-05, "loss": 6.143, "step": 1280 }, { "epoch": 0.14, "learning_rate": 1.9799135331493202e-05, "loss": 6.118, "step": 1290 }, { "epoch": 0.14, "learning_rate": 1.979448568749831e-05, "loss": 6.1461, "step": 1300 }, { "epoch": 0.14, "learning_rate": 1.9789783400762148e-05, "loss": 6.1363, "step": 1310 }, { "epoch": 0.14, "learning_rate": 1.9785028496558247e-05, "loss": 6.1434, "step": 1320 }, { "epoch": 0.14, "learning_rate": 1.978022100044295e-05, "loss": 6.1383, "step": 1330 }, { "epoch": 0.14, "learning_rate": 1.977536093825526e-05, "loss": 6.1434, "step": 1340 }, { "epoch": 0.14, "learning_rate": 1.977044833611671e-05, "loss": 6.1512, "step": 1350 }, { "epoch": 0.15, "learning_rate": 1.9765483220431227e-05, "loss": 6.1508, "step": 1360 }, { "epoch": 0.15, "learning_rate": 1.9760465617884978e-05, "loss": 6.1277, "step": 1370 }, { "epoch": 0.15, "learning_rate": 1.9755395555446233e-05, "loss": 6.1164, "step": 1380 }, { "epoch": 0.15, "learning_rate": 1.9750273060365225e-05, "loss": 6.1246, "step": 1390 }, { "epoch": 0.15, "learning_rate": 1.974509816017399e-05, "loss": 6.123, "step": 1400 }, { "epoch": 0.15, "learning_rate": 1.973987088268624e-05, "loss": 6.1355, "step": 1410 }, { "epoch": 0.15, "learning_rate": 1.973459125599719e-05, "loss": 6.1129, "step": 1420 }, { "epoch": 0.15, "learning_rate": 1.9729259308483418e-05, "loss": 6.1164, "step": 1430 }, { "epoch": 0.15, "learning_rate": 1.9723875068802722e-05, "loss": 6.1363, "step": 1440 }, { "epoch": 0.16, "learning_rate": 1.971843856589395e-05, "loss": 6.1133, "step": 1450 }, { "epoch": 0.16, "learning_rate": 1.9712949828976844e-05, "loss": 6.1063, "step": 1460 }, { "epoch": 0.16, "learning_rate": 1.9707408887551906e-05, "loss": 6.1172, "step": 1470 }, { "epoch": 0.16, "learning_rate": 1.9701815771400206e-05, "loss": 6.1316, "step": 1480 }, { "epoch": 0.16, "learning_rate": 1.9696170510583255e-05, "loss": 6.1344, "step": 1490 }, { "epoch": 0.16, "learning_rate": 1.9690473135442815e-05, "loss": 6.1324, "step": 1500 }, { "epoch": 0.16, "learning_rate": 1.9684723676600758e-05, "loss": 6.1133, "step": 1510 }, { "epoch": 0.16, "learning_rate": 1.9678922164958886e-05, "loss": 6.1039, "step": 1520 }, { "epoch": 0.16, "learning_rate": 1.9673068631698773e-05, "loss": 6.1102, "step": 1530 }, { "epoch": 0.17, "learning_rate": 1.9667163108281594e-05, "loss": 6.1246, "step": 1540 }, { "epoch": 0.17, "learning_rate": 1.9661205626447954e-05, "loss": 6.1172, "step": 1550 }, { "epoch": 0.17, "learning_rate": 1.9655196218217734e-05, "loss": 6.118, "step": 1560 }, { "epoch": 0.17, "learning_rate": 1.9649134915889886e-05, "loss": 6.1008, "step": 1570 }, { "epoch": 0.17, "learning_rate": 1.96430217520423e-05, "loss": 6.1324, "step": 1580 }, { "epoch": 0.17, "learning_rate": 1.9636856759531586e-05, "loss": 6.1219, "step": 1590 }, { "epoch": 0.17, "learning_rate": 1.9630639971492938e-05, "loss": 6.1375, "step": 1600 }, { "epoch": 0.17, "learning_rate": 1.9624371421339926e-05, "loss": 6.0957, "step": 1610 }, { "epoch": 0.17, "learning_rate": 1.961805114276433e-05, "loss": 6.0938, "step": 1620 }, { "epoch": 0.18, "learning_rate": 1.961167916973596e-05, "loss": 6.1246, "step": 1630 }, { "epoch": 0.18, "learning_rate": 1.9605255536502463e-05, "loss": 6.1367, "step": 1640 }, { "epoch": 0.18, "learning_rate": 1.959878027758915e-05, "loss": 6.1066, "step": 1650 }, { "epoch": 0.18, "learning_rate": 1.959225342779881e-05, "loss": 6.1473, "step": 1660 }, { "epoch": 0.18, "learning_rate": 1.9585675022211514e-05, "loss": 6.118, "step": 1670 }, { "epoch": 0.18, "learning_rate": 1.9579045096184433e-05, "loss": 6.1016, "step": 1680 }, { "epoch": 0.18, "learning_rate": 1.9572363685351642e-05, "loss": 6.116, "step": 1690 }, { "epoch": 0.18, "learning_rate": 1.9565630825623945e-05, "loss": 6.1184, "step": 1700 }, { "epoch": 0.18, "learning_rate": 1.955884655318865e-05, "loss": 6.1043, "step": 1710 }, { "epoch": 0.18, "learning_rate": 1.9552010904509424e-05, "loss": 6.1344, "step": 1720 }, { "epoch": 0.19, "learning_rate": 1.954512391632604e-05, "loss": 6.1055, "step": 1730 }, { "epoch": 0.19, "learning_rate": 1.9538185625654216e-05, "loss": 6.0887, "step": 1740 }, { "epoch": 0.19, "learning_rate": 1.9531196069785414e-05, "loss": 6.107, "step": 1750 }, { "epoch": 0.19, "learning_rate": 1.952415528628663e-05, "loss": 6.1047, "step": 1760 }, { "epoch": 0.19, "learning_rate": 1.9517063313000184e-05, "loss": 6.0969, "step": 1770 }, { "epoch": 0.19, "learning_rate": 1.950992018804354e-05, "loss": 6.1191, "step": 1780 }, { "epoch": 0.19, "learning_rate": 1.9502725949809086e-05, "loss": 6.1414, "step": 1790 }, { "epoch": 0.19, "learning_rate": 1.949548063696393e-05, "loss": 6.1145, "step": 1800 }, { "epoch": 0.19, "learning_rate": 1.9488184288449684e-05, "loss": 6.1063, "step": 1810 }, { "epoch": 0.2, "learning_rate": 1.9480836943482275e-05, "loss": 6.0973, "step": 1820 }, { "epoch": 0.2, "learning_rate": 1.9473438641551715e-05, "loss": 6.1227, "step": 1830 }, { "epoch": 0.2, "learning_rate": 1.9465989422421903e-05, "loss": 6.1027, "step": 1840 }, { "epoch": 0.2, "learning_rate": 1.9458489326130395e-05, "loss": 6.0707, "step": 1850 }, { "epoch": 0.2, "learning_rate": 1.9450938392988208e-05, "loss": 6.1297, "step": 1860 }, { "epoch": 0.2, "learning_rate": 1.9443336663579583e-05, "loss": 6.1074, "step": 1870 }, { "epoch": 0.2, "learning_rate": 1.943568417876178e-05, "loss": 6.134, "step": 1880 }, { "epoch": 0.2, "learning_rate": 1.942798097966487e-05, "loss": 6.1004, "step": 1890 }, { "epoch": 0.2, "learning_rate": 1.942022710769148e-05, "loss": 6.1297, "step": 1900 }, { "epoch": 0.21, "learning_rate": 1.9412422604516594e-05, "loss": 6.1051, "step": 1910 }, { "epoch": 0.21, "learning_rate": 1.9404567512087338e-05, "loss": 6.0984, "step": 1920 }, { "epoch": 0.21, "learning_rate": 1.9396661872622728e-05, "loss": 6.0879, "step": 1930 }, { "epoch": 0.21, "learning_rate": 1.9388705728613465e-05, "loss": 6.0891, "step": 1940 }, { "epoch": 0.21, "learning_rate": 1.9380699122821698e-05, "loss": 6.1137, "step": 1950 }, { "epoch": 0.21, "learning_rate": 1.9372642098280785e-05, "loss": 6.0914, "step": 1960 }, { "epoch": 0.21, "learning_rate": 1.936453469829508e-05, "loss": 6.1051, "step": 1970 }, { "epoch": 0.21, "learning_rate": 1.9356376966439694e-05, "loss": 6.1188, "step": 1980 }, { "epoch": 0.21, "learning_rate": 1.934816894656025e-05, "loss": 6.0949, "step": 1990 }, { "epoch": 0.21, "learning_rate": 1.9339910682772664e-05, "loss": 6.1266, "step": 2000 }, { "epoch": 0.22, "learning_rate": 1.9331602219462888e-05, "loss": 6.1113, "step": 2010 }, { "epoch": 0.22, "learning_rate": 1.9323243601286696e-05, "loss": 6.0949, "step": 2020 }, { "epoch": 0.22, "learning_rate": 1.9314834873169426e-05, "loss": 6.1105, "step": 2030 }, { "epoch": 0.22, "learning_rate": 1.930637608030574e-05, "loss": 6.1004, "step": 2040 }, { "epoch": 0.22, "learning_rate": 1.9297867268159393e-05, "loss": 6.0992, "step": 2050 }, { "epoch": 0.22, "learning_rate": 1.9289308482462964e-05, "loss": 6.0793, "step": 2060 }, { "epoch": 0.22, "learning_rate": 1.9280699769217647e-05, "loss": 6.0789, "step": 2070 }, { "epoch": 0.22, "learning_rate": 1.927204117469297e-05, "loss": 6.1199, "step": 2080 }, { "epoch": 0.22, "learning_rate": 1.9263332745426564e-05, "loss": 6.1172, "step": 2090 }, { "epoch": 0.23, "learning_rate": 1.9254574528223907e-05, "loss": 6.1051, "step": 2100 }, { "epoch": 0.23, "learning_rate": 1.9245766570158072e-05, "loss": 6.1023, "step": 2110 }, { "epoch": 0.23, "learning_rate": 1.9236908918569485e-05, "loss": 6.0949, "step": 2120 }, { "epoch": 0.23, "learning_rate": 1.9228001621065644e-05, "loss": 6.0934, "step": 2130 }, { "epoch": 0.23, "learning_rate": 1.92190447255209e-05, "loss": 6.0762, "step": 2140 }, { "epoch": 0.23, "learning_rate": 1.921003828007617e-05, "loss": 6.0863, "step": 2150 }, { "epoch": 0.23, "learning_rate": 1.9200982333138684e-05, "loss": 6.0848, "step": 2160 }, { "epoch": 0.23, "learning_rate": 1.9191876933381742e-05, "loss": 6.0895, "step": 2170 }, { "epoch": 0.23, "learning_rate": 1.9182722129744426e-05, "loss": 6.0594, "step": 2180 }, { "epoch": 0.24, "learning_rate": 1.9173517971431362e-05, "loss": 6.098, "step": 2190 }, { "epoch": 0.24, "learning_rate": 1.916426450791244e-05, "loss": 6.1074, "step": 2200 }, { "epoch": 0.24, "learning_rate": 1.915496178892255e-05, "loss": 6.1078, "step": 2210 }, { "epoch": 0.24, "learning_rate": 1.9145609864461317e-05, "loss": 6.0586, "step": 2220 }, { "epoch": 0.24, "learning_rate": 1.9136208784792838e-05, "loss": 6.0922, "step": 2230 }, { "epoch": 0.24, "learning_rate": 1.91267586004454e-05, "loss": 6.1285, "step": 2240 }, { "epoch": 0.24, "learning_rate": 1.9117259362211212e-05, "loss": 6.0855, "step": 2250 }, { "epoch": 0.24, "learning_rate": 1.9107711121146152e-05, "loss": 6.084, "step": 2260 }, { "epoch": 0.24, "learning_rate": 1.9098113928569448e-05, "loss": 6.1105, "step": 2270 }, { "epoch": 0.24, "learning_rate": 1.9088467836063452e-05, "loss": 6.0988, "step": 2280 }, { "epoch": 0.25, "learning_rate": 1.9078772895473326e-05, "loss": 6.0832, "step": 2290 }, { "epoch": 0.25, "learning_rate": 1.9069029158906792e-05, "loss": 6.0965, "step": 2300 }, { "epoch": 0.25, "learning_rate": 1.9059236678733817e-05, "loss": 6.1094, "step": 2310 }, { "epoch": 0.25, "learning_rate": 1.904939550758637e-05, "loss": 6.0738, "step": 2320 }, { "epoch": 0.25, "learning_rate": 1.9039505698358116e-05, "loss": 6.0941, "step": 2330 }, { "epoch": 0.25, "learning_rate": 1.902956730420413e-05, "loss": 6.1012, "step": 2340 }, { "epoch": 0.25, "learning_rate": 1.9019580378540622e-05, "loss": 6.0891, "step": 2350 }, { "epoch": 0.25, "learning_rate": 1.9009544975044652e-05, "loss": 6.1082, "step": 2360 }, { "epoch": 0.25, "learning_rate": 1.8999461147653828e-05, "loss": 6.1094, "step": 2370 }, { "epoch": 0.26, "learning_rate": 1.8989328950566023e-05, "loss": 6.077, "step": 2380 }, { "epoch": 0.26, "learning_rate": 1.897914843823909e-05, "loss": 6.0977, "step": 2390 }, { "epoch": 0.26, "learning_rate": 1.8968919665390556e-05, "loss": 6.1102, "step": 2400 }, { "epoch": 0.26, "learning_rate": 1.8958642686997348e-05, "loss": 6.084, "step": 2410 }, { "epoch": 0.26, "learning_rate": 1.8948317558295464e-05, "loss": 6.0949, "step": 2420 }, { "epoch": 0.26, "learning_rate": 1.893794433477972e-05, "loss": 6.0895, "step": 2430 }, { "epoch": 0.26, "learning_rate": 1.8927523072203417e-05, "loss": 6.0824, "step": 2440 }, { "epoch": 0.26, "learning_rate": 1.8917053826578047e-05, "loss": 6.0855, "step": 2450 }, { "epoch": 0.26, "learning_rate": 1.8906536654173013e-05, "loss": 6.1, "step": 2460 }, { "epoch": 0.27, "learning_rate": 1.88959716115153e-05, "loss": 6.0941, "step": 2470 }, { "epoch": 0.27, "learning_rate": 1.8885358755389192e-05, "loss": 6.0848, "step": 2480 }, { "epoch": 0.27, "learning_rate": 1.887469814283595e-05, "loss": 6.0926, "step": 2490 }, { "epoch": 0.27, "learning_rate": 1.8863989831153513e-05, "loss": 6.0637, "step": 2500 }, { "epoch": 0.27, "learning_rate": 1.8853233877896197e-05, "loss": 6.0922, "step": 2510 }, { "epoch": 0.27, "learning_rate": 1.8842430340874366e-05, "loss": 6.0785, "step": 2520 }, { "epoch": 0.27, "learning_rate": 1.883157927815415e-05, "loss": 6.0625, "step": 2530 }, { "epoch": 0.27, "learning_rate": 1.8820680748057113e-05, "loss": 6.073, "step": 2540 }, { "epoch": 0.27, "learning_rate": 1.880973480915993e-05, "loss": 6.0723, "step": 2550 }, { "epoch": 0.27, "learning_rate": 1.8798741520294097e-05, "loss": 6.0812, "step": 2560 }, { "epoch": 0.28, "learning_rate": 1.8787700940545608e-05, "loss": 6.082, "step": 2570 }, { "epoch": 0.28, "learning_rate": 1.877661312925462e-05, "loss": 6.0664, "step": 2580 }, { "epoch": 0.28, "learning_rate": 1.8765478146015156e-05, "loss": 6.0715, "step": 2590 }, { "epoch": 0.28, "learning_rate": 1.8754296050674776e-05, "loss": 6.0715, "step": 2600 }, { "epoch": 0.28, "learning_rate": 1.8743066903334252e-05, "loss": 6.0875, "step": 2610 }, { "epoch": 0.28, "learning_rate": 1.873179076434724e-05, "loss": 6.0754, "step": 2620 }, { "epoch": 0.28, "learning_rate": 1.872046769431998e-05, "loss": 6.0766, "step": 2630 }, { "epoch": 0.28, "learning_rate": 1.8709097754110945e-05, "loss": 6.0887, "step": 2640 }, { "epoch": 0.28, "learning_rate": 1.8697681004830514e-05, "loss": 6.0492, "step": 2650 }, { "epoch": 0.29, "learning_rate": 1.868621750784067e-05, "loss": 6.0684, "step": 2660 }, { "epoch": 0.29, "learning_rate": 1.8674707324754643e-05, "loss": 6.0809, "step": 2670 }, { "epoch": 0.29, "learning_rate": 1.8663150517436586e-05, "loss": 6.0875, "step": 2680 }, { "epoch": 0.29, "learning_rate": 1.865154714800125e-05, "loss": 6.0891, "step": 2690 }, { "epoch": 0.29, "learning_rate": 1.863989727881364e-05, "loss": 6.0816, "step": 2700 }, { "epoch": 0.29, "learning_rate": 1.862820097248869e-05, "loss": 6.082, "step": 2710 }, { "epoch": 0.29, "learning_rate": 1.861645829189092e-05, "loss": 6.0672, "step": 2720 }, { "epoch": 0.29, "learning_rate": 1.8604669300134094e-05, "loss": 6.0852, "step": 2730 }, { "epoch": 0.29, "learning_rate": 1.859283406058089e-05, "loss": 6.0879, "step": 2740 }, { "epoch": 0.3, "learning_rate": 1.8580952636842557e-05, "loss": 6.0723, "step": 2750 }, { "epoch": 0.3, "learning_rate": 1.856902509277857e-05, "loss": 6.0891, "step": 2760 }, { "epoch": 0.3, "learning_rate": 1.855705149249629e-05, "loss": 6.077, "step": 2770 }, { "epoch": 0.3, "learning_rate": 1.854503190035062e-05, "loss": 6.0484, "step": 2780 }, { "epoch": 0.3, "learning_rate": 1.8532966380943643e-05, "loss": 6.0797, "step": 2790 }, { "epoch": 0.3, "learning_rate": 1.8520854999124308e-05, "loss": 6.0738, "step": 2800 }, { "epoch": 0.3, "learning_rate": 1.850869781998805e-05, "loss": 6.0965, "step": 2810 }, { "epoch": 0.3, "learning_rate": 1.8496494908876454e-05, "loss": 6.0895, "step": 2820 }, { "epoch": 0.3, "learning_rate": 1.8484246331376908e-05, "loss": 6.0742, "step": 2830 }, { "epoch": 0.3, "learning_rate": 1.8471952153322237e-05, "loss": 6.0703, "step": 2840 }, { "epoch": 0.31, "learning_rate": 1.8459612440790364e-05, "loss": 6.0773, "step": 2850 }, { "epoch": 0.31, "learning_rate": 1.8447227260103942e-05, "loss": 6.0758, "step": 2860 }, { "epoch": 0.31, "learning_rate": 1.843479667783e-05, "loss": 6.0645, "step": 2870 }, { "epoch": 0.31, "learning_rate": 1.8422320760779602e-05, "loss": 6.0914, "step": 2880 }, { "epoch": 0.31, "learning_rate": 1.8409799576007465e-05, "loss": 6.0863, "step": 2890 }, { "epoch": 0.31, "learning_rate": 1.8397233190811597e-05, "loss": 6.0805, "step": 2900 }, { "epoch": 0.31, "learning_rate": 1.8384621672732975e-05, "loss": 6.0824, "step": 2910 }, { "epoch": 0.31, "learning_rate": 1.837196508955512e-05, "loss": 6.1023, "step": 2920 }, { "epoch": 0.31, "learning_rate": 1.8359263509303792e-05, "loss": 6.0867, "step": 2930 }, { "epoch": 0.32, "learning_rate": 1.834651700024659e-05, "loss": 6.107, "step": 2940 }, { "epoch": 0.32, "learning_rate": 1.8333725630892584e-05, "loss": 6.0941, "step": 2950 }, { "epoch": 0.32, "learning_rate": 1.8320889469991965e-05, "loss": 6.1039, "step": 2960 }, { "epoch": 0.32, "learning_rate": 1.8308008586535666e-05, "loss": 6.0805, "step": 2970 }, { "epoch": 0.32, "learning_rate": 1.8295083049754994e-05, "loss": 6.1336, "step": 2980 }, { "epoch": 0.32, "learning_rate": 1.828211292912125e-05, "loss": 6.0762, "step": 2990 }, { "epoch": 0.32, "learning_rate": 1.826909829434536e-05, "loss": 6.0906, "step": 3000 }, { "epoch": 0.32, "learning_rate": 1.82586545833408e-05, "loss": 6.0547, "step": 3010 }, { "epoch": 0.32, "learning_rate": 1.8245559999544053e-05, "loss": 6.1059, "step": 3020 }, { "epoch": 0.33, "learning_rate": 1.8232421098067383e-05, "loss": 6.0973, "step": 3030 }, { "epoch": 0.33, "learning_rate": 1.8219237949528875e-05, "loss": 6.0801, "step": 3040 }, { "epoch": 0.33, "learning_rate": 1.8206010624784426e-05, "loss": 6.082, "step": 3050 }, { "epoch": 0.33, "learning_rate": 1.8192739194927366e-05, "loss": 6.066, "step": 3060 }, { "epoch": 0.33, "learning_rate": 1.8179423731288088e-05, "loss": 6.0605, "step": 3070 }, { "epoch": 0.33, "learning_rate": 1.816606430543364e-05, "loss": 6.1137, "step": 3080 }, { "epoch": 0.33, "learning_rate": 1.8152660989167373e-05, "loss": 6.0871, "step": 3090 }, { "epoch": 0.33, "learning_rate": 1.813921385452852e-05, "loss": 6.0836, "step": 3100 }, { "epoch": 0.33, "learning_rate": 1.8125722973791836e-05, "loss": 6.0656, "step": 3110 }, { "epoch": 0.33, "learning_rate": 1.81121884194672e-05, "loss": 6.093, "step": 3120 }, { "epoch": 0.34, "learning_rate": 1.8098610264299213e-05, "loss": 6.0535, "step": 3130 }, { "epoch": 0.34, "learning_rate": 1.8084988581266837e-05, "loss": 6.0766, "step": 3140 }, { "epoch": 0.34, "learning_rate": 1.8071323443582973e-05, "loss": 6.0719, "step": 3150 }, { "epoch": 0.34, "learning_rate": 1.805761492469408e-05, "loss": 6.0809, "step": 3160 }, { "epoch": 0.34, "learning_rate": 1.804386309827978e-05, "loss": 6.0996, "step": 3170 }, { "epoch": 0.34, "learning_rate": 1.803006803825247e-05, "loss": 6.082, "step": 3180 }, { "epoch": 0.34, "learning_rate": 1.80162298187569e-05, "loss": 6.0656, "step": 3190 }, { "epoch": 0.34, "learning_rate": 1.8002348514169802e-05, "loss": 6.0848, "step": 3200 }, { "epoch": 0.34, "learning_rate": 1.7988424199099476e-05, "loss": 6.0855, "step": 3210 }, { "epoch": 0.35, "learning_rate": 1.797445694838539e-05, "loss": 6.0918, "step": 3220 }, { "epoch": 0.35, "learning_rate": 1.7960446837097784e-05, "loss": 6.0789, "step": 3230 }, { "epoch": 0.35, "learning_rate": 1.7946393940537262e-05, "loss": 6.0563, "step": 3240 }, { "epoch": 0.35, "learning_rate": 1.793229833423438e-05, "loss": 6.0473, "step": 3250 }, { "epoch": 0.35, "learning_rate": 1.7918160093949254e-05, "loss": 6.0777, "step": 3260 }, { "epoch": 0.35, "learning_rate": 1.790397929567114e-05, "loss": 6.077, "step": 3270 }, { "epoch": 0.35, "learning_rate": 1.7889756015618047e-05, "loss": 6.0852, "step": 3280 }, { "epoch": 0.35, "learning_rate": 1.787549033023629e-05, "loss": 6.0738, "step": 3290 }, { "epoch": 0.35, "learning_rate": 1.786118231620012e-05, "loss": 6.0914, "step": 3300 }, { "epoch": 0.36, "learning_rate": 1.784683205041129e-05, "loss": 6.0785, "step": 3310 }, { "epoch": 0.36, "learning_rate": 1.783243960999863e-05, "loss": 6.1156, "step": 3320 }, { "epoch": 0.36, "learning_rate": 1.7818005072317665e-05, "loss": 6.057, "step": 3330 }, { "epoch": 0.36, "learning_rate": 1.7803528514950173e-05, "loss": 6.0777, "step": 3340 }, { "epoch": 0.36, "learning_rate": 1.778901001570378e-05, "loss": 6.0508, "step": 3350 }, { "epoch": 0.36, "learning_rate": 1.7774449652611538e-05, "loss": 6.1152, "step": 3360 }, { "epoch": 0.36, "learning_rate": 1.77598475039315e-05, "loss": 6.0715, "step": 3370 }, { "epoch": 0.36, "learning_rate": 1.7745203648146314e-05, "loss": 6.0727, "step": 3380 }, { "epoch": 0.36, "learning_rate": 1.7730518163962788e-05, "loss": 6.066, "step": 3390 }, { "epoch": 0.37, "learning_rate": 1.771579113031147e-05, "loss": 6.107, "step": 3400 }, { "epoch": 0.37, "learning_rate": 1.770102262634623e-05, "loss": 6.1012, "step": 3410 }, { "epoch": 0.37, "learning_rate": 1.768621273144383e-05, "loss": 6.1059, "step": 3420 }, { "epoch": 0.37, "learning_rate": 1.767136152520349e-05, "loss": 6.0902, "step": 3430 }, { "epoch": 0.37, "learning_rate": 1.765646908744647e-05, "loss": 6.0848, "step": 3440 }, { "epoch": 0.37, "learning_rate": 1.7641535498215645e-05, "loss": 6.0773, "step": 3450 }, { "epoch": 0.37, "learning_rate": 1.7626560837775062e-05, "loss": 6.073, "step": 3460 }, { "epoch": 0.37, "learning_rate": 1.7611545186609516e-05, "loss": 6.0805, "step": 3470 }, { "epoch": 0.37, "learning_rate": 1.7596488625424118e-05, "loss": 6.0848, "step": 3480 }, { "epoch": 0.37, "learning_rate": 1.7581391235143854e-05, "loss": 6.0793, "step": 3490 }, { "epoch": 0.38, "learning_rate": 1.7566253096913162e-05, "loss": 6.0805, "step": 3500 }, { "epoch": 0.38, "learning_rate": 1.755107429209549e-05, "loss": 6.0691, "step": 3510 }, { "epoch": 0.38, "learning_rate": 1.753585490227285e-05, "loss": 6.0836, "step": 3520 }, { "epoch": 0.38, "learning_rate": 1.7520595009245394e-05, "loss": 6.0691, "step": 3530 }, { "epoch": 0.38, "learning_rate": 1.7505294695030964e-05, "loss": 6.0746, "step": 3540 }, { "epoch": 0.38, "learning_rate": 1.748995404186466e-05, "loss": 6.0852, "step": 3550 }, { "epoch": 0.38, "learning_rate": 1.7474573132198387e-05, "loss": 6.082, "step": 3560 }, { "epoch": 0.38, "learning_rate": 1.7459152048700423e-05, "loss": 6.0758, "step": 3570 }, { "epoch": 0.38, "learning_rate": 1.744369087425497e-05, "loss": 6.0777, "step": 3580 }, { "epoch": 0.39, "learning_rate": 1.7428189691961703e-05, "loss": 6.077, "step": 3590 }, { "epoch": 0.39, "learning_rate": 1.741264858513533e-05, "loss": 6.0918, "step": 3600 }, { "epoch": 0.39, "learning_rate": 1.7397067637305153e-05, "loss": 6.066, "step": 3610 }, { "epoch": 0.39, "learning_rate": 1.7381446932214587e-05, "loss": 6.0945, "step": 3620 }, { "epoch": 0.39, "learning_rate": 1.7365786553820757e-05, "loss": 6.075, "step": 3630 }, { "epoch": 0.39, "learning_rate": 1.7350086586293997e-05, "loss": 6.0746, "step": 3640 }, { "epoch": 0.39, "learning_rate": 1.733434711401744e-05, "loss": 6.0836, "step": 3650 }, { "epoch": 0.39, "learning_rate": 1.7318568221586543e-05, "loss": 6.0609, "step": 3660 }, { "epoch": 0.39, "learning_rate": 1.7302749993808626e-05, "loss": 6.0836, "step": 3670 }, { "epoch": 0.4, "learning_rate": 1.728689251570244e-05, "loss": 6.0742, "step": 3680 }, { "epoch": 0.4, "learning_rate": 1.7270995872497686e-05, "loss": 6.1008, "step": 3690 }, { "epoch": 0.4, "learning_rate": 1.725506014963457e-05, "loss": 6.0758, "step": 3700 }, { "epoch": 0.4, "learning_rate": 1.723908543276334e-05, "loss": 6.0637, "step": 3710 }, { "epoch": 0.4, "learning_rate": 1.722307180774383e-05, "loss": 6.0566, "step": 3720 }, { "epoch": 0.4, "learning_rate": 1.720701936064499e-05, "loss": 6.0754, "step": 3730 }, { "epoch": 0.4, "learning_rate": 1.719092817774443e-05, "loss": 6.0473, "step": 3740 }, { "epoch": 0.4, "learning_rate": 1.7174798345527953e-05, "loss": 6.0602, "step": 3750 }, { "epoch": 0.4, "learning_rate": 1.71586299506891e-05, "loss": 6.0664, "step": 3760 }, { "epoch": 0.4, "learning_rate": 1.7142423080128666e-05, "loss": 6.0672, "step": 3770 }, { "epoch": 0.41, "learning_rate": 1.7126177820954242e-05, "loss": 6.0629, "step": 3780 }, { "epoch": 0.41, "learning_rate": 1.710989426047976e-05, "loss": 6.0539, "step": 3790 }, { "epoch": 0.41, "learning_rate": 1.709357248622499e-05, "loss": 6.0848, "step": 3800 }, { "epoch": 0.41, "learning_rate": 1.7077212585915118e-05, "loss": 6.032, "step": 3810 }, { "epoch": 0.41, "learning_rate": 1.7060814647480228e-05, "loss": 6.0855, "step": 3820 }, { "epoch": 0.41, "learning_rate": 1.7044378759054846e-05, "loss": 6.0738, "step": 3830 }, { "epoch": 0.41, "learning_rate": 1.702790500897749e-05, "loss": 6.0582, "step": 3840 }, { "epoch": 0.41, "learning_rate": 1.701139348579015e-05, "loss": 6.0563, "step": 3850 }, { "epoch": 0.41, "learning_rate": 1.6994844278237857e-05, "loss": 6.1, "step": 3860 }, { "epoch": 0.42, "learning_rate": 1.6978257475268173e-05, "loss": 6.0598, "step": 3870 }, { "epoch": 0.42, "learning_rate": 1.6961633166030723e-05, "loss": 6.0801, "step": 3880 }, { "epoch": 0.42, "learning_rate": 1.6944971439876727e-05, "loss": 6.0754, "step": 3890 }, { "epoch": 0.42, "learning_rate": 1.692827238635851e-05, "loss": 6.0941, "step": 3900 }, { "epoch": 0.42, "learning_rate": 1.691153609522901e-05, "loss": 6.077, "step": 3910 }, { "epoch": 0.42, "learning_rate": 1.6894762656441328e-05, "loss": 6.0566, "step": 3920 }, { "epoch": 0.42, "learning_rate": 1.6877952160148203e-05, "loss": 6.0734, "step": 3930 }, { "epoch": 0.42, "learning_rate": 1.686110469670156e-05, "loss": 6.0445, "step": 3940 }, { "epoch": 0.42, "learning_rate": 1.6844220356652013e-05, "loss": 6.0613, "step": 3950 }, { "epoch": 0.43, "learning_rate": 1.6827299230748368e-05, "loss": 6.0809, "step": 3960 }, { "epoch": 0.43, "learning_rate": 1.681034140993716e-05, "loss": 6.059, "step": 3970 }, { "epoch": 0.43, "learning_rate": 1.6793346985362128e-05, "loss": 6.0805, "step": 3980 }, { "epoch": 0.43, "learning_rate": 1.677631604836377e-05, "loss": 6.0687, "step": 3990 }, { "epoch": 0.43, "learning_rate": 1.6759248690478814e-05, "loss": 6.0637, "step": 4000 }, { "epoch": 0.43, "learning_rate": 1.6745568642770166e-05, "loss": 6.0637, "step": 4010 }, { "epoch": 0.43, "learning_rate": 1.6728435958586427e-05, "loss": 6.0512, "step": 4020 }, { "epoch": 0.43, "learning_rate": 1.671126711085877e-05, "loss": 6.0793, "step": 4030 }, { "epoch": 0.43, "learning_rate": 1.6694062191865163e-05, "loss": 6.0711, "step": 4040 }, { "epoch": 0.43, "learning_rate": 1.6676821294077435e-05, "loss": 6.0551, "step": 4050 }, { "epoch": 0.44, "learning_rate": 1.6659544510160808e-05, "loss": 6.0746, "step": 4060 }, { "epoch": 0.44, "learning_rate": 1.664223193297337e-05, "loss": 6.0773, "step": 4070 }, { "epoch": 0.44, "learning_rate": 1.6624883655565602e-05, "loss": 6.0914, "step": 4080 }, { "epoch": 0.44, "learning_rate": 1.6607499771179853e-05, "loss": 6.0336, "step": 4090 }, { "epoch": 0.44, "learning_rate": 1.6590080373249846e-05, "loss": 6.0766, "step": 4100 }, { "epoch": 0.44, "learning_rate": 1.6572625555400194e-05, "loss": 6.068, "step": 4110 }, { "epoch": 0.44, "learning_rate": 1.655513541144587e-05, "loss": 6.0473, "step": 4120 }, { "epoch": 0.44, "learning_rate": 1.6537610035391726e-05, "loss": 6.0852, "step": 4130 }, { "epoch": 0.44, "learning_rate": 1.6520049521431966e-05, "loss": 6.0773, "step": 4140 }, { "epoch": 0.45, "learning_rate": 1.6502453963949662e-05, "loss": 6.0762, "step": 4150 }, { "epoch": 0.45, "learning_rate": 1.6484823457516225e-05, "loss": 6.0727, "step": 4160 }, { "epoch": 0.45, "learning_rate": 1.6467158096890915e-05, "loss": 6.0875, "step": 4170 }, { "epoch": 0.45, "learning_rate": 1.6449457977020315e-05, "loss": 6.0645, "step": 4180 }, { "epoch": 0.45, "learning_rate": 1.6431723193037847e-05, "loss": 6.0766, "step": 4190 }, { "epoch": 0.45, "learning_rate": 1.641395384026322e-05, "loss": 6.0719, "step": 4200 }, { "epoch": 0.45, "learning_rate": 1.6396150014201965e-05, "loss": 6.0652, "step": 4210 }, { "epoch": 0.45, "learning_rate": 1.6378311810544877e-05, "loss": 6.0836, "step": 4220 }, { "epoch": 0.45, "learning_rate": 1.6360439325167536e-05, "loss": 6.0828, "step": 4230 }, { "epoch": 0.46, "learning_rate": 1.6342532654129764e-05, "loss": 6.0891, "step": 4240 }, { "epoch": 0.46, "learning_rate": 1.632459189367514e-05, "loss": 6.0687, "step": 4250 }, { "epoch": 0.46, "learning_rate": 1.6306617140230442e-05, "loss": 6.0883, "step": 4260 }, { "epoch": 0.46, "learning_rate": 1.6288608490405172e-05, "loss": 6.0848, "step": 4270 }, { "epoch": 0.46, "learning_rate": 1.6270566040991004e-05, "loss": 6.0578, "step": 4280 }, { "epoch": 0.46, "learning_rate": 1.6252489888961275e-05, "loss": 6.075, "step": 4290 }, { "epoch": 0.46, "learning_rate": 1.623438013147047e-05, "loss": 6.073, "step": 4300 }, { "epoch": 0.46, "learning_rate": 1.6216236865853695e-05, "loss": 6.0512, "step": 4310 }, { "epoch": 0.46, "learning_rate": 1.6198060189626147e-05, "loss": 6.0789, "step": 4320 }, { "epoch": 0.46, "learning_rate": 1.6179850200482606e-05, "loss": 6.0766, "step": 4330 }, { "epoch": 0.47, "learning_rate": 1.6161606996296888e-05, "loss": 6.0711, "step": 4340 }, { "epoch": 0.47, "learning_rate": 1.614333067512134e-05, "loss": 6.0738, "step": 4350 }, { "epoch": 0.47, "learning_rate": 1.6125021335186295e-05, "loss": 6.0469, "step": 4360 }, { "epoch": 0.47, "learning_rate": 1.6106679074899565e-05, "loss": 6.0555, "step": 4370 }, { "epoch": 0.47, "learning_rate": 1.608830399284589e-05, "loss": 6.0801, "step": 4380 }, { "epoch": 0.47, "learning_rate": 1.6069896187786428e-05, "loss": 6.0465, "step": 4390 }, { "epoch": 0.47, "learning_rate": 1.6051455758658202e-05, "loss": 6.0773, "step": 4400 }, { "epoch": 0.47, "learning_rate": 1.603298280457359e-05, "loss": 6.075, "step": 4410 }, { "epoch": 0.47, "learning_rate": 1.601447742481978e-05, "loss": 6.0625, "step": 4420 }, { "epoch": 0.48, "learning_rate": 1.5995939718858238e-05, "loss": 6.0566, "step": 4430 }, { "epoch": 0.48, "learning_rate": 1.5977369786324177e-05, "loss": 6.0723, "step": 4440 }, { "epoch": 0.48, "learning_rate": 1.595876772702602e-05, "loss": 6.0676, "step": 4450 }, { "epoch": 0.48, "learning_rate": 1.5940133640944858e-05, "loss": 6.0609, "step": 4460 }, { "epoch": 0.48, "learning_rate": 1.592146762823392e-05, "loss": 6.0832, "step": 4470 }, { "epoch": 0.48, "learning_rate": 1.5902769789218037e-05, "loss": 6.0895, "step": 4480 }, { "epoch": 0.48, "learning_rate": 1.588404022439309e-05, "loss": 6.0391, "step": 4490 }, { "epoch": 0.48, "learning_rate": 1.5865279034425484e-05, "loss": 6.0934, "step": 4500 }, { "epoch": 0.48, "learning_rate": 1.5846486320151593e-05, "loss": 6.0816, "step": 4510 }, { "epoch": 0.49, "learning_rate": 1.5827662182577234e-05, "loss": 6.0523, "step": 4520 }, { "epoch": 0.49, "learning_rate": 1.580880672287711e-05, "loss": 6.0605, "step": 4530 }, { "epoch": 0.49, "learning_rate": 1.578992004239428e-05, "loss": 6.0941, "step": 4540 }, { "epoch": 0.49, "learning_rate": 1.5771002242639595e-05, "loss": 6.0656, "step": 4550 }, { "epoch": 0.49, "learning_rate": 1.5752053425291173e-05, "loss": 6.0797, "step": 4560 }, { "epoch": 0.49, "learning_rate": 1.5733073692193833e-05, "loss": 6.0359, "step": 4570 }, { "epoch": 0.49, "learning_rate": 1.5714063145358577e-05, "loss": 6.0625, "step": 4580 }, { "epoch": 0.49, "learning_rate": 1.5695021886961998e-05, "loss": 6.0734, "step": 4590 }, { "epoch": 0.49, "learning_rate": 1.5675950019345775e-05, "loss": 6.0371, "step": 4600 }, { "epoch": 0.49, "learning_rate": 1.56568476450161e-05, "loss": 6.075, "step": 4610 }, { "epoch": 0.5, "learning_rate": 1.563771486664311e-05, "loss": 6.0828, "step": 4620 }, { "epoch": 0.5, "learning_rate": 1.561855178706039e-05, "loss": 6.0777, "step": 4630 }, { "epoch": 0.5, "learning_rate": 1.5599358509264363e-05, "loss": 6.082, "step": 4640 }, { "epoch": 0.5, "learning_rate": 1.5580135136413757e-05, "loss": 6.0879, "step": 4650 }, { "epoch": 0.5, "learning_rate": 1.556088177182907e-05, "loss": 6.052, "step": 4660 }, { "epoch": 0.5, "learning_rate": 1.5541598518991983e-05, "loss": 6.0824, "step": 4670 }, { "epoch": 0.5, "learning_rate": 1.552228548154482e-05, "loss": 6.0477, "step": 4680 }, { "epoch": 0.5, "learning_rate": 1.550294276329e-05, "loss": 6.0664, "step": 4690 }, { "epoch": 0.5, "learning_rate": 1.5483570468189455e-05, "loss": 6.0559, "step": 4700 }, { "epoch": 0.51, "learning_rate": 1.5464168700364093e-05, "loss": 6.0566, "step": 4710 }, { "epoch": 0.51, "learning_rate": 1.544473756409323e-05, "loss": 6.059, "step": 4720 }, { "epoch": 0.51, "learning_rate": 1.5425277163814017e-05, "loss": 6.0934, "step": 4730 }, { "epoch": 0.51, "learning_rate": 1.5405787604120915e-05, "loss": 6.0863, "step": 4740 }, { "epoch": 0.51, "learning_rate": 1.5386268989765085e-05, "loss": 6.0492, "step": 4750 }, { "epoch": 0.51, "learning_rate": 1.5366721425653867e-05, "loss": 6.0723, "step": 4760 }, { "epoch": 0.51, "learning_rate": 1.5347145016850183e-05, "loss": 6.0586, "step": 4770 }, { "epoch": 0.51, "learning_rate": 1.5327539868571998e-05, "loss": 6.0461, "step": 4780 }, { "epoch": 0.51, "learning_rate": 1.5307906086191744e-05, "loss": 6.0434, "step": 4790 }, { "epoch": 0.52, "learning_rate": 1.528824377523575e-05, "loss": 6.1023, "step": 4800 }, { "epoch": 0.52, "learning_rate": 1.5268553041383675e-05, "loss": 6.0734, "step": 4810 }, { "epoch": 0.52, "learning_rate": 1.524883399046795e-05, "loss": 6.0918, "step": 4820 }, { "epoch": 0.52, "learning_rate": 1.5229086728473203e-05, "loss": 6.0586, "step": 4830 }, { "epoch": 0.52, "learning_rate": 1.5209311361535682e-05, "loss": 6.0738, "step": 4840 }, { "epoch": 0.52, "learning_rate": 1.5189507995942698e-05, "loss": 6.0602, "step": 4850 }, { "epoch": 0.52, "learning_rate": 1.5169676738132046e-05, "loss": 6.0789, "step": 4860 }, { "epoch": 0.52, "learning_rate": 1.514981769469143e-05, "loss": 6.0687, "step": 4870 }, { "epoch": 0.52, "learning_rate": 1.5129930972357902e-05, "loss": 6.0465, "step": 4880 }, { "epoch": 0.53, "learning_rate": 1.5110016678017277e-05, "loss": 6.0559, "step": 4890 }, { "epoch": 0.53, "learning_rate": 1.5090074918703553e-05, "loss": 6.0629, "step": 4900 }, { "epoch": 0.53, "learning_rate": 1.5070105801598363e-05, "loss": 6.059, "step": 4910 }, { "epoch": 0.53, "learning_rate": 1.5050109434030366e-05, "loss": 6.0633, "step": 4920 }, { "epoch": 0.53, "learning_rate": 1.503008592347469e-05, "loss": 6.0691, "step": 4930 }, { "epoch": 0.53, "learning_rate": 1.5010035377552354e-05, "loss": 6.0527, "step": 4940 }, { "epoch": 0.53, "learning_rate": 1.4989957904029675e-05, "loss": 6.0695, "step": 4950 }, { "epoch": 0.53, "learning_rate": 1.4969853610817707e-05, "loss": 6.0676, "step": 4960 }, { "epoch": 0.53, "learning_rate": 1.4949722605971647e-05, "loss": 6.0664, "step": 4970 }, { "epoch": 0.53, "learning_rate": 1.4929564997690268e-05, "loss": 6.0586, "step": 4980 }, { "epoch": 0.54, "learning_rate": 1.4909380894315316e-05, "loss": 6.0492, "step": 4990 }, { "epoch": 0.54, "learning_rate": 1.4889170404330953e-05, "loss": 6.0539, "step": 5000 }, { "epoch": 0.54, "learning_rate": 1.4872983086976594e-05, "loss": 6.0746, "step": 5010 }, { "epoch": 0.54, "learning_rate": 1.485272537492769e-05, "loss": 6.0453, "step": 5020 }, { "epoch": 0.54, "learning_rate": 1.4832441580777642e-05, "loss": 6.0875, "step": 5030 }, { "epoch": 0.54, "learning_rate": 1.4812131813546416e-05, "loss": 6.075, "step": 5040 }, { "epoch": 0.54, "learning_rate": 1.4791796182393578e-05, "loss": 6.077, "step": 5050 }, { "epoch": 0.54, "learning_rate": 1.4771434796617716e-05, "loss": 6.084, "step": 5060 }, { "epoch": 0.54, "learning_rate": 1.4751047765655824e-05, "loss": 6.0563, "step": 5070 }, { "epoch": 0.55, "learning_rate": 1.4730635199082746e-05, "loss": 6.1125, "step": 5080 }, { "epoch": 0.55, "learning_rate": 1.4710197206610567e-05, "loss": 6.0598, "step": 5090 }, { "epoch": 0.55, "learning_rate": 1.4689733898088032e-05, "loss": 6.0789, "step": 5100 }, { "epoch": 0.55, "learning_rate": 1.4669245383499952e-05, "loss": 6.0664, "step": 5110 }, { "epoch": 0.55, "learning_rate": 1.4648731772966613e-05, "loss": 6.0781, "step": 5120 }, { "epoch": 0.55, "learning_rate": 1.4628193176743185e-05, "loss": 6.073, "step": 5130 }, { "epoch": 0.55, "learning_rate": 1.460762970521913e-05, "loss": 6.0598, "step": 5140 }, { "epoch": 0.55, "learning_rate": 1.4587041468917609e-05, "loss": 6.0699, "step": 5150 }, { "epoch": 0.55, "learning_rate": 1.4566428578494888e-05, "loss": 6.068, "step": 5160 }, { "epoch": 0.56, "learning_rate": 1.4545791144739737e-05, "loss": 6.0422, "step": 5170 }, { "epoch": 0.56, "learning_rate": 1.452512927857285e-05, "loss": 6.0852, "step": 5180 }, { "epoch": 0.56, "learning_rate": 1.4504443091046225e-05, "loss": 6.0863, "step": 5190 }, { "epoch": 0.56, "learning_rate": 1.448373269334259e-05, "loss": 6.0699, "step": 5200 }, { "epoch": 0.56, "learning_rate": 1.4462998196774796e-05, "loss": 6.0621, "step": 5210 }, { "epoch": 0.56, "learning_rate": 1.4442239712785215e-05, "loss": 6.0805, "step": 5220 }, { "epoch": 0.56, "learning_rate": 1.4421457352945148e-05, "loss": 6.0809, "step": 5230 }, { "epoch": 0.56, "learning_rate": 1.440065122895422e-05, "loss": 6.0785, "step": 5240 }, { "epoch": 0.56, "learning_rate": 1.4379821452639789e-05, "loss": 6.0484, "step": 5250 }, { "epoch": 0.56, "learning_rate": 1.4358968135956323e-05, "loss": 6.0797, "step": 5260 }, { "epoch": 0.57, "learning_rate": 1.4338091390984831e-05, "loss": 6.0863, "step": 5270 }, { "epoch": 0.57, "learning_rate": 1.431719132993223e-05, "loss": 6.0324, "step": 5280 }, { "epoch": 0.57, "learning_rate": 1.4296268065130759e-05, "loss": 6.0422, "step": 5290 }, { "epoch": 0.57, "learning_rate": 1.4275321709037371e-05, "loss": 6.0672, "step": 5300 }, { "epoch": 0.57, "learning_rate": 1.425435237423313e-05, "loss": 6.0785, "step": 5310 }, { "epoch": 0.57, "learning_rate": 1.4233360173422602e-05, "loss": 6.0828, "step": 5320 }, { "epoch": 0.57, "learning_rate": 1.421234521943325e-05, "loss": 6.0781, "step": 5330 }, { "epoch": 0.57, "learning_rate": 1.4191307625214834e-05, "loss": 6.0445, "step": 5340 }, { "epoch": 0.57, "learning_rate": 1.41702475038388e-05, "loss": 6.0738, "step": 5350 }, { "epoch": 0.58, "learning_rate": 1.4149164968497661e-05, "loss": 6.0773, "step": 5360 }, { "epoch": 0.58, "learning_rate": 1.4128060132504413e-05, "loss": 6.0598, "step": 5370 }, { "epoch": 0.58, "learning_rate": 1.4106933109291903e-05, "loss": 6.0621, "step": 5380 }, { "epoch": 0.58, "learning_rate": 1.4085784012412232e-05, "loss": 6.0539, "step": 5390 }, { "epoch": 0.58, "learning_rate": 1.4064612955536138e-05, "loss": 6.0656, "step": 5400 }, { "epoch": 0.58, "learning_rate": 1.4043420052452393e-05, "loss": 6.0668, "step": 5410 }, { "epoch": 0.58, "learning_rate": 1.4022205417067178e-05, "loss": 6.098, "step": 5420 }, { "epoch": 0.58, "learning_rate": 1.4000969163403491e-05, "loss": 6.0449, "step": 5430 }, { "epoch": 0.58, "learning_rate": 1.3979711405600512e-05, "loss": 6.0687, "step": 5440 }, { "epoch": 0.59, "learning_rate": 1.3958432257913005e-05, "loss": 6.0707, "step": 5450 }, { "epoch": 0.59, "learning_rate": 1.3937131834710702e-05, "loss": 6.084, "step": 5460 }, { "epoch": 0.59, "learning_rate": 1.3915810250477679e-05, "loss": 6.066, "step": 5470 }, { "epoch": 0.59, "learning_rate": 1.3894467619811746e-05, "loss": 6.0598, "step": 5480 }, { "epoch": 0.59, "learning_rate": 1.387310405742384e-05, "loss": 6.0687, "step": 5490 }, { "epoch": 0.59, "learning_rate": 1.3851719678137395e-05, "loss": 6.0605, "step": 5500 }, { "epoch": 0.59, "learning_rate": 1.3830314596887728e-05, "loss": 6.0625, "step": 5510 }, { "epoch": 0.59, "learning_rate": 1.380888892872143e-05, "loss": 6.0684, "step": 5520 }, { "epoch": 0.59, "learning_rate": 1.3787442788795733e-05, "loss": 6.0762, "step": 5530 }, { "epoch": 0.59, "learning_rate": 1.3765976292377907e-05, "loss": 6.0555, "step": 5540 }, { "epoch": 0.6, "learning_rate": 1.3744489554844633e-05, "loss": 6.0848, "step": 5550 }, { "epoch": 0.6, "learning_rate": 1.3722982691681372e-05, "loss": 6.0367, "step": 5560 }, { "epoch": 0.6, "learning_rate": 1.3701455818481767e-05, "loss": 6.0672, "step": 5570 }, { "epoch": 0.6, "learning_rate": 1.3679909050947003e-05, "loss": 6.0445, "step": 5580 }, { "epoch": 0.6, "learning_rate": 1.3658342504885192e-05, "loss": 6.0809, "step": 5590 }, { "epoch": 0.6, "learning_rate": 1.3636756296210751e-05, "loss": 6.0789, "step": 5600 }, { "epoch": 0.6, "learning_rate": 1.3615150540943785e-05, "loss": 6.0805, "step": 5610 }, { "epoch": 0.6, "learning_rate": 1.3593525355209444e-05, "loss": 6.0609, "step": 5620 }, { "epoch": 0.6, "learning_rate": 1.3571880855237324e-05, "loss": 6.0566, "step": 5630 }, { "epoch": 0.61, "learning_rate": 1.3550217157360816e-05, "loss": 6.059, "step": 5640 }, { "epoch": 0.61, "learning_rate": 1.3528534378016509e-05, "loss": 6.0766, "step": 5650 }, { "epoch": 0.61, "learning_rate": 1.3506832633743543e-05, "loss": 6.0742, "step": 5660 }, { "epoch": 0.61, "learning_rate": 1.3485112041182988e-05, "loss": 6.0438, "step": 5670 }, { "epoch": 0.61, "learning_rate": 1.3463372717077221e-05, "loss": 6.0406, "step": 5680 }, { "epoch": 0.61, "learning_rate": 1.3441614778269297e-05, "loss": 6.0574, "step": 5690 }, { "epoch": 0.61, "learning_rate": 1.3419838341702314e-05, "loss": 6.0656, "step": 5700 }, { "epoch": 0.61, "learning_rate": 1.33980435244188e-05, "loss": 6.0535, "step": 5710 }, { "epoch": 0.61, "learning_rate": 1.3376230443560066e-05, "loss": 6.0488, "step": 5720 }, { "epoch": 0.62, "learning_rate": 1.335439921636559e-05, "loss": 6.0727, "step": 5730 }, { "epoch": 0.62, "learning_rate": 1.333254996017238e-05, "loss": 6.1, "step": 5740 }, { "epoch": 0.62, "learning_rate": 1.3310682792414344e-05, "loss": 6.0625, "step": 5750 }, { "epoch": 0.62, "learning_rate": 1.3288797830621661e-05, "loss": 6.0465, "step": 5760 }, { "epoch": 0.62, "learning_rate": 1.3266895192420149e-05, "loss": 6.0668, "step": 5770 }, { "epoch": 0.62, "learning_rate": 1.324497499553063e-05, "loss": 6.0781, "step": 5780 }, { "epoch": 0.62, "learning_rate": 1.32230373577683e-05, "loss": 6.0762, "step": 5790 }, { "epoch": 0.62, "learning_rate": 1.3201082397042097e-05, "loss": 6.0711, "step": 5800 }, { "epoch": 0.62, "learning_rate": 1.3179110231354062e-05, "loss": 6.0719, "step": 5810 }, { "epoch": 0.62, "learning_rate": 1.315712097879871e-05, "loss": 6.0715, "step": 5820 }, { "epoch": 0.63, "learning_rate": 1.313511475756239e-05, "loss": 6.0609, "step": 5830 }, { "epoch": 0.63, "learning_rate": 1.3113091685922663e-05, "loss": 6.0629, "step": 5840 }, { "epoch": 0.63, "learning_rate": 1.3091051882247646e-05, "loss": 6.0691, "step": 5850 }, { "epoch": 0.63, "learning_rate": 1.3068995464995387e-05, "loss": 6.0664, "step": 5860 }, { "epoch": 0.63, "learning_rate": 1.3046922552713232e-05, "loss": 6.0926, "step": 5870 }, { "epoch": 0.63, "learning_rate": 1.3024833264037185e-05, "loss": 6.0512, "step": 5880 }, { "epoch": 0.63, "learning_rate": 1.300272771769126e-05, "loss": 6.0441, "step": 5890 }, { "epoch": 0.63, "learning_rate": 1.2980606032486862e-05, "loss": 6.0664, "step": 5900 }, { "epoch": 0.63, "learning_rate": 1.295846832732213e-05, "loss": 6.0781, "step": 5910 }, { "epoch": 0.64, "learning_rate": 1.2936314721181309e-05, "loss": 6.0445, "step": 5920 }, { "epoch": 0.64, "learning_rate": 1.291414533313411e-05, "loss": 6.0512, "step": 5930 }, { "epoch": 0.64, "learning_rate": 1.2891960282335063e-05, "loss": 6.0586, "step": 5940 }, { "epoch": 0.64, "learning_rate": 1.2869759688022882e-05, "loss": 6.0816, "step": 5950 }, { "epoch": 0.64, "learning_rate": 1.2847543669519828e-05, "loss": 6.0602, "step": 5960 }, { "epoch": 0.64, "learning_rate": 1.2825312346231058e-05, "loss": 6.0816, "step": 5970 }, { "epoch": 0.64, "learning_rate": 1.2803065837643987e-05, "loss": 6.0789, "step": 5980 }, { "epoch": 0.64, "learning_rate": 1.2780804263327653e-05, "loss": 6.0754, "step": 5990 }, { "epoch": 0.64, "learning_rate": 1.2758527742932061e-05, "loss": 6.0805, "step": 6000 }, { "epoch": 0.65, "learning_rate": 1.2736236396187554e-05, "loss": 6.0617, "step": 6010 }, { "epoch": 0.65, "learning_rate": 1.2718392724330404e-05, "loss": 6.0637, "step": 6020 }, { "epoch": 0.65, "learning_rate": 1.2696074992132255e-05, "loss": 6.0383, "step": 6030 }, { "epoch": 0.65, "learning_rate": 1.2673742769252024e-05, "loss": 6.0625, "step": 6040 }, { "epoch": 0.65, "learning_rate": 1.2651396175719437e-05, "loss": 6.0523, "step": 6050 }, { "epoch": 0.65, "learning_rate": 1.2629035331641457e-05, "loss": 6.0496, "step": 6060 }, { "epoch": 0.65, "learning_rate": 1.260666035720164e-05, "loss": 6.0875, "step": 6070 }, { "epoch": 0.65, "learning_rate": 1.2584271372659495e-05, "loss": 6.0566, "step": 6080 }, { "epoch": 0.65, "learning_rate": 1.256186849834982e-05, "loss": 6.0273, "step": 6090 }, { "epoch": 0.65, "learning_rate": 1.2539451854682078e-05, "loss": 6.0637, "step": 6100 }, { "epoch": 0.66, "learning_rate": 1.2517021562139732e-05, "loss": 6.0664, "step": 6110 }, { "epoch": 0.66, "learning_rate": 1.249457774127961e-05, "loss": 6.0543, "step": 6120 }, { "epoch": 0.66, "learning_rate": 1.2472120512731239e-05, "loss": 6.0859, "step": 6130 }, { "epoch": 0.66, "learning_rate": 1.2449649997196223e-05, "loss": 6.0438, "step": 6140 }, { "epoch": 0.66, "learning_rate": 1.2427166315447572e-05, "loss": 6.0711, "step": 6150 }, { "epoch": 0.66, "learning_rate": 1.240466958832906e-05, "loss": 6.0512, "step": 6160 }, { "epoch": 0.66, "learning_rate": 1.2382159936754583e-05, "loss": 6.0273, "step": 6170 }, { "epoch": 0.66, "learning_rate": 1.2359637481707499e-05, "loss": 6.0586, "step": 6180 }, { "epoch": 0.66, "learning_rate": 1.233710234423998e-05, "loss": 6.0613, "step": 6190 }, { "epoch": 0.67, "learning_rate": 1.231455464547236e-05, "loss": 6.082, "step": 6200 }, { "epoch": 0.67, "learning_rate": 1.2291994506592493e-05, "loss": 6.0703, "step": 6210 }, { "epoch": 0.67, "learning_rate": 1.2269422048855093e-05, "loss": 6.1051, "step": 6220 }, { "epoch": 0.67, "learning_rate": 1.2246837393581081e-05, "loss": 6.066, "step": 6230 }, { "epoch": 0.67, "learning_rate": 1.2224240662156943e-05, "loss": 6.057, "step": 6240 }, { "epoch": 0.67, "learning_rate": 1.2201631976034064e-05, "loss": 6.0687, "step": 6250 }, { "epoch": 0.67, "learning_rate": 1.217901145672809e-05, "loss": 6.0809, "step": 6260 }, { "epoch": 0.67, "learning_rate": 1.2156379225818257e-05, "loss": 6.0367, "step": 6270 }, { "epoch": 0.67, "learning_rate": 1.213373540494676e-05, "loss": 6.0555, "step": 6280 }, { "epoch": 0.68, "learning_rate": 1.2111080115818076e-05, "loss": 6.052, "step": 6290 }, { "epoch": 0.68, "learning_rate": 1.2088413480198326e-05, "loss": 6.0613, "step": 6300 }, { "epoch": 0.68, "learning_rate": 1.2065735619914618e-05, "loss": 6.059, "step": 6310 }, { "epoch": 0.68, "learning_rate": 1.2043046656854385e-05, "loss": 6.0816, "step": 6320 }, { "epoch": 0.68, "learning_rate": 1.2020346712964732e-05, "loss": 6.0734, "step": 6330 }, { "epoch": 0.68, "learning_rate": 1.1997635910251793e-05, "loss": 6.0582, "step": 6340 }, { "epoch": 0.68, "learning_rate": 1.1974914370780053e-05, "loss": 6.0648, "step": 6350 }, { "epoch": 0.68, "learning_rate": 1.1952182216671715e-05, "loss": 6.0746, "step": 6360 }, { "epoch": 0.68, "learning_rate": 1.1929439570106028e-05, "loss": 6.0434, "step": 6370 }, { "epoch": 0.68, "learning_rate": 1.1906686553318632e-05, "loss": 6.084, "step": 6380 }, { "epoch": 0.69, "learning_rate": 1.188392328860091e-05, "loss": 6.0395, "step": 6390 }, { "epoch": 0.69, "learning_rate": 1.186114989829932e-05, "loss": 6.0074, "step": 6400 }, { "epoch": 0.69, "learning_rate": 1.1838366504814749e-05, "loss": 6.0477, "step": 6410 }, { "epoch": 0.69, "learning_rate": 1.181557323060184e-05, "loss": 6.0625, "step": 6420 }, { "epoch": 0.69, "learning_rate": 1.1792770198168348e-05, "loss": 6.0648, "step": 6430 }, { "epoch": 0.69, "learning_rate": 1.1769957530074474e-05, "loss": 6.0895, "step": 6440 }, { "epoch": 0.69, "learning_rate": 1.1747135348932208e-05, "loss": 6.0687, "step": 6450 }, { "epoch": 0.69, "learning_rate": 1.1724303777404671e-05, "loss": 6.066, "step": 6460 }, { "epoch": 0.69, "learning_rate": 1.1701462938205455e-05, "loss": 6.0695, "step": 6470 }, { "epoch": 0.7, "learning_rate": 1.167861295409796e-05, "loss": 6.059, "step": 6480 }, { "epoch": 0.7, "learning_rate": 1.1655753947894743e-05, "loss": 6.0645, "step": 6490 }, { "epoch": 0.7, "learning_rate": 1.1632886042456847e-05, "loss": 6.0977, "step": 6500 }, { "epoch": 0.7, "learning_rate": 1.1610009360693151e-05, "loss": 6.0563, "step": 6510 }, { "epoch": 0.7, "learning_rate": 1.1587124025559697e-05, "loss": 6.0621, "step": 6520 }, { "epoch": 0.7, "learning_rate": 1.1564230160059044e-05, "loss": 6.0812, "step": 6530 }, { "epoch": 0.7, "learning_rate": 1.1541327887239597e-05, "loss": 6.0484, "step": 6540 }, { "epoch": 0.7, "learning_rate": 1.1518417330194949e-05, "loss": 6.041, "step": 6550 }, { "epoch": 0.7, "learning_rate": 1.1495498612063212e-05, "loss": 6.0566, "step": 6560 }, { "epoch": 0.71, "learning_rate": 1.147257185602637e-05, "loss": 6.0738, "step": 6570 }, { "epoch": 0.71, "learning_rate": 1.144963718530961e-05, "loss": 6.0492, "step": 6580 }, { "epoch": 0.71, "learning_rate": 1.1426694723180647e-05, "loss": 6.0906, "step": 6590 }, { "epoch": 0.71, "learning_rate": 1.1403744592949082e-05, "loss": 6.0648, "step": 6600 }, { "epoch": 0.71, "learning_rate": 1.1380786917965727e-05, "loss": 6.0492, "step": 6610 }, { "epoch": 0.71, "learning_rate": 1.1357821821621948e-05, "loss": 6.0648, "step": 6620 }, { "epoch": 0.71, "learning_rate": 1.1334849427348992e-05, "loss": 6.0504, "step": 6630 }, { "epoch": 0.71, "learning_rate": 1.1311869858617342e-05, "loss": 6.057, "step": 6640 }, { "epoch": 0.71, "learning_rate": 1.1288883238936026e-05, "loss": 6.0496, "step": 6650 }, { "epoch": 0.72, "learning_rate": 1.1265889691851981e-05, "loss": 6.0852, "step": 6660 }, { "epoch": 0.72, "learning_rate": 1.1242889340949376e-05, "loss": 6.0457, "step": 6670 }, { "epoch": 0.72, "learning_rate": 1.1219882309848945e-05, "loss": 6.0379, "step": 6680 }, { "epoch": 0.72, "learning_rate": 1.1196868722207323e-05, "loss": 6.0691, "step": 6690 }, { "epoch": 0.72, "learning_rate": 1.1173848701716396e-05, "loss": 6.0727, "step": 6700 }, { "epoch": 0.72, "learning_rate": 1.1150822372102612e-05, "loss": 6.0672, "step": 6710 }, { "epoch": 0.72, "learning_rate": 1.1127789857126341e-05, "loss": 6.0758, "step": 6720 }, { "epoch": 0.72, "learning_rate": 1.1104751280581187e-05, "loss": 6.0453, "step": 6730 }, { "epoch": 0.72, "learning_rate": 1.1081706766293339e-05, "loss": 6.0609, "step": 6740 }, { "epoch": 0.72, "learning_rate": 1.1058656438120898e-05, "loss": 6.0738, "step": 6750 }, { "epoch": 0.73, "learning_rate": 1.1035600419953216e-05, "loss": 6.084, "step": 6760 }, { "epoch": 0.73, "learning_rate": 1.1012538835710223e-05, "loss": 6.0277, "step": 6770 }, { "epoch": 0.73, "learning_rate": 1.0989471809341764e-05, "loss": 6.0465, "step": 6780 }, { "epoch": 0.73, "learning_rate": 1.0966399464826944e-05, "loss": 6.0648, "step": 6790 }, { "epoch": 0.73, "learning_rate": 1.0943321926173441e-05, "loss": 6.0539, "step": 6800 }, { "epoch": 0.73, "learning_rate": 1.0920239317416851e-05, "loss": 6.0652, "step": 6810 }, { "epoch": 0.73, "learning_rate": 1.0897151762620028e-05, "loss": 6.0965, "step": 6820 }, { "epoch": 0.73, "learning_rate": 1.0874059385872403e-05, "loss": 6.0551, "step": 6830 }, { "epoch": 0.73, "learning_rate": 1.0850962311289322e-05, "loss": 6.0621, "step": 6840 }, { "epoch": 0.74, "learning_rate": 1.082786066301139e-05, "loss": 6.0477, "step": 6850 }, { "epoch": 0.74, "learning_rate": 1.0804754565203784e-05, "loss": 6.0488, "step": 6860 }, { "epoch": 0.74, "learning_rate": 1.0781644142055603e-05, "loss": 6.0551, "step": 6870 }, { "epoch": 0.74, "learning_rate": 1.075852951777919e-05, "loss": 6.0863, "step": 6880 }, { "epoch": 0.74, "learning_rate": 1.0735410816609468e-05, "loss": 6.0699, "step": 6890 }, { "epoch": 0.74, "learning_rate": 1.0712288162803278e-05, "loss": 6.0406, "step": 6900 }, { "epoch": 0.74, "learning_rate": 1.0689161680638692e-05, "loss": 6.0809, "step": 6910 }, { "epoch": 0.74, "learning_rate": 1.0666031494414377e-05, "loss": 6.0766, "step": 6920 }, { "epoch": 0.74, "learning_rate": 1.0642897728448893e-05, "loss": 6.0379, "step": 6930 }, { "epoch": 0.75, "learning_rate": 1.0619760507080045e-05, "loss": 6.0586, "step": 6940 }, { "epoch": 0.75, "learning_rate": 1.059661995466421e-05, "loss": 6.0617, "step": 6950 }, { "epoch": 0.75, "learning_rate": 1.0573476195575673e-05, "loss": 6.1004, "step": 6960 }, { "epoch": 0.75, "learning_rate": 1.0550329354205948e-05, "loss": 6.0496, "step": 6970 }, { "epoch": 0.75, "learning_rate": 1.0527179554963116e-05, "loss": 6.057, "step": 6980 }, { "epoch": 0.75, "learning_rate": 1.0504026922271156e-05, "loss": 6.0441, "step": 6990 }, { "epoch": 0.75, "learning_rate": 1.0480871580569281e-05, "loss": 6.0758, "step": 7000 }, { "epoch": 0.75, "learning_rate": 1.0457713654311255e-05, "loss": 6.0684, "step": 7010 }, { "epoch": 0.75, "learning_rate": 1.0439185536066226e-05, "loss": 6.066, "step": 7020 }, { "epoch": 0.75, "learning_rate": 1.0416023271274547e-05, "loss": 6.0633, "step": 7030 }, { "epoch": 0.76, "learning_rate": 1.0392858770469041e-05, "loss": 6.082, "step": 7040 }, { "epoch": 0.76, "learning_rate": 1.0369692158152705e-05, "loss": 6.0398, "step": 7050 }, { "epoch": 0.76, "learning_rate": 1.0346523558839883e-05, "loss": 6.0672, "step": 7060 }, { "epoch": 0.76, "learning_rate": 1.0323353097055601e-05, "loss": 6.0828, "step": 7070 }, { "epoch": 0.76, "learning_rate": 1.0300180897334897e-05, "loss": 6.05, "step": 7080 }, { "epoch": 0.76, "learning_rate": 1.0277007084222145e-05, "loss": 6.0773, "step": 7090 }, { "epoch": 0.76, "learning_rate": 1.0253831782270395e-05, "loss": 6.0387, "step": 7100 }, { "epoch": 0.76, "learning_rate": 1.0230655116040695e-05, "loss": 6.0566, "step": 7110 }, { "epoch": 0.76, "learning_rate": 1.020747721010143e-05, "loss": 6.0617, "step": 7120 }, { "epoch": 0.77, "learning_rate": 1.0184298189027648e-05, "loss": 6.0375, "step": 7130 }, { "epoch": 0.77, "learning_rate": 1.0161118177400384e-05, "loss": 6.0359, "step": 7140 }, { "epoch": 0.77, "learning_rate": 1.0137937299806005e-05, "loss": 6.0812, "step": 7150 }, { "epoch": 0.77, "learning_rate": 1.0114755680835524e-05, "loss": 6.0723, "step": 7160 }, { "epoch": 0.77, "learning_rate": 1.0091573445083951e-05, "loss": 6.0695, "step": 7170 }, { "epoch": 0.77, "learning_rate": 1.0068390717149597e-05, "loss": 6.0531, "step": 7180 }, { "epoch": 0.77, "learning_rate": 1.004520762163343e-05, "loss": 6.0613, "step": 7190 }, { "epoch": 0.77, "learning_rate": 1.0022024283138385e-05, "loss": 6.0469, "step": 7200 }, { "epoch": 0.77, "learning_rate": 9.998840826268708e-06, "loss": 6.0887, "step": 7210 }, { "epoch": 0.78, "learning_rate": 9.975657375629279e-06, "loss": 6.0621, "step": 7220 }, { "epoch": 0.78, "learning_rate": 9.952474055824947e-06, "loss": 6.084, "step": 7230 }, { "epoch": 0.78, "learning_rate": 9.929290991459855e-06, "loss": 6.0441, "step": 7240 }, { "epoch": 0.78, "learning_rate": 9.906108307136776e-06, "loss": 6.0609, "step": 7250 }, { "epoch": 0.78, "learning_rate": 9.882926127456437e-06, "loss": 6.0621, "step": 7260 }, { "epoch": 0.78, "learning_rate": 9.859744577016852e-06, "loss": 6.0531, "step": 7270 }, { "epoch": 0.78, "learning_rate": 9.83656378041266e-06, "loss": 6.0738, "step": 7280 }, { "epoch": 0.78, "learning_rate": 9.813383862234441e-06, "loss": 6.0812, "step": 7290 }, { "epoch": 0.78, "learning_rate": 9.790204947068054e-06, "loss": 6.0414, "step": 7300 }, { "epoch": 0.78, "learning_rate": 9.767027159493975e-06, "loss": 6.0625, "step": 7310 }, { "epoch": 0.79, "learning_rate": 9.743850624086608e-06, "loss": 6.057, "step": 7320 }, { "epoch": 0.79, "learning_rate": 9.720675465413639e-06, "loss": 6.0988, "step": 7330 }, { "epoch": 0.79, "learning_rate": 9.697501808035341e-06, "loss": 6.0418, "step": 7340 }, { "epoch": 0.79, "learning_rate": 9.674329776503932e-06, "loss": 6.0586, "step": 7350 }, { "epoch": 0.79, "learning_rate": 9.65115949536288e-06, "loss": 6.0746, "step": 7360 }, { "epoch": 0.79, "learning_rate": 9.627991089146249e-06, "loss": 6.0832, "step": 7370 }, { "epoch": 0.79, "learning_rate": 9.604824682378032e-06, "loss": 6.0473, "step": 7380 }, { "epoch": 0.79, "learning_rate": 9.581660399571466e-06, "loss": 6.0551, "step": 7390 }, { "epoch": 0.79, "learning_rate": 9.558498365228379e-06, "loss": 6.0707, "step": 7400 }, { "epoch": 0.8, "learning_rate": 9.53533870383851e-06, "loss": 6.0773, "step": 7410 }, { "epoch": 0.8, "learning_rate": 9.512181539878843e-06, "loss": 6.0504, "step": 7420 }, { "epoch": 0.8, "learning_rate": 9.489026997812946e-06, "loss": 6.0781, "step": 7430 }, { "epoch": 0.8, "learning_rate": 9.465875202090288e-06, "loss": 6.041, "step": 7440 }, { "epoch": 0.8, "learning_rate": 9.442726277145578e-06, "loss": 6.0574, "step": 7450 }, { "epoch": 0.8, "learning_rate": 9.419580347398102e-06, "loss": 6.0664, "step": 7460 }, { "epoch": 0.8, "learning_rate": 9.396437537251038e-06, "loss": 6.0613, "step": 7470 }, { "epoch": 0.8, "learning_rate": 9.373297971090803e-06, "loss": 6.0637, "step": 7480 }, { "epoch": 0.8, "learning_rate": 9.350161773286377e-06, "loss": 6.0715, "step": 7490 }, { "epoch": 0.81, "learning_rate": 9.327029068188636e-06, "loss": 6.0676, "step": 7500 }, { "epoch": 0.81, "learning_rate": 9.303899980129682e-06, "loss": 6.0578, "step": 7510 }, { "epoch": 0.81, "learning_rate": 9.280774633422183e-06, "loss": 6.0598, "step": 7520 }, { "epoch": 0.81, "learning_rate": 9.257653152358687e-06, "loss": 6.0488, "step": 7530 }, { "epoch": 0.81, "learning_rate": 9.234535661210975e-06, "loss": 6.0543, "step": 7540 }, { "epoch": 0.81, "learning_rate": 9.21142228422938e-06, "loss": 6.0477, "step": 7550 }, { "epoch": 0.81, "learning_rate": 9.18831314564212e-06, "loss": 6.0438, "step": 7560 }, { "epoch": 0.81, "learning_rate": 9.165208369654636e-06, "loss": 6.0848, "step": 7570 }, { "epoch": 0.81, "learning_rate": 9.14210808044892e-06, "loss": 6.0656, "step": 7580 }, { "epoch": 0.81, "learning_rate": 9.119012402182851e-06, "loss": 6.0582, "step": 7590 }, { "epoch": 0.82, "learning_rate": 9.09592145898952e-06, "loss": 6.0762, "step": 7600 }, { "epoch": 0.82, "learning_rate": 9.072835374976573e-06, "loss": 6.0777, "step": 7610 }, { "epoch": 0.82, "learning_rate": 9.049754274225536e-06, "loss": 6.0641, "step": 7620 }, { "epoch": 0.82, "learning_rate": 9.026678280791157e-06, "loss": 6.0488, "step": 7630 }, { "epoch": 0.82, "learning_rate": 9.003607518700726e-06, "loss": 6.0465, "step": 7640 }, { "epoch": 0.82, "learning_rate": 8.98054211195342e-06, "loss": 6.0781, "step": 7650 }, { "epoch": 0.82, "learning_rate": 8.957482184519635e-06, "loss": 6.0633, "step": 7660 }, { "epoch": 0.82, "learning_rate": 8.93442786034031e-06, "loss": 6.0531, "step": 7670 }, { "epoch": 0.82, "learning_rate": 8.911379263326275e-06, "loss": 6.0781, "step": 7680 }, { "epoch": 0.83, "learning_rate": 8.888336517357574e-06, "loss": 6.0863, "step": 7690 }, { "epoch": 0.83, "learning_rate": 8.865299746282805e-06, "loss": 6.0824, "step": 7700 }, { "epoch": 0.83, "learning_rate": 8.84226907391845e-06, "loss": 6.0797, "step": 7710 }, { "epoch": 0.83, "learning_rate": 8.819244624048216e-06, "loss": 6.0871, "step": 7720 }, { "epoch": 0.83, "learning_rate": 8.796226520422364e-06, "loss": 6.0461, "step": 7730 }, { "epoch": 0.83, "learning_rate": 8.773214886757045e-06, "loss": 6.0598, "step": 7740 }, { "epoch": 0.83, "learning_rate": 8.750209846733634e-06, "loss": 6.0531, "step": 7750 }, { "epoch": 0.83, "learning_rate": 8.72721152399807e-06, "loss": 6.0422, "step": 7760 }, { "epoch": 0.83, "learning_rate": 8.704220042160188e-06, "loss": 6.0687, "step": 7770 }, { "epoch": 0.84, "learning_rate": 8.681235524793052e-06, "loss": 6.0547, "step": 7780 }, { "epoch": 0.84, "learning_rate": 8.658258095432302e-06, "loss": 6.0434, "step": 7790 }, { "epoch": 0.84, "learning_rate": 8.635287877575471e-06, "loss": 6.0555, "step": 7800 }, { "epoch": 0.84, "learning_rate": 8.61232499468134e-06, "loss": 6.0824, "step": 7810 }, { "epoch": 0.84, "learning_rate": 8.58936957016926e-06, "loss": 6.057, "step": 7820 }, { "epoch": 0.84, "learning_rate": 8.566421727418504e-06, "loss": 6.0461, "step": 7830 }, { "epoch": 0.84, "learning_rate": 8.543481589767586e-06, "loss": 6.0723, "step": 7840 }, { "epoch": 0.84, "learning_rate": 8.520549280513612e-06, "loss": 6.0836, "step": 7850 }, { "epoch": 0.84, "learning_rate": 8.497624922911613e-06, "loss": 6.0816, "step": 7860 }, { "epoch": 0.84, "learning_rate": 8.474708640173878e-06, "loss": 6.0609, "step": 7870 }, { "epoch": 0.85, "learning_rate": 8.4518005554693e-06, "loss": 6.0523, "step": 7880 }, { "epoch": 0.85, "learning_rate": 8.428900791922707e-06, "loss": 6.0352, "step": 7890 }, { "epoch": 0.85, "learning_rate": 8.406009472614205e-06, "loss": 6.0711, "step": 7900 }, { "epoch": 0.85, "learning_rate": 8.383126720578513e-06, "loss": 6.0758, "step": 7910 }, { "epoch": 0.85, "learning_rate": 8.360252658804304e-06, "loss": 6.0695, "step": 7920 }, { "epoch": 0.85, "learning_rate": 8.337387410233544e-06, "loss": 6.05, "step": 7930 }, { "epoch": 0.85, "learning_rate": 8.314531097760827e-06, "loss": 6.0324, "step": 7940 }, { "epoch": 0.85, "learning_rate": 8.291683844232721e-06, "loss": 6.0859, "step": 7950 }, { "epoch": 0.85, "learning_rate": 8.268845772447106e-06, "loss": 6.0648, "step": 7960 }, { "epoch": 0.86, "learning_rate": 8.246017005152508e-06, "loss": 6.0598, "step": 7970 }, { "epoch": 0.86, "learning_rate": 8.223197665047447e-06, "loss": 6.0504, "step": 7980 }, { "epoch": 0.86, "learning_rate": 8.200387874779771e-06, "loss": 6.0684, "step": 7990 }, { "epoch": 0.86, "learning_rate": 8.177587756946008e-06, "loss": 6.0723, "step": 8000 }, { "epoch": 0.86, "learning_rate": 8.15479743409069e-06, "loss": 6.0941, "step": 8010 }, { "epoch": 0.86, "learning_rate": 8.136572310507251e-06, "loss": 6.073, "step": 8020 }, { "epoch": 0.86, "learning_rate": 8.113799927256902e-06, "loss": 6.0602, "step": 8030 }, { "epoch": 0.86, "learning_rate": 8.091037681827527e-06, "loss": 6.0617, "step": 8040 }, { "epoch": 0.86, "learning_rate": 8.068285696560103e-06, "loss": 6.0793, "step": 8050 }, { "epoch": 0.87, "learning_rate": 8.045544093740472e-06, "loss": 6.0555, "step": 8060 }, { "epoch": 0.87, "learning_rate": 8.022812995598664e-06, "loss": 6.0637, "step": 8070 }, { "epoch": 0.87, "learning_rate": 8.000092524308252e-06, "loss": 6.0867, "step": 8080 }, { "epoch": 0.87, "learning_rate": 7.977382801985697e-06, "loss": 6.0465, "step": 8090 }, { "epoch": 0.87, "learning_rate": 7.95468395068968e-06, "loss": 6.0605, "step": 8100 }, { "epoch": 0.87, "learning_rate": 7.931996092420458e-06, "loss": 6.0438, "step": 8110 }, { "epoch": 0.87, "learning_rate": 7.9093193491192e-06, "loss": 6.0602, "step": 8120 }, { "epoch": 0.87, "learning_rate": 7.88665384266734e-06, "loss": 6.0512, "step": 8130 }, { "epoch": 0.87, "learning_rate": 7.863999694885911e-06, "loss": 6.0555, "step": 8140 }, { "epoch": 0.88, "learning_rate": 7.8413570275349e-06, "loss": 6.0625, "step": 8150 }, { "epoch": 0.88, "learning_rate": 7.81872596231259e-06, "loss": 6.0555, "step": 8160 }, { "epoch": 0.88, "learning_rate": 7.796106620854902e-06, "loss": 6.0285, "step": 8170 }, { "epoch": 0.88, "learning_rate": 7.77349912473475e-06, "loss": 6.0684, "step": 8180 }, { "epoch": 0.88, "learning_rate": 7.750903595461376e-06, "loss": 6.0668, "step": 8190 }, { "epoch": 0.88, "learning_rate": 7.728320154479712e-06, "loss": 6.0996, "step": 8200 }, { "epoch": 0.88, "learning_rate": 7.705748923169711e-06, "loss": 6.084, "step": 8210 }, { "epoch": 0.88, "learning_rate": 7.683190022845704e-06, "loss": 6.0563, "step": 8220 }, { "epoch": 0.88, "learning_rate": 7.660643574755751e-06, "loss": 6.0223, "step": 8230 }, { "epoch": 0.88, "learning_rate": 7.63810970008098e-06, "loss": 6.0863, "step": 8240 }, { "epoch": 0.89, "learning_rate": 7.615588519934938e-06, "loss": 6.0617, "step": 8250 }, { "epoch": 0.89, "learning_rate": 7.593080155362949e-06, "loss": 6.0738, "step": 8260 }, { "epoch": 0.89, "learning_rate": 7.570584727341451e-06, "loss": 6.1051, "step": 8270 }, { "epoch": 0.89, "learning_rate": 7.548102356777356e-06, "loss": 6.0566, "step": 8280 }, { "epoch": 0.89, "learning_rate": 7.52563316450739e-06, "loss": 6.0719, "step": 8290 }, { "epoch": 0.89, "learning_rate": 7.5031772712974575e-06, "loss": 6.0516, "step": 8300 }, { "epoch": 0.89, "learning_rate": 7.4807347978419754e-06, "loss": 6.068, "step": 8310 }, { "epoch": 0.89, "learning_rate": 7.458305864763238e-06, "loss": 6.0652, "step": 8320 }, { "epoch": 0.89, "learning_rate": 7.435890592610764e-06, "loss": 6.0605, "step": 8330 }, { "epoch": 0.9, "learning_rate": 7.413489101860642e-06, "loss": 6.0855, "step": 8340 }, { "epoch": 0.9, "learning_rate": 7.3911015129149e-06, "loss": 6.0824, "step": 8350 }, { "epoch": 0.9, "learning_rate": 7.368727946100837e-06, "loss": 6.0805, "step": 8360 }, { "epoch": 0.9, "learning_rate": 7.346368521670396e-06, "loss": 6.0691, "step": 8370 }, { "epoch": 0.9, "learning_rate": 7.3240233597995e-06, "loss": 6.0656, "step": 8380 }, { "epoch": 0.9, "learning_rate": 7.3016925805874196e-06, "loss": 6.0633, "step": 8390 }, { "epoch": 0.9, "learning_rate": 7.279376304056121e-06, "loss": 6.0461, "step": 8400 }, { "epoch": 0.9, "learning_rate": 7.257074650149622e-06, "loss": 6.0367, "step": 8410 }, { "epoch": 0.9, "learning_rate": 7.234787738733351e-06, "loss": 6.0711, "step": 8420 }, { "epoch": 0.91, "learning_rate": 7.2125156895934936e-06, "loss": 6.0535, "step": 8430 }, { "epoch": 0.91, "learning_rate": 7.190258622436359e-06, "loss": 6.0566, "step": 8440 }, { "epoch": 0.91, "learning_rate": 7.1680166568877304e-06, "loss": 6.0527, "step": 8450 }, { "epoch": 0.91, "learning_rate": 7.145789912492227e-06, "loss": 6.0687, "step": 8460 }, { "epoch": 0.91, "learning_rate": 7.123578508712652e-06, "loss": 6.0715, "step": 8470 }, { "epoch": 0.91, "learning_rate": 7.101382564929365e-06, "loss": 6.0637, "step": 8480 }, { "epoch": 0.91, "learning_rate": 7.079202200439625e-06, "loss": 6.0809, "step": 8490 }, { "epoch": 0.91, "learning_rate": 7.057037534456959e-06, "loss": 6.0855, "step": 8500 }, { "epoch": 0.91, "learning_rate": 7.034888686110523e-06, "loss": 6.0664, "step": 8510 }, { "epoch": 0.91, "learning_rate": 7.012755774444451e-06, "loss": 6.0859, "step": 8520 }, { "epoch": 0.92, "learning_rate": 6.990638918417224e-06, "loss": 6.0438, "step": 8530 }, { "epoch": 0.92, "learning_rate": 6.96853823690103e-06, "loss": 6.0734, "step": 8540 }, { "epoch": 0.92, "learning_rate": 6.946453848681121e-06, "loss": 6.0523, "step": 8550 }, { "epoch": 0.92, "learning_rate": 6.9243858724551774e-06, "loss": 6.0426, "step": 8560 }, { "epoch": 0.92, "learning_rate": 6.902334426832671e-06, "loss": 6.0676, "step": 8570 }, { "epoch": 0.92, "learning_rate": 6.880299630334224e-06, "loss": 6.0551, "step": 8580 }, { "epoch": 0.92, "learning_rate": 6.858281601390975e-06, "loss": 6.0504, "step": 8590 }, { "epoch": 0.92, "learning_rate": 6.83628045834394e-06, "loss": 6.0348, "step": 8600 }, { "epoch": 0.92, "learning_rate": 6.814296319443383e-06, "loss": 6.0727, "step": 8610 }, { "epoch": 0.93, "learning_rate": 6.792329302848169e-06, "loss": 6.0426, "step": 8620 }, { "epoch": 0.93, "learning_rate": 6.770379526625136e-06, "loss": 6.0582, "step": 8630 }, { "epoch": 0.93, "learning_rate": 6.7484471087484636e-06, "loss": 6.0605, "step": 8640 }, { "epoch": 0.93, "learning_rate": 6.726532167099034e-06, "loss": 6.0586, "step": 8650 }, { "epoch": 0.93, "learning_rate": 6.704634819463793e-06, "loss": 6.0531, "step": 8660 }, { "epoch": 0.93, "learning_rate": 6.682755183535135e-06, "loss": 6.0531, "step": 8670 }, { "epoch": 0.93, "learning_rate": 6.660893376910244e-06, "loss": 6.0574, "step": 8680 }, { "epoch": 0.93, "learning_rate": 6.639049517090491e-06, "loss": 6.0445, "step": 8690 }, { "epoch": 0.93, "learning_rate": 6.6172237214807775e-06, "loss": 6.0668, "step": 8700 }, { "epoch": 0.94, "learning_rate": 6.595416107388919e-06, "loss": 6.0625, "step": 8710 }, { "epoch": 0.94, "learning_rate": 6.573626792025009e-06, "loss": 6.0789, "step": 8720 }, { "epoch": 0.94, "learning_rate": 6.551855892500792e-06, "loss": 6.0617, "step": 8730 }, { "epoch": 0.94, "learning_rate": 6.530103525829024e-06, "loss": 6.0758, "step": 8740 }, { "epoch": 0.94, "learning_rate": 6.508369808922869e-06, "loss": 6.0699, "step": 8750 }, { "epoch": 0.94, "learning_rate": 6.4866548585952384e-06, "loss": 6.0676, "step": 8760 }, { "epoch": 0.94, "learning_rate": 6.464958791558182e-06, "loss": 6.0633, "step": 8770 }, { "epoch": 0.94, "learning_rate": 6.443281724422261e-06, "loss": 6.0492, "step": 8780 }, { "epoch": 0.94, "learning_rate": 6.421623773695913e-06, "loss": 6.0652, "step": 8790 }, { "epoch": 0.94, "learning_rate": 6.39998505578483e-06, "loss": 6.0867, "step": 8800 }, { "epoch": 0.95, "learning_rate": 6.3783656869913335e-06, "loss": 6.0602, "step": 8810 }, { "epoch": 0.95, "learning_rate": 6.356765783513752e-06, "loss": 6.0492, "step": 8820 }, { "epoch": 0.95, "learning_rate": 6.335185461445787e-06, "loss": 6.0496, "step": 8830 }, { "epoch": 0.95, "learning_rate": 6.313624836775902e-06, "loss": 6.066, "step": 8840 }, { "epoch": 0.95, "learning_rate": 6.292084025386685e-06, "loss": 6.0574, "step": 8850 }, { "epoch": 0.95, "learning_rate": 6.27056314305424e-06, "loss": 6.0613, "step": 8860 }, { "epoch": 0.95, "learning_rate": 6.249062305447553e-06, "loss": 6.066, "step": 8870 }, { "epoch": 0.95, "learning_rate": 6.227581628127877e-06, "loss": 6.0906, "step": 8880 }, { "epoch": 0.95, "learning_rate": 6.206121226548105e-06, "loss": 6.082, "step": 8890 }, { "epoch": 0.96, "learning_rate": 6.184681216052159e-06, "loss": 6.091, "step": 8900 }, { "epoch": 0.96, "learning_rate": 6.163261711874365e-06, "loss": 6.0473, "step": 8910 }, { "epoch": 0.96, "learning_rate": 6.141862829138823e-06, "loss": 6.0504, "step": 8920 }, { "epoch": 0.96, "learning_rate": 6.120484682858809e-06, "loss": 6.0543, "step": 8930 }, { "epoch": 0.96, "learning_rate": 6.099127387936141e-06, "loss": 6.0477, "step": 8940 }, { "epoch": 0.96, "learning_rate": 6.077791059160568e-06, "loss": 6.0777, "step": 8950 }, { "epoch": 0.96, "learning_rate": 6.056475811209153e-06, "loss": 6.0684, "step": 8960 }, { "epoch": 0.96, "learning_rate": 6.035181758645652e-06, "loss": 6.0426, "step": 8970 }, { "epoch": 0.96, "learning_rate": 6.013909015919901e-06, "loss": 6.0434, "step": 8980 }, { "epoch": 0.97, "learning_rate": 5.992657697367208e-06, "loss": 6.0734, "step": 8990 }, { "epoch": 0.97, "learning_rate": 5.971427917207722e-06, "loss": 6.0742, "step": 9000 }, { "epoch": 0.97, "learning_rate": 5.950219789545838e-06, "loss": 6.073, "step": 9010 }, { "epoch": 0.97, "learning_rate": 5.933268953818473e-06, "loss": 6.0773, "step": 9020 }, { "epoch": 0.97, "learning_rate": 5.912100087823469e-06, "loss": 6.0406, "step": 9030 }, { "epoch": 0.97, "learning_rate": 5.890953193197284e-06, "loss": 6.0602, "step": 9040 }, { "epoch": 0.97, "learning_rate": 5.869828383598818e-06, "loss": 6.0809, "step": 9050 }, { "epoch": 0.97, "learning_rate": 5.8487257725682756e-06, "loss": 6.0312, "step": 9060 }, { "epoch": 0.97, "learning_rate": 5.827645473526549e-06, "loss": 6.0629, "step": 9070 }, { "epoch": 0.97, "learning_rate": 5.806587599774602e-06, "loss": 6.0398, "step": 9080 }, { "epoch": 0.98, "learning_rate": 5.785552264492884e-06, "loss": 6.0652, "step": 9090 }, { "epoch": 0.98, "learning_rate": 5.764539580740687e-06, "loss": 6.0402, "step": 9100 }, { "epoch": 0.98, "learning_rate": 5.7435496614555764e-06, "loss": 6.0566, "step": 9110 }, { "epoch": 0.98, "learning_rate": 5.722582619452746e-06, "loss": 6.0563, "step": 9120 }, { "epoch": 0.98, "learning_rate": 5.701638567424447e-06, "loss": 6.0449, "step": 9130 }, { "epoch": 0.98, "learning_rate": 5.680717617939351e-06, "loss": 6.0469, "step": 9140 }, { "epoch": 0.98, "learning_rate": 5.659819883441975e-06, "loss": 6.059, "step": 9150 }, { "epoch": 0.98, "learning_rate": 5.638945476252044e-06, "loss": 6.0656, "step": 9160 }, { "epoch": 0.98, "learning_rate": 5.618094508563923e-06, "loss": 6.0656, "step": 9170 }, { "epoch": 0.99, "learning_rate": 5.597267092445979e-06, "loss": 6.0695, "step": 9180 }, { "epoch": 0.99, "learning_rate": 5.576463339840013e-06, "loss": 6.034, "step": 9190 }, { "epoch": 0.99, "learning_rate": 5.555683362560622e-06, "loss": 6.0699, "step": 9200 }, { "epoch": 0.99, "learning_rate": 5.534927272294637e-06, "loss": 6.0695, "step": 9210 }, { "epoch": 0.99, "learning_rate": 5.5141951806004815e-06, "loss": 6.043, "step": 9220 }, { "epoch": 0.99, "learning_rate": 5.493487198907615e-06, "loss": 6.0656, "step": 9230 }, { "epoch": 0.99, "learning_rate": 5.47280343851589e-06, "loss": 6.0617, "step": 9240 }, { "epoch": 0.99, "learning_rate": 5.452144010594998e-06, "loss": 6.0727, "step": 9250 }, { "epoch": 0.99, "learning_rate": 5.431509026183831e-06, "loss": 6.0746, "step": 9260 }, { "epoch": 1.0, "learning_rate": 5.41089859618992e-06, "loss": 6.0449, "step": 9270 }, { "epoch": 1.0, "learning_rate": 5.390312831388805e-06, "loss": 6.0375, "step": 9280 }, { "epoch": 1.0, "learning_rate": 5.369751842423474e-06, "loss": 6.0453, "step": 9290 }, { "epoch": 1.0, "learning_rate": 5.349215739803735e-06, "loss": 6.0547, "step": 9300 }, { "epoch": 1.0, "learning_rate": 5.328704633905662e-06, "loss": 6.0473, "step": 9310 }, { "epoch": 1.0, "learning_rate": 5.3082186349709495e-06, "loss": 6.066, "step": 9320 }, { "epoch": 1.0, "learning_rate": 5.287757853106377e-06, "loss": 6.0684, "step": 9330 }, { "epoch": 1.0, "learning_rate": 5.267322398283164e-06, "loss": 6.073, "step": 9340 }, { "epoch": 1.0, "learning_rate": 5.246912380336422e-06, "loss": 6.0559, "step": 9350 }, { "epoch": 1.0, "learning_rate": 5.226527908964534e-06, "loss": 6.0539, "step": 9360 }, { "epoch": 1.01, "learning_rate": 5.206169093728588e-06, "loss": 6.0598, "step": 9370 }, { "epoch": 1.01, "learning_rate": 5.185836044051767e-06, "loss": 6.0469, "step": 9380 }, { "epoch": 1.01, "learning_rate": 5.165528869218776e-06, "loss": 6.0734, "step": 9390 }, { "epoch": 1.01, "learning_rate": 5.145247678375251e-06, "loss": 6.1031, "step": 9400 }, { "epoch": 1.01, "learning_rate": 5.1249925805271686e-06, "loss": 6.1199, "step": 9410 }, { "epoch": 1.01, "learning_rate": 5.1047636845402594e-06, "loss": 6.0594, "step": 9420 }, { "epoch": 1.01, "learning_rate": 5.084561099139438e-06, "loss": 6.0523, "step": 9430 }, { "epoch": 1.01, "learning_rate": 5.064384932908186e-06, "loss": 6.0441, "step": 9440 }, { "epoch": 1.01, "learning_rate": 5.044235294288014e-06, "loss": 6.0598, "step": 9450 }, { "epoch": 1.02, "learning_rate": 5.024112291577832e-06, "loss": 6.0555, "step": 9460 }, { "epoch": 1.02, "learning_rate": 5.004016032933403e-06, "loss": 6.048, "step": 9470 }, { "epoch": 1.02, "learning_rate": 4.983946626366739e-06, "loss": 6.0832, "step": 9480 }, { "epoch": 1.02, "learning_rate": 4.963904179745538e-06, "loss": 6.0852, "step": 9490 }, { "epoch": 1.02, "learning_rate": 4.943888800792586e-06, "loss": 6.0695, "step": 9500 }, { "epoch": 1.02, "learning_rate": 4.923900597085196e-06, "loss": 6.0523, "step": 9510 }, { "epoch": 1.02, "learning_rate": 4.903939676054614e-06, "loss": 6.0629, "step": 9520 }, { "epoch": 1.02, "learning_rate": 4.884006144985457e-06, "loss": 6.0723, "step": 9530 }, { "epoch": 1.02, "learning_rate": 4.8641001110151185e-06, "loss": 6.0551, "step": 9540 }, { "epoch": 1.03, "learning_rate": 4.844221681133213e-06, "loss": 6.0723, "step": 9550 }, { "epoch": 1.03, "learning_rate": 4.82437096218098e-06, "loss": 6.084, "step": 9560 }, { "epoch": 1.03, "learning_rate": 4.804548060850731e-06, "loss": 6.0527, "step": 9570 }, { "epoch": 1.03, "learning_rate": 4.784753083685253e-06, "loss": 6.0852, "step": 9580 }, { "epoch": 1.03, "learning_rate": 4.764986137077261e-06, "loss": 6.0668, "step": 9590 }, { "epoch": 1.03, "learning_rate": 4.745247327268799e-06, "loss": 6.0766, "step": 9600 }, { "epoch": 1.03, "learning_rate": 4.725536760350701e-06, "loss": 6.0684, "step": 9610 }, { "epoch": 1.03, "learning_rate": 4.705854542261983e-06, "loss": 6.059, "step": 9620 }, { "epoch": 1.03, "learning_rate": 4.686200778789313e-06, "loss": 6.0395, "step": 9630 }, { "epoch": 1.04, "learning_rate": 4.666575575566405e-06, "loss": 6.0379, "step": 9640 }, { "epoch": 1.04, "learning_rate": 4.646979038073486e-06, "loss": 6.0262, "step": 9650 }, { "epoch": 1.04, "learning_rate": 4.627411271636697e-06, "loss": 6.0336, "step": 9660 }, { "epoch": 1.04, "learning_rate": 4.607872381427557e-06, "loss": 6.0793, "step": 9670 }, { "epoch": 1.04, "learning_rate": 4.588362472462368e-06, "loss": 6.0629, "step": 9680 }, { "epoch": 1.04, "learning_rate": 4.568881649601682e-06, "loss": 6.0723, "step": 9690 }, { "epoch": 1.04, "learning_rate": 4.549430017549703e-06, "loss": 6.0797, "step": 9700 }, { "epoch": 1.04, "learning_rate": 4.530007680853756e-06, "loss": 6.0531, "step": 9710 }, { "epoch": 1.04, "learning_rate": 4.51061474390371e-06, "loss": 6.0703, "step": 9720 }, { "epoch": 1.04, "learning_rate": 4.491251310931407e-06, "loss": 6.0699, "step": 9730 }, { "epoch": 1.05, "learning_rate": 4.47191748601013e-06, "loss": 6.0664, "step": 9740 }, { "epoch": 1.05, "learning_rate": 4.45261337305401e-06, "loss": 6.0828, "step": 9750 }, { "epoch": 1.05, "learning_rate": 4.433339075817498e-06, "loss": 6.0355, "step": 9760 }, { "epoch": 1.05, "learning_rate": 4.414094697894779e-06, "loss": 6.0551, "step": 9770 }, { "epoch": 1.05, "learning_rate": 4.394880342719248e-06, "loss": 6.0555, "step": 9780 }, { "epoch": 1.05, "learning_rate": 4.375696113562915e-06, "loss": 6.0559, "step": 9790 }, { "epoch": 1.05, "learning_rate": 4.356542113535892e-06, "loss": 6.0434, "step": 9800 }, { "epoch": 1.05, "learning_rate": 4.337418445585797e-06, "loss": 6.0785, "step": 9810 }, { "epoch": 1.05, "learning_rate": 4.3183252124972365e-06, "loss": 6.0602, "step": 9820 }, { "epoch": 1.06, "learning_rate": 4.299262516891235e-06, "loss": 6.0578, "step": 9830 }, { "epoch": 1.06, "learning_rate": 4.280230461224676e-06, "loss": 6.0754, "step": 9840 }, { "epoch": 1.06, "learning_rate": 4.261229147789777e-06, "loss": 6.0383, "step": 9850 }, { "epoch": 1.06, "learning_rate": 4.242258678713509e-06, "loss": 6.0645, "step": 9860 }, { "epoch": 1.06, "learning_rate": 4.223319155957078e-06, "loss": 6.0598, "step": 9870 }, { "epoch": 1.06, "learning_rate": 4.204410681315344e-06, "loss": 6.0629, "step": 9880 }, { "epoch": 1.06, "learning_rate": 4.185533356416311e-06, "loss": 6.0445, "step": 9890 }, { "epoch": 1.06, "learning_rate": 4.166687282720545e-06, "loss": 6.0281, "step": 9900 }, { "epoch": 1.06, "learning_rate": 4.147872561520658e-06, "loss": 6.0914, "step": 9910 }, { "epoch": 1.07, "learning_rate": 4.12908929394074e-06, "loss": 6.0781, "step": 9920 }, { "epoch": 1.07, "learning_rate": 4.110337580935836e-06, "loss": 6.0656, "step": 9930 }, { "epoch": 1.07, "learning_rate": 4.091617523291381e-06, "loss": 6.0648, "step": 9940 }, { "epoch": 1.07, "learning_rate": 4.072929221622689e-06, "loss": 6.0781, "step": 9950 }, { "epoch": 1.07, "learning_rate": 4.054272776374374e-06, "loss": 6.0648, "step": 9960 }, { "epoch": 1.07, "learning_rate": 4.035648287819848e-06, "loss": 6.0438, "step": 9970 }, { "epoch": 1.07, "learning_rate": 4.017055856060748e-06, "loss": 6.0199, "step": 9980 }, { "epoch": 1.07, "learning_rate": 3.9984955810264315e-06, "loss": 6.0652, "step": 9990 }, { "epoch": 1.07, "learning_rate": 3.979967562473406e-06, "loss": 6.0758, "step": 10000 }, { "epoch": 1.07, "learning_rate": 3.961471899984822e-06, "loss": 6.0762, "step": 10010 }, { "epoch": 1.08, "learning_rate": 3.946698733169213e-06, "loss": 6.0438, "step": 10020 }, { "epoch": 1.08, "learning_rate": 3.928261561990697e-06, "loss": 6.0422, "step": 10030 }, { "epoch": 1.08, "learning_rate": 3.909857024782542e-06, "loss": 6.0523, "step": 10040 }, { "epoch": 1.08, "learning_rate": 3.891485220464221e-06, "loss": 6.0848, "step": 10050 }, { "epoch": 1.08, "learning_rate": 3.873146247779259e-06, "loss": 6.077, "step": 10060 }, { "epoch": 1.08, "learning_rate": 3.854840205294741e-06, "loss": 6.073, "step": 10070 }, { "epoch": 1.08, "learning_rate": 3.83656719140074e-06, "loss": 6.0477, "step": 10080 }, { "epoch": 1.08, "learning_rate": 3.818327304309827e-06, "loss": 6.0641, "step": 10090 }, { "epoch": 1.08, "learning_rate": 3.8001206420565093e-06, "loss": 6.0656, "step": 10100 }, { "epoch": 1.09, "learning_rate": 3.781947302496737e-06, "loss": 6.0543, "step": 10110 }, { "epoch": 1.09, "learning_rate": 3.763807383307341e-06, "loss": 6.0824, "step": 10120 }, { "epoch": 1.09, "learning_rate": 3.7457009819855428e-06, "loss": 6.016, "step": 10130 }, { "epoch": 1.09, "learning_rate": 3.7276281958484018e-06, "loss": 6.0512, "step": 10140 }, { "epoch": 1.09, "learning_rate": 3.7095891220323166e-06, "loss": 6.0512, "step": 10150 }, { "epoch": 1.09, "learning_rate": 3.691583857492478e-06, "loss": 6.0605, "step": 10160 }, { "epoch": 1.09, "learning_rate": 3.673612499002374e-06, "loss": 6.0547, "step": 10170 }, { "epoch": 1.09, "learning_rate": 3.6556751431532445e-06, "loss": 6.0281, "step": 10180 }, { "epoch": 1.09, "learning_rate": 3.637771886353587e-06, "loss": 6.0656, "step": 10190 }, { "epoch": 1.1, "learning_rate": 3.6199028248286116e-06, "loss": 6.0617, "step": 10200 }, { "epoch": 1.1, "learning_rate": 3.602068054619754e-06, "loss": 6.048, "step": 10210 }, { "epoch": 1.1, "learning_rate": 3.5842676715841252e-06, "loss": 6.0641, "step": 10220 }, { "epoch": 1.1, "learning_rate": 3.566501771394032e-06, "loss": 6.0449, "step": 10230 }, { "epoch": 1.1, "learning_rate": 3.5487704495364294e-06, "loss": 6.0367, "step": 10240 }, { "epoch": 1.1, "learning_rate": 3.531073801312438e-06, "loss": 6.0535, "step": 10250 }, { "epoch": 1.1, "learning_rate": 3.5134119218368034e-06, "loss": 6.0859, "step": 10260 }, { "epoch": 1.1, "learning_rate": 3.495784906037406e-06, "loss": 6.0684, "step": 10270 }, { "epoch": 1.1, "learning_rate": 3.4781928486547458e-06, "loss": 6.0687, "step": 10280 }, { "epoch": 1.1, "learning_rate": 3.4606358442414213e-06, "loss": 6.0574, "step": 10290 }, { "epoch": 1.11, "learning_rate": 3.4431139871616403e-06, "loss": 6.0543, "step": 10300 }, { "epoch": 1.11, "learning_rate": 3.425627371590695e-06, "loss": 6.0414, "step": 10310 }, { "epoch": 1.11, "learning_rate": 3.408176091514469e-06, "loss": 6.0793, "step": 10320 }, { "epoch": 1.11, "learning_rate": 3.39076024072893e-06, "loss": 6.0566, "step": 10330 }, { "epoch": 1.11, "learning_rate": 3.3733799128396106e-06, "loss": 6.0461, "step": 10340 }, { "epoch": 1.11, "learning_rate": 3.356035201261133e-06, "loss": 6.05, "step": 10350 }, { "epoch": 1.11, "learning_rate": 3.3387261992166776e-06, "loss": 6.0539, "step": 10360 }, { "epoch": 1.11, "learning_rate": 3.3214529997375067e-06, "loss": 6.048, "step": 10370 }, { "epoch": 1.11, "learning_rate": 3.3042156956624415e-06, "loss": 6.0641, "step": 10380 }, { "epoch": 1.12, "learning_rate": 3.28701437963739e-06, "loss": 6.0387, "step": 10390 }, { "epoch": 1.12, "learning_rate": 3.2698491441148183e-06, "loss": 6.052, "step": 10400 }, { "epoch": 1.12, "learning_rate": 3.2527200813532823e-06, "loss": 6.0738, "step": 10410 }, { "epoch": 1.12, "learning_rate": 3.2356272834169087e-06, "loss": 6.0867, "step": 10420 }, { "epoch": 1.12, "learning_rate": 3.2185708421749207e-06, "loss": 6.0539, "step": 10430 }, { "epoch": 1.12, "learning_rate": 3.2015508493011226e-06, "loss": 6.0617, "step": 10440 }, { "epoch": 1.12, "learning_rate": 3.1845673962734314e-06, "loss": 6.0508, "step": 10450 }, { "epoch": 1.12, "learning_rate": 3.167620574373359e-06, "loss": 6.0344, "step": 10460 }, { "epoch": 1.12, "learning_rate": 3.1507104746855464e-06, "loss": 6.048, "step": 10470 }, { "epoch": 1.13, "learning_rate": 3.1338371880972506e-06, "loss": 6.0645, "step": 10480 }, { "epoch": 1.13, "learning_rate": 3.1170008052978827e-06, "loss": 6.0695, "step": 10490 }, { "epoch": 1.13, "learning_rate": 3.100201416778491e-06, "loss": 6.0512, "step": 10500 }, { "epoch": 1.13, "learning_rate": 3.0834391128313014e-06, "loss": 6.0305, "step": 10510 }, { "epoch": 1.13, "learning_rate": 3.06671398354921e-06, "loss": 6.0707, "step": 10520 }, { "epoch": 1.13, "learning_rate": 3.050026118825319e-06, "loss": 6.0586, "step": 10530 }, { "epoch": 1.13, "learning_rate": 3.0333756083524335e-06, "loss": 6.0375, "step": 10540 }, { "epoch": 1.13, "learning_rate": 3.016762541622599e-06, "loss": 6.0598, "step": 10550 }, { "epoch": 1.13, "learning_rate": 3.0001870079266016e-06, "loss": 6.048, "step": 10560 }, { "epoch": 1.13, "learning_rate": 2.9836490963535083e-06, "loss": 6.0422, "step": 10570 }, { "epoch": 1.14, "learning_rate": 2.9671488957901652e-06, "loss": 6.0711, "step": 10580 }, { "epoch": 1.14, "learning_rate": 2.9506864949207425e-06, "loss": 6.0664, "step": 10590 }, { "epoch": 1.14, "learning_rate": 2.9342619822262374e-06, "loss": 6.0543, "step": 10600 }, { "epoch": 1.14, "learning_rate": 2.9178754459840196e-06, "loss": 6.0805, "step": 10610 }, { "epoch": 1.14, "learning_rate": 2.9015269742673324e-06, "loss": 6.0648, "step": 10620 }, { "epoch": 1.14, "learning_rate": 2.8852166549448458e-06, "loss": 6.077, "step": 10630 }, { "epoch": 1.14, "learning_rate": 2.8689445756801583e-06, "loss": 6.0559, "step": 10640 }, { "epoch": 1.14, "learning_rate": 2.8527108239313506e-06, "loss": 6.052, "step": 10650 }, { "epoch": 1.14, "learning_rate": 2.8365154869504895e-06, "loss": 6.0586, "step": 10660 }, { "epoch": 1.15, "learning_rate": 2.820358651783186e-06, "loss": 6.0676, "step": 10670 }, { "epoch": 1.15, "learning_rate": 2.804240405268108e-06, "loss": 6.0453, "step": 10680 }, { "epoch": 1.15, "learning_rate": 2.7881608340365176e-06, "loss": 6.0551, "step": 10690 }, { "epoch": 1.15, "learning_rate": 2.7721200245118128e-06, "loss": 6.0504, "step": 10700 }, { "epoch": 1.15, "learning_rate": 2.7561180629090513e-06, "loss": 6.0453, "step": 10710 }, { "epoch": 1.15, "learning_rate": 2.740155035234503e-06, "loss": 6.0605, "step": 10720 }, { "epoch": 1.15, "learning_rate": 2.7242310272851656e-06, "loss": 6.0258, "step": 10730 }, { "epoch": 1.15, "learning_rate": 2.7083461246483313e-06, "loss": 6.0469, "step": 10740 }, { "epoch": 1.15, "learning_rate": 2.692500412701096e-06, "loss": 6.0734, "step": 10750 }, { "epoch": 1.16, "learning_rate": 2.6766939766099297e-06, "loss": 6.0605, "step": 10760 }, { "epoch": 1.16, "learning_rate": 2.660926901330194e-06, "loss": 6.0586, "step": 10770 }, { "epoch": 1.16, "learning_rate": 2.6451992716057096e-06, "loss": 6.05, "step": 10780 }, { "epoch": 1.16, "learning_rate": 2.629511171968271e-06, "loss": 6.0668, "step": 10790 }, { "epoch": 1.16, "learning_rate": 2.6138626867372274e-06, "loss": 6.0887, "step": 10800 }, { "epoch": 1.16, "learning_rate": 2.5982539000189945e-06, "loss": 6.0582, "step": 10810 }, { "epoch": 1.16, "learning_rate": 2.582684895706632e-06, "loss": 6.0406, "step": 10820 }, { "epoch": 1.16, "learning_rate": 2.5671557574793703e-06, "loss": 6.057, "step": 10830 }, { "epoch": 1.16, "learning_rate": 2.5516665688021804e-06, "loss": 6.0586, "step": 10840 }, { "epoch": 1.16, "learning_rate": 2.5362174129253014e-06, "loss": 6.0723, "step": 10850 }, { "epoch": 1.17, "learning_rate": 2.520808372883823e-06, "loss": 6.0563, "step": 10860 }, { "epoch": 1.17, "learning_rate": 2.5054395314972068e-06, "loss": 6.066, "step": 10870 }, { "epoch": 1.17, "learning_rate": 2.4901109713688686e-06, "loss": 6.0977, "step": 10880 }, { "epoch": 1.17, "learning_rate": 2.4748227748857235e-06, "loss": 6.0535, "step": 10890 }, { "epoch": 1.17, "learning_rate": 2.459575024217733e-06, "loss": 6.0781, "step": 10900 }, { "epoch": 1.17, "learning_rate": 2.4443678013174843e-06, "loss": 6.0625, "step": 10910 }, { "epoch": 1.17, "learning_rate": 2.4292011879197284e-06, "loss": 6.0766, "step": 10920 }, { "epoch": 1.17, "learning_rate": 2.4140752655409625e-06, "loss": 6.0574, "step": 10930 }, { "epoch": 1.17, "learning_rate": 2.3989901154789684e-06, "loss": 6.0797, "step": 10940 }, { "epoch": 1.18, "learning_rate": 2.3839458188124e-06, "loss": 6.0488, "step": 10950 }, { "epoch": 1.18, "learning_rate": 2.3689424564003206e-06, "loss": 6.0523, "step": 10960 }, { "epoch": 1.18, "learning_rate": 2.353980108881799e-06, "loss": 6.0395, "step": 10970 }, { "epoch": 1.18, "learning_rate": 2.3390588566754457e-06, "loss": 6.0938, "step": 10980 }, { "epoch": 1.18, "learning_rate": 2.324178779979005e-06, "loss": 6.0621, "step": 10990 }, { "epoch": 1.18, "learning_rate": 2.309339958768906e-06, "loss": 6.0187, "step": 11000 }, { "epoch": 1.18, "learning_rate": 2.2945424727998487e-06, "loss": 6.0496, "step": 11010 }, { "epoch": 1.18, "learning_rate": 2.279786401604359e-06, "loss": 6.0656, "step": 11020 }, { "epoch": 1.18, "learning_rate": 2.2680114165887057e-06, "loss": 6.0383, "step": 11030 }, { "epoch": 1.19, "learning_rate": 2.2533300916968257e-06, "loss": 6.0406, "step": 11040 }, { "epoch": 1.19, "learning_rate": 2.238690403084045e-06, "loss": 6.0652, "step": 11050 }, { "epoch": 1.19, "learning_rate": 2.2240924294347697e-06, "loss": 6.0559, "step": 11060 }, { "epoch": 1.19, "learning_rate": 2.2095362492092085e-06, "loss": 6.043, "step": 11070 }, { "epoch": 1.19, "learning_rate": 2.195021940642934e-06, "loss": 6.0832, "step": 11080 }, { "epoch": 1.19, "learning_rate": 2.1805495817464773e-06, "loss": 6.052, "step": 11090 }, { "epoch": 1.19, "learning_rate": 2.1661192503048913e-06, "loss": 6.0715, "step": 11100 }, { "epoch": 1.19, "learning_rate": 2.151731023877356e-06, "loss": 6.057, "step": 11110 }, { "epoch": 1.19, "learning_rate": 2.1373849797967326e-06, "loss": 6.0594, "step": 11120 }, { "epoch": 1.19, "learning_rate": 2.123081195169179e-06, "loss": 6.0684, "step": 11130 }, { "epoch": 1.2, "learning_rate": 2.108819746873706e-06, "loss": 6.0496, "step": 11140 }, { "epoch": 1.2, "learning_rate": 2.0946007115617895e-06, "loss": 6.0328, "step": 11150 }, { "epoch": 1.2, "learning_rate": 2.0804241656569366e-06, "loss": 6.0742, "step": 11160 }, { "epoch": 1.2, "learning_rate": 2.0662901853542973e-06, "loss": 6.0895, "step": 11170 }, { "epoch": 1.2, "learning_rate": 2.052198846620228e-06, "loss": 6.0766, "step": 11180 }, { "epoch": 1.2, "learning_rate": 2.0381502251919127e-06, "loss": 6.0469, "step": 11190 }, { "epoch": 1.2, "learning_rate": 2.0241443965769293e-06, "loss": 6.0355, "step": 11200 }, { "epoch": 1.2, "learning_rate": 2.010181436052866e-06, "loss": 6.0438, "step": 11210 }, { "epoch": 1.2, "learning_rate": 1.996261418666896e-06, "loss": 6.0629, "step": 11220 }, { "epoch": 1.21, "learning_rate": 1.9823844192353936e-06, "loss": 6.0422, "step": 11230 }, { "epoch": 1.21, "learning_rate": 1.9685505123435224e-06, "loss": 6.0852, "step": 11240 }, { "epoch": 1.21, "learning_rate": 1.9547597723448264e-06, "loss": 6.0723, "step": 11250 }, { "epoch": 1.21, "learning_rate": 1.9410122733608505e-06, "loss": 6.0531, "step": 11260 }, { "epoch": 1.21, "learning_rate": 1.9273080892807205e-06, "loss": 6.0398, "step": 11270 }, { "epoch": 1.21, "learning_rate": 1.9136472937607666e-06, "loss": 6.0648, "step": 11280 }, { "epoch": 1.21, "learning_rate": 1.9000299602241047e-06, "loss": 6.0777, "step": 11290 }, { "epoch": 1.21, "learning_rate": 1.8864561618602672e-06, "loss": 6.0738, "step": 11300 }, { "epoch": 1.21, "learning_rate": 1.872925971624785e-06, "loss": 6.0734, "step": 11310 }, { "epoch": 1.22, "learning_rate": 1.859439462238818e-06, "loss": 6.0305, "step": 11320 }, { "epoch": 1.22, "learning_rate": 1.8459967061887406e-06, "loss": 6.0574, "step": 11330 }, { "epoch": 1.22, "learning_rate": 1.8325977757257784e-06, "loss": 6.0664, "step": 11340 }, { "epoch": 1.22, "learning_rate": 1.8192427428655945e-06, "loss": 6.0687, "step": 11350 }, { "epoch": 1.22, "learning_rate": 1.8059316793879233e-06, "loss": 6.0414, "step": 11360 }, { "epoch": 1.22, "learning_rate": 1.792664656836166e-06, "loss": 6.0719, "step": 11370 }, { "epoch": 1.22, "learning_rate": 1.7794417465170233e-06, "loss": 6.0297, "step": 11380 }, { "epoch": 1.22, "learning_rate": 1.7662630195001051e-06, "loss": 6.0582, "step": 11390 }, { "epoch": 1.22, "learning_rate": 1.7531285466175408e-06, "loss": 6.0719, "step": 11400 }, { "epoch": 1.23, "learning_rate": 1.7400383984636127e-06, "loss": 6.052, "step": 11410 }, { "epoch": 1.23, "learning_rate": 1.7269926453943619e-06, "loss": 6.0406, "step": 11420 }, { "epoch": 1.23, "learning_rate": 1.7139913575272282e-06, "loss": 6.091, "step": 11430 }, { "epoch": 1.23, "learning_rate": 1.7010346047406522e-06, "loss": 6.0695, "step": 11440 }, { "epoch": 1.23, "learning_rate": 1.6881224566737187e-06, "loss": 6.0621, "step": 11450 }, { "epoch": 1.23, "learning_rate": 1.6752549827257669e-06, "loss": 6.048, "step": 11460 }, { "epoch": 1.23, "learning_rate": 1.6624322520560321e-06, "loss": 6.0758, "step": 11470 }, { "epoch": 1.23, "learning_rate": 1.6496543335832583e-06, "loss": 6.0355, "step": 11480 }, { "epoch": 1.23, "learning_rate": 1.6369212959853441e-06, "loss": 6.0785, "step": 11490 }, { "epoch": 1.23, "learning_rate": 1.6242332076989586e-06, "loss": 6.0641, "step": 11500 }, { "epoch": 1.24, "learning_rate": 1.6115901369191855e-06, "loss": 6.0555, "step": 11510 }, { "epoch": 1.24, "learning_rate": 1.598992151599147e-06, "loss": 6.0336, "step": 11520 }, { "epoch": 1.24, "learning_rate": 1.5864393194496474e-06, "loss": 6.0848, "step": 11530 }, { "epoch": 1.24, "learning_rate": 1.5739317079387994e-06, "loss": 6.0563, "step": 11540 }, { "epoch": 1.24, "learning_rate": 1.561469384291674e-06, "loss": 6.0695, "step": 11550 }, { "epoch": 1.24, "learning_rate": 1.5490524154899234e-06, "loss": 6.0297, "step": 11560 }, { "epoch": 1.24, "learning_rate": 1.5366808682714396e-06, "loss": 6.0676, "step": 11570 }, { "epoch": 1.24, "learning_rate": 1.5243548091299753e-06, "loss": 6.0496, "step": 11580 }, { "epoch": 1.24, "learning_rate": 1.5120743043148066e-06, "loss": 6.0711, "step": 11590 }, { "epoch": 1.25, "learning_rate": 1.4998394198303589e-06, "loss": 6.1078, "step": 11600 }, { "epoch": 1.25, "learning_rate": 1.4876502214358678e-06, "loss": 6.0664, "step": 11610 }, { "epoch": 1.25, "learning_rate": 1.4755067746450113e-06, "loss": 6.0578, "step": 11620 }, { "epoch": 1.25, "learning_rate": 1.4634091447255705e-06, "loss": 6.0715, "step": 11630 }, { "epoch": 1.25, "learning_rate": 1.4513573966990735e-06, "loss": 6.0664, "step": 11640 }, { "epoch": 1.25, "learning_rate": 1.439351595340437e-06, "loss": 6.0563, "step": 11650 }, { "epoch": 1.25, "learning_rate": 1.4273918051776392e-06, "loss": 6.0371, "step": 11660 }, { "epoch": 1.25, "learning_rate": 1.415478090491348e-06, "loss": 6.0574, "step": 11670 }, { "epoch": 1.25, "learning_rate": 1.4036105153145996e-06, "loss": 6.084, "step": 11680 }, { "epoch": 1.26, "learning_rate": 1.3917891434324305e-06, "loss": 6.0582, "step": 11690 }, { "epoch": 1.26, "learning_rate": 1.3800140383815585e-06, "loss": 6.0516, "step": 11700 }, { "epoch": 1.26, "learning_rate": 1.368285263450021e-06, "loss": 6.034, "step": 11710 }, { "epoch": 1.26, "learning_rate": 1.3566028816768494e-06, "loss": 6.0633, "step": 11720 }, { "epoch": 1.26, "learning_rate": 1.3449669558517187e-06, "loss": 6.0625, "step": 11730 }, { "epoch": 1.26, "learning_rate": 1.3333775485146217e-06, "loss": 6.034, "step": 11740 }, { "epoch": 1.26, "learning_rate": 1.3218347219555195e-06, "loss": 6.0598, "step": 11750 }, { "epoch": 1.26, "learning_rate": 1.3103385382140222e-06, "loss": 6.0699, "step": 11760 }, { "epoch": 1.26, "learning_rate": 1.2988890590790394e-06, "loss": 6.0492, "step": 11770 }, { "epoch": 1.26, "learning_rate": 1.2874863460884635e-06, "loss": 6.048, "step": 11780 }, { "epoch": 1.27, "learning_rate": 1.2761304605288216e-06, "loss": 6.0598, "step": 11790 }, { "epoch": 1.27, "learning_rate": 1.2648214634349688e-06, "loss": 6.073, "step": 11800 }, { "epoch": 1.27, "learning_rate": 1.2535594155897346e-06, "loss": 6.0625, "step": 11810 }, { "epoch": 1.27, "learning_rate": 1.2423443775236177e-06, "loss": 6.0449, "step": 11820 }, { "epoch": 1.27, "learning_rate": 1.231176409514445e-06, "loss": 6.0402, "step": 11830 }, { "epoch": 1.27, "learning_rate": 1.2200555715870631e-06, "loss": 6.0574, "step": 11840 }, { "epoch": 1.27, "learning_rate": 1.2089819235129964e-06, "loss": 6.084, "step": 11850 }, { "epoch": 1.27, "learning_rate": 1.197955524810146e-06, "loss": 6.0508, "step": 11860 }, { "epoch": 1.27, "learning_rate": 1.186976434742454e-06, "loss": 6.0547, "step": 11870 }, { "epoch": 1.28, "learning_rate": 1.176044712319595e-06, "loss": 6.0789, "step": 11880 }, { "epoch": 1.28, "learning_rate": 1.1651604162966511e-06, "loss": 6.0516, "step": 11890 }, { "epoch": 1.28, "learning_rate": 1.154323605173806e-06, "loss": 6.0617, "step": 11900 }, { "epoch": 1.28, "learning_rate": 1.1435343371960183e-06, "loss": 6.0625, "step": 11910 }, { "epoch": 1.28, "learning_rate": 1.1327926703527203e-06, "loss": 6.0047, "step": 11920 }, { "epoch": 1.28, "learning_rate": 1.1220986623774966e-06, "loss": 6.0684, "step": 11930 }, { "epoch": 1.28, "learning_rate": 1.1114523707477809e-06, "loss": 6.0711, "step": 11940 }, { "epoch": 1.28, "learning_rate": 1.1008538526845468e-06, "loss": 6.0625, "step": 11950 }, { "epoch": 1.28, "learning_rate": 1.0903031651519902e-06, "loss": 6.0687, "step": 11960 }, { "epoch": 1.29, "learning_rate": 1.0798003648572387e-06, "loss": 6.0473, "step": 11970 }, { "epoch": 1.29, "learning_rate": 1.0693455082500303e-06, "loss": 6.0625, "step": 11980 }, { "epoch": 1.29, "learning_rate": 1.0589386515224286e-06, "loss": 6.052, "step": 11990 }, { "epoch": 1.29, "learning_rate": 1.048579850608502e-06, "loss": 6.0332, "step": 12000 }, { "epoch": 1.29, "learning_rate": 1.0382691611840367e-06, "loss": 6.059, "step": 12010 }, { "epoch": 1.29, "learning_rate": 1.0280066386662303e-06, "loss": 6.0367, "step": 12020 }, { "epoch": 1.29, "learning_rate": 1.0198313378998736e-06, "loss": 6.0578, "step": 12030 }, { "epoch": 1.29, "learning_rate": 1.009655654638846e-06, "loss": 6.0637, "step": 12040 }, { "epoch": 1.29, "learning_rate": 9.995282920744242e-07, "loss": 6.0613, "step": 12050 }, { "epoch": 1.29, "learning_rate": 9.894493046384711e-07, "loss": 6.0344, "step": 12060 }, { "epoch": 1.3, "learning_rate": 9.794187465028527e-07, "loss": 6.0652, "step": 12070 }, { "epoch": 1.3, "learning_rate": 9.694366715791327e-07, "loss": 6.0383, "step": 12080 }, { "epoch": 1.3, "learning_rate": 9.595031335182968e-07, "loss": 6.0664, "step": 12090 }, { "epoch": 1.3, "learning_rate": 9.496181857104536e-07, "loss": 6.0648, "step": 12100 }, { "epoch": 1.3, "learning_rate": 9.397818812845571e-07, "loss": 6.0461, "step": 12110 }, { "epoch": 1.3, "learning_rate": 9.299942731081091e-07, "loss": 6.0785, "step": 12120 }, { "epoch": 1.3, "learning_rate": 9.202554137868913e-07, "loss": 6.0707, "step": 12130 }, { "epoch": 1.3, "learning_rate": 9.105653556646621e-07, "loss": 6.0617, "step": 12140 }, { "epoch": 1.3, "learning_rate": 9.009241508228972e-07, "loss": 6.0738, "step": 12150 }, { "epoch": 1.31, "learning_rate": 8.913318510804914e-07, "loss": 6.0594, "step": 12160 }, { "epoch": 1.31, "learning_rate": 8.81788507993494e-07, "loss": 6.0766, "step": 12170 }, { "epoch": 1.31, "learning_rate": 8.722941728548173e-07, "loss": 6.0461, "step": 12180 }, { "epoch": 1.31, "learning_rate": 8.628488966939785e-07, "loss": 6.0582, "step": 12190 }, { "epoch": 1.31, "learning_rate": 8.534527302768058e-07, "loss": 6.0766, "step": 12200 }, { "epoch": 1.31, "learning_rate": 8.441057241051842e-07, "loss": 6.0531, "step": 12210 }, { "epoch": 1.31, "learning_rate": 8.348079284167743e-07, "loss": 6.0723, "step": 12220 }, { "epoch": 1.31, "learning_rate": 8.255593931847372e-07, "loss": 6.0355, "step": 12230 }, { "epoch": 1.31, "learning_rate": 8.163601681174793e-07, "loss": 6.0605, "step": 12240 }, { "epoch": 1.32, "learning_rate": 8.072103026583722e-07, "loss": 6.0363, "step": 12250 }, { "epoch": 1.32, "learning_rate": 7.981098459855008e-07, "loss": 6.0711, "step": 12260 }, { "epoch": 1.32, "learning_rate": 7.890588470113802e-07, "loss": 6.0754, "step": 12270 }, { "epoch": 1.32, "learning_rate": 7.800573543827139e-07, "loss": 6.0641, "step": 12280 }, { "epoch": 1.32, "learning_rate": 7.711054164801147e-07, "loss": 6.0539, "step": 12290 }, { "epoch": 1.32, "learning_rate": 7.622030814178582e-07, "loss": 6.0367, "step": 12300 }, { "epoch": 1.32, "learning_rate": 7.533503970436096e-07, "loss": 6.0805, "step": 12310 }, { "epoch": 1.32, "learning_rate": 7.445474109381856e-07, "loss": 6.0582, "step": 12320 }, { "epoch": 1.32, "learning_rate": 7.357941704152771e-07, "loss": 6.0387, "step": 12330 }, { "epoch": 1.32, "learning_rate": 7.270907225212154e-07, "loss": 6.0566, "step": 12340 }, { "epoch": 1.33, "learning_rate": 7.184371140347024e-07, "loss": 6.0488, "step": 12350 }, { "epoch": 1.33, "learning_rate": 7.098333914665723e-07, "loss": 6.0426, "step": 12360 }, { "epoch": 1.33, "learning_rate": 7.012796010595302e-07, "loss": 6.0699, "step": 12370 }, { "epoch": 1.33, "learning_rate": 6.927757887879139e-07, "loss": 6.0508, "step": 12380 }, { "epoch": 1.33, "learning_rate": 6.84322000357438e-07, "loss": 6.0426, "step": 12390 }, { "epoch": 1.33, "learning_rate": 6.75918281204958e-07, "loss": 6.0449, "step": 12400 }, { "epoch": 1.33, "learning_rate": 6.675646764982147e-07, "loss": 6.0672, "step": 12410 }, { "epoch": 1.33, "learning_rate": 6.592612311355994e-07, "loss": 6.0793, "step": 12420 }, { "epoch": 1.33, "learning_rate": 6.510079897459109e-07, "loss": 6.0563, "step": 12430 }, { "epoch": 1.34, "learning_rate": 6.428049966881156e-07, "loss": 6.0484, "step": 12440 }, { "epoch": 1.34, "learning_rate": 6.346522960511048e-07, "loss": 6.0473, "step": 12450 }, { "epoch": 1.34, "learning_rate": 6.265499316534651e-07, "loss": 6.0539, "step": 12460 }, { "epoch": 1.34, "learning_rate": 6.184979470432407e-07, "loss": 6.0727, "step": 12470 }, { "epoch": 1.34, "learning_rate": 6.104963854976931e-07, "loss": 6.091, "step": 12480 }, { "epoch": 1.34, "learning_rate": 6.025452900230777e-07, "loss": 6.0512, "step": 12490 }, { "epoch": 1.34, "learning_rate": 5.946447033544034e-07, "loss": 6.0816, "step": 12500 }, { "epoch": 1.34, "learning_rate": 5.867946679552138e-07, "loss": 6.0738, "step": 12510 }, { "epoch": 1.34, "learning_rate": 5.789952260173481e-07, "loss": 6.0605, "step": 12520 }, { "epoch": 1.35, "learning_rate": 5.712464194607214e-07, "loss": 6.0344, "step": 12530 }, { "epoch": 1.35, "learning_rate": 5.635482899330968e-07, "loss": 6.048, "step": 12540 }, { "epoch": 1.35, "learning_rate": 5.559008788098619e-07, "loss": 6.0762, "step": 12550 }, { "epoch": 1.35, "learning_rate": 5.483042271938055e-07, "loss": 6.0738, "step": 12560 }, { "epoch": 1.35, "learning_rate": 5.407583759149005e-07, "loss": 6.0574, "step": 12570 }, { "epoch": 1.35, "learning_rate": 5.332633655300767e-07, "loss": 6.0637, "step": 12580 }, { "epoch": 1.35, "learning_rate": 5.258192363230141e-07, "loss": 6.0281, "step": 12590 }, { "epoch": 1.35, "learning_rate": 5.184260283039133e-07, "loss": 6.0754, "step": 12600 }, { "epoch": 1.35, "learning_rate": 5.110837812092906e-07, "loss": 6.0586, "step": 12610 }, { "epoch": 1.35, "learning_rate": 5.037925345017635e-07, "loss": 6.0395, "step": 12620 }, { "epoch": 1.36, "learning_rate": 4.965523273698292e-07, "loss": 6.0422, "step": 12630 }, { "epoch": 1.36, "learning_rate": 4.893631987276682e-07, "loss": 6.0191, "step": 12640 }, { "epoch": 1.36, "learning_rate": 4.822251872149219e-07, "loss": 6.0445, "step": 12650 }, { "epoch": 1.36, "learning_rate": 4.7513833119649633e-07, "loss": 6.075, "step": 12660 }, { "epoch": 1.36, "learning_rate": 4.6810266876234247e-07, "loss": 6.0559, "step": 12670 }, { "epoch": 1.36, "learning_rate": 4.611182377272705e-07, "loss": 6.034, "step": 12680 }, { "epoch": 1.36, "learning_rate": 4.541850756307231e-07, "loss": 6.0461, "step": 12690 }, { "epoch": 1.36, "learning_rate": 4.4730321973659787e-07, "loss": 6.0617, "step": 12700 }, { "epoch": 1.36, "learning_rate": 4.4047270703302613e-07, "loss": 6.0371, "step": 12710 }, { "epoch": 1.37, "learning_rate": 4.3369357423219016e-07, "loss": 6.0852, "step": 12720 }, { "epoch": 1.37, "learning_rate": 4.26965857770113e-07, "loss": 6.0535, "step": 12730 }, { "epoch": 1.37, "learning_rate": 4.2028959380647327e-07, "loss": 6.032, "step": 12740 }, { "epoch": 1.37, "learning_rate": 4.1366481822440186e-07, "loss": 6.0559, "step": 12750 }, { "epoch": 1.37, "learning_rate": 4.070915666302999e-07, "loss": 6.0488, "step": 12760 }, { "epoch": 1.37, "learning_rate": 4.0056987435363346e-07, "loss": 6.0687, "step": 12770 }, { "epoch": 1.37, "learning_rate": 3.9409977644675577e-07, "loss": 6.0582, "step": 12780 }, { "epoch": 1.37, "learning_rate": 3.87681307684713e-07, "loss": 6.0512, "step": 12790 }, { "epoch": 1.37, "learning_rate": 3.8131450256505773e-07, "loss": 6.0551, "step": 12800 }, { "epoch": 1.38, "learning_rate": 3.749993953076647e-07, "loss": 6.0609, "step": 12810 }, { "epoch": 1.38, "learning_rate": 3.6873601985454863e-07, "loss": 6.0809, "step": 12820 }, { "epoch": 1.38, "learning_rate": 3.625244098696734e-07, "loss": 6.0723, "step": 12830 }, { "epoch": 1.38, "learning_rate": 3.563645987387865e-07, "loss": 6.0578, "step": 12840 }, { "epoch": 1.38, "learning_rate": 3.502566195692214e-07, "loss": 6.0758, "step": 12850 }, { "epoch": 1.38, "learning_rate": 3.442005051897357e-07, "loss": 6.0406, "step": 12860 }, { "epoch": 1.38, "learning_rate": 3.38196288150322e-07, "loss": 6.0484, "step": 12870 }, { "epoch": 1.38, "learning_rate": 3.322440007220429e-07, "loss": 6.084, "step": 12880 }, { "epoch": 1.38, "learning_rate": 3.263436748968507e-07, "loss": 6.0461, "step": 12890 }, { "epoch": 1.39, "learning_rate": 3.204953423874202e-07, "loss": 6.0746, "step": 12900 }, { "epoch": 1.39, "learning_rate": 3.146990346269729e-07, "loss": 6.0555, "step": 12910 }, { "epoch": 1.39, "learning_rate": 3.089547827691142e-07, "loss": 6.0582, "step": 12920 }, { "epoch": 1.39, "learning_rate": 3.0326261768766073e-07, "loss": 6.0473, "step": 12930 }, { "epoch": 1.39, "learning_rate": 2.97622569976479e-07, "loss": 6.0652, "step": 12940 }, { "epoch": 1.39, "learning_rate": 2.920346699493137e-07, "loss": 6.0508, "step": 12950 }, { "epoch": 1.39, "learning_rate": 2.864989476396385e-07, "loss": 6.066, "step": 12960 }, { "epoch": 1.39, "learning_rate": 2.810154328004755e-07, "loss": 6.0574, "step": 12970 }, { "epoch": 1.39, "learning_rate": 2.7558415490425375e-07, "loss": 6.0547, "step": 12980 }, { "epoch": 1.39, "learning_rate": 2.702051431426367e-07, "loss": 6.0547, "step": 12990 }, { "epoch": 1.4, "learning_rate": 2.6487842642637704e-07, "loss": 6.0609, "step": 13000 }, { "epoch": 1.4, "learning_rate": 2.5960403338515016e-07, "loss": 6.0945, "step": 13010 }, { "epoch": 1.4, "learning_rate": 2.5438199236740955e-07, "loss": 6.0516, "step": 13020 }, { "epoch": 1.4, "learning_rate": 2.502420718806597e-07, "loss": 6.0559, "step": 13030 }, { "epoch": 1.4, "learning_rate": 2.45114335045038e-07, "loss": 6.0613, "step": 13040 }, { "epoch": 1.4, "learning_rate": 2.4003902811115644e-07, "loss": 6.057, "step": 13050 }, { "epoch": 1.4, "learning_rate": 2.3501617835743007e-07, "loss": 6.0691, "step": 13060 }, { "epoch": 1.4, "learning_rate": 2.3004581278033734e-07, "loss": 6.0527, "step": 13070 }, { "epoch": 1.4, "learning_rate": 2.2512795809426003e-07, "loss": 6.0363, "step": 13080 }, { "epoch": 1.41, "learning_rate": 2.2026264073135462e-07, "loss": 6.0641, "step": 13090 }, { "epoch": 1.41, "learning_rate": 2.15449886841399e-07, "loss": 6.0711, "step": 13100 }, { "epoch": 1.41, "learning_rate": 2.106897222916593e-07, "loss": 6.0664, "step": 13110 }, { "epoch": 1.41, "learning_rate": 2.0598217266674658e-07, "loss": 6.0656, "step": 13120 }, { "epoch": 1.41, "learning_rate": 2.013272632684815e-07, "loss": 6.0711, "step": 13130 }, { "epoch": 1.41, "learning_rate": 1.9672501911575658e-07, "loss": 6.0543, "step": 13140 }, { "epoch": 1.41, "learning_rate": 1.921754649444041e-07, "loss": 6.0691, "step": 13150 }, { "epoch": 1.41, "learning_rate": 1.876786252070606e-07, "loss": 6.0379, "step": 13160 }, { "epoch": 1.41, "learning_rate": 1.832345240730371e-07, "loss": 6.0695, "step": 13170 }, { "epoch": 1.42, "learning_rate": 1.7884318542818912e-07, "loss": 6.0922, "step": 13180 }, { "epoch": 1.42, "learning_rate": 1.7450463287478792e-07, "loss": 6.0777, "step": 13190 }, { "epoch": 1.42, "learning_rate": 1.7021888973139166e-07, "loss": 6.0594, "step": 13200 }, { "epoch": 1.42, "learning_rate": 1.659859790327245e-07, "loss": 6.0664, "step": 13210 }, { "epoch": 1.42, "learning_rate": 1.6180592352955105e-07, "loss": 6.0535, "step": 13220 }, { "epoch": 1.42, "learning_rate": 1.5767874568854868e-07, "loss": 6.068, "step": 13230 }, { "epoch": 1.42, "learning_rate": 1.5360446769219663e-07, "loss": 6.066, "step": 13240 }, { "epoch": 1.42, "learning_rate": 1.4958311143864922e-07, "loss": 6.0391, "step": 13250 }, { "epoch": 1.42, "learning_rate": 1.456146985416207e-07, "loss": 6.05, "step": 13260 }, { "epoch": 1.42, "learning_rate": 1.4169925033026832e-07, "loss": 6.0711, "step": 13270 }, { "epoch": 1.43, "learning_rate": 1.3783678784908162e-07, "loss": 6.0781, "step": 13280 }, { "epoch": 1.43, "learning_rate": 1.3402733185776006e-07, "loss": 6.0809, "step": 13290 }, { "epoch": 1.43, "learning_rate": 1.3027090283111442e-07, "loss": 6.0605, "step": 13300 }, { "epoch": 1.43, "learning_rate": 1.2656752095894342e-07, "loss": 6.0758, "step": 13310 }, { "epoch": 1.43, "learning_rate": 1.2291720614593493e-07, "loss": 6.059, "step": 13320 }, { "epoch": 1.43, "learning_rate": 1.1931997801155614e-07, "loss": 6.0414, "step": 13330 }, { "epoch": 1.43, "learning_rate": 1.1577585588994466e-07, "loss": 6.0504, "step": 13340 }, { "epoch": 1.43, "learning_rate": 1.1228485882980977e-07, "loss": 6.0492, "step": 13350 }, { "epoch": 1.43, "learning_rate": 1.0884700559432693e-07, "loss": 6.0301, "step": 13360 }, { "epoch": 1.44, "learning_rate": 1.0546231466103785e-07, "loss": 6.048, "step": 13370 }, { "epoch": 1.44, "learning_rate": 1.0213080422175281e-07, "loss": 6.0687, "step": 13380 }, { "epoch": 1.44, "learning_rate": 9.88524921824463e-08, "loss": 6.0645, "step": 13390 }, { "epoch": 1.44, "learning_rate": 9.562739616317152e-08, "loss": 6.0953, "step": 13400 }, { "epoch": 1.44, "learning_rate": 9.2455533497956e-08, "loss": 6.0484, "step": 13410 }, { "epoch": 1.44, "learning_rate": 8.933692123471282e-08, "loss": 6.041, "step": 13420 }, { "epoch": 1.44, "learning_rate": 8.627157613514958e-08, "loss": 6.0555, "step": 13430 }, { "epoch": 1.44, "learning_rate": 8.32595146746773e-08, "loss": 6.0844, "step": 13440 }, { "epoch": 1.44, "learning_rate": 8.030075304231944e-08, "loss": 6.0934, "step": 13450 }, { "epoch": 1.45, "learning_rate": 7.73953071406297e-08, "loss": 6.0422, "step": 13460 }, { "epoch": 1.45, "learning_rate": 7.454319258560105e-08, "loss": 6.0812, "step": 13470 }, { "epoch": 1.45, "learning_rate": 7.174442470658794e-08, "loss": 6.0656, "step": 13480 }, { "epoch": 1.45, "learning_rate": 6.89990185462186e-08, "loss": 6.0656, "step": 13490 }, { "epoch": 1.45, "learning_rate": 6.630698886031738e-08, "loss": 6.0613, "step": 13500 }, { "epoch": 1.45, "learning_rate": 6.366835011782368e-08, "loss": 6.0633, "step": 13510 }, { "epoch": 1.45, "learning_rate": 6.10831165007153e-08, "loss": 6.0691, "step": 13520 }, { "epoch": 1.45, "learning_rate": 5.855130190393188e-08, "loss": 6.0508, "step": 13530 }, { "epoch": 1.45, "learning_rate": 5.6072919935298286e-08, "loss": 6.0883, "step": 13540 }, { "epoch": 1.45, "learning_rate": 5.3647983915456894e-08, "loss": 6.032, "step": 13550 }, { "epoch": 1.46, "learning_rate": 5.127650687778873e-08, "loss": 6.0875, "step": 13560 }, { "epoch": 1.46, "learning_rate": 4.895850156834914e-08, "loss": 6.0285, "step": 13570 }, { "epoch": 1.46, "learning_rate": 4.669398044579776e-08, "loss": 6.0605, "step": 13580 }, { "epoch": 1.46, "learning_rate": 4.448295568132866e-08, "loss": 6.0648, "step": 13590 }, { "epoch": 1.46, "learning_rate": 4.2325439158609204e-08, "loss": 6.0863, "step": 13600 }, { "epoch": 1.46, "learning_rate": 4.0221442473713514e-08, "loss": 6.0613, "step": 13610 }, { "epoch": 1.46, "learning_rate": 3.8170976935062445e-08, "loss": 6.0828, "step": 13620 }, { "epoch": 1.46, "learning_rate": 3.617405356335701e-08, "loss": 6.0883, "step": 13630 }, { "epoch": 1.46, "learning_rate": 3.4230683091529545e-08, "loss": 6.0465, "step": 13640 }, { "epoch": 1.47, "learning_rate": 3.2340875964674833e-08, "loss": 6.0465, "step": 13650 }, { "epoch": 1.47, "learning_rate": 3.050464234000017e-08, "loss": 6.0324, "step": 13660 }, { "epoch": 1.47, "learning_rate": 2.8721992086772065e-08, "loss": 6.0449, "step": 13670 }, { "epoch": 1.47, "learning_rate": 2.6992934786257418e-08, "loss": 6.0422, "step": 13680 }, { "epoch": 1.47, "learning_rate": 2.5317479731677973e-08, "loss": 6.0754, "step": 13690 }, { "epoch": 1.47, "learning_rate": 2.3695635928155935e-08, "loss": 6.084, "step": 13700 }, { "epoch": 1.47, "learning_rate": 2.2127412092668444e-08, "loss": 6.0457, "step": 13710 }, { "epoch": 1.47, "learning_rate": 2.0612816653998723e-08, "loss": 6.0687, "step": 13720 }, { "epoch": 1.47, "learning_rate": 1.9151857752691684e-08, "loss": 6.0527, "step": 13730 }, { "epoch": 1.48, "learning_rate": 1.7744543241012823e-08, "loss": 6.059, "step": 13740 }, { "epoch": 1.48, "learning_rate": 1.639088068289829e-08, "loss": 6.0598, "step": 13750 }, { "epoch": 1.48, "learning_rate": 1.509087735392489e-08, "loss": 6.0668, "step": 13760 }, { "epoch": 1.48, "learning_rate": 1.3844540241261251e-08, "loss": 6.0816, "step": 13770 }, { "epoch": 1.48, "learning_rate": 1.2651876043637822e-08, "loss": 6.0895, "step": 13780 }, { "epoch": 1.48, "learning_rate": 1.1512891171303608e-08, "loss": 6.0895, "step": 13790 }, { "epoch": 1.48, "learning_rate": 1.0427591745999499e-08, "loss": 6.0285, "step": 13800 }, { "epoch": 1.48, "learning_rate": 9.395983600918313e-09, "loss": 6.0953, "step": 13810 }, { "epoch": 1.48, "learning_rate": 8.418072280679257e-09, "loss": 6.0426, "step": 13820 }, { "epoch": 1.48, "learning_rate": 7.493863041292405e-09, "loss": 6.0602, "step": 13830 }, { "epoch": 1.49, "learning_rate": 6.623360850136484e-09, "loss": 6.0742, "step": 13840 }, { "epoch": 1.49, "learning_rate": 5.8065703859278014e-09, "loss": 6.0805, "step": 13850 }, { "epoch": 1.49, "learning_rate": 5.0434960386969154e-09, "loss": 6.0574, "step": 13860 }, { "epoch": 1.49, "learning_rate": 4.334141909764223e-09, "loss": 6.0777, "step": 13870 }, { "epoch": 1.49, "learning_rate": 3.6785118117188544e-09, "loss": 6.05, "step": 13880 }, { "epoch": 1.49, "learning_rate": 3.0766092683953697e-09, "loss": 6.0445, "step": 13890 }, { "epoch": 1.49, "learning_rate": 2.5284375148615368e-09, "loss": 6.0648, "step": 13900 }, { "epoch": 1.49, "learning_rate": 2.033999497391692e-09, "loss": 6.0672, "step": 13910 }, { "epoch": 1.49, "learning_rate": 1.5932978734600757e-09, "loss": 6.0727, "step": 13920 }, { "epoch": 1.5, "learning_rate": 1.2063350117175188e-09, "loss": 6.0816, "step": 13930 }, { "epoch": 1.5, "learning_rate": 8.731129919892223e-10, "loss": 6.066, "step": 13940 }, { "epoch": 1.5, "learning_rate": 5.936336052514424e-10, "loss": 6.0367, "step": 13950 }, { "epoch": 1.5, "learning_rate": 3.6789835363260087e-10, "loss": 6.0438, "step": 13960 }, { "epoch": 1.5, "learning_rate": 1.9590845039885175e-10, "loss": 6.0586, "step": 13970 }, { "epoch": 1.5, "step": 13971, "total_flos": 4.912468616465613e+16, "train_loss": 6.204258486328824, "train_runtime": 12473.8331, "train_samples_per_second": 17.92, "train_steps_per_second": 1.12 } ], "max_steps": 13971, "num_train_epochs": 2, "total_flos": 4.912468616465613e+16, "trial_name": null, "trial_params": null }