diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,8407 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5, + "global_step": 13971, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.8571428571428575e-07, + "loss": 10.6891, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 7.61904761904762e-07, + "loss": 10.6539, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.2380952380952382e-06, + "loss": 10.6617, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.7142857142857145e-06, + "loss": 10.6156, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 2.1904761904761908e-06, + "loss": 10.6289, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2.666666666666667e-06, + "loss": 10.6148, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 3.142857142857143e-06, + "loss": 10.5672, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 3.6190476190476194e-06, + "loss": 10.5367, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.095238095238096e-06, + "loss": 10.5062, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.571428571428572e-06, + "loss": 10.4492, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 5.047619047619048e-06, + "loss": 10.4242, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 5.523809523809525e-06, + "loss": 10.3461, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 6e-06, + "loss": 10.2758, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 6.476190476190477e-06, + "loss": 10.2016, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 6.952380952380952e-06, + "loss": 10.132, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 7.428571428571429e-06, + "loss": 10.0852, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 7.904761904761904e-06, + "loss": 10.0258, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 8.380952380952382e-06, + "loss": 10.0055, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 8.857142857142858e-06, + "loss": 9.9734, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 9.333333333333334e-06, + "loss": 9.9578, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 9.80952380952381e-06, + "loss": 9.9516, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 1.0285714285714285e-05, + "loss": 9.9172, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 1.0761904761904763e-05, + "loss": 9.8992, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 1.1238095238095239e-05, + "loss": 9.8641, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 1.1714285714285716e-05, + "loss": 9.8609, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 1.2190476190476192e-05, + "loss": 9.8406, + "step": 260 + }, + { + "epoch": 0.03, + "learning_rate": 1.2666666666666667e-05, + "loss": 9.7977, + "step": 270 + }, + { + "epoch": 0.03, + "learning_rate": 1.3142857142857145e-05, + "loss": 9.7937, + "step": 280 + }, + { + "epoch": 0.03, + "learning_rate": 1.361904761904762e-05, + "loss": 9.7711, + "step": 290 + }, + { + "epoch": 0.03, + "learning_rate": 1.4095238095238097e-05, + "loss": 9.7352, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 1.4571428571428573e-05, + "loss": 9.7195, + "step": 310 + }, + { + "epoch": 0.03, + "learning_rate": 1.5047619047619049e-05, + "loss": 9.7, + "step": 320 + }, + { + "epoch": 0.04, + "learning_rate": 1.5523809523809525e-05, + "loss": 9.6523, + "step": 330 + }, + { + "epoch": 0.04, + "learning_rate": 1.6000000000000003e-05, + "loss": 9.593, + "step": 340 + }, + { + "epoch": 0.04, + "learning_rate": 1.6476190476190477e-05, + "loss": 9.5703, + "step": 350 + }, + { + "epoch": 0.04, + "learning_rate": 1.6952380952380955e-05, + "loss": 9.5336, + "step": 360 + }, + { + "epoch": 0.04, + "learning_rate": 1.742857142857143e-05, + "loss": 9.5039, + "step": 370 + }, + { + "epoch": 0.04, + "learning_rate": 1.7904761904761907e-05, + "loss": 9.418, + "step": 380 + }, + { + "epoch": 0.04, + "learning_rate": 1.838095238095238e-05, + "loss": 9.3305, + "step": 390 + }, + { + "epoch": 0.04, + "learning_rate": 1.885714285714286e-05, + "loss": 9.3203, + "step": 400 + }, + { + "epoch": 0.04, + "learning_rate": 1.9333333333333333e-05, + "loss": 9.207, + "step": 410 + }, + { + "epoch": 0.05, + "learning_rate": 1.980952380952381e-05, + "loss": 9.1086, + "step": 420 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999990325478594e-05, + "loss": 8.9594, + "step": 430 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999931203471123e-05, + "loss": 8.5867, + "step": 440 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999818334507674e-05, + "loss": 8.1969, + "step": 450 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999651719194886e-05, + "loss": 7.7641, + "step": 460 + }, + { + "epoch": 0.05, + "learning_rate": 1.9999431358428275e-05, + "loss": 7.366, + "step": 470 + }, + { + "epoch": 0.05, + "learning_rate": 1.999915725339222e-05, + "loss": 7.082, + "step": 480 + }, + { + "epoch": 0.05, + "learning_rate": 1.9998829405559963e-05, + "loss": 6.9242, + "step": 490 + }, + { + "epoch": 0.05, + "learning_rate": 1.9998447816693596e-05, + "loss": 6.7367, + "step": 500 + }, + { + "epoch": 0.05, + "learning_rate": 1.999801248884406e-05, + "loss": 6.6734, + "step": 510 + }, + { + "epoch": 0.06, + "learning_rate": 1.9997523424351122e-05, + "loss": 6.5676, + "step": 520 + }, + { + "epoch": 0.06, + "learning_rate": 1.999698062584338e-05, + "loss": 6.4836, + "step": 530 + }, + { + "epoch": 0.06, + "learning_rate": 1.999638409623822e-05, + "loss": 6.4262, + "step": 540 + }, + { + "epoch": 0.06, + "learning_rate": 1.999573383874184e-05, + "loss": 6.4371, + "step": 550 + }, + { + "epoch": 0.06, + "learning_rate": 1.9995029856849192e-05, + "loss": 6.3914, + "step": 560 + }, + { + "epoch": 0.06, + "learning_rate": 1.9994272154343995e-05, + "loss": 6.3473, + "step": 570 + }, + { + "epoch": 0.06, + "learning_rate": 1.9993460735298695e-05, + "loss": 6.3812, + "step": 580 + }, + { + "epoch": 0.06, + "learning_rate": 1.9992595604074457e-05, + "loss": 6.3656, + "step": 590 + }, + { + "epoch": 0.06, + "learning_rate": 1.9991676765321124e-05, + "loss": 6.3387, + "step": 600 + }, + { + "epoch": 0.07, + "learning_rate": 1.999070422397721e-05, + "loss": 6.2977, + "step": 610 + }, + { + "epoch": 0.07, + "learning_rate": 1.998967798526987e-05, + "loss": 6.3191, + "step": 620 + }, + { + "epoch": 0.07, + "learning_rate": 1.9988598054714854e-05, + "loss": 6.2926, + "step": 630 + }, + { + "epoch": 0.07, + "learning_rate": 1.9987464438116506e-05, + "loss": 6.2914, + "step": 640 + }, + { + "epoch": 0.07, + "learning_rate": 1.998627714156771e-05, + "loss": 6.2887, + "step": 650 + }, + { + "epoch": 0.07, + "learning_rate": 1.9985036171449868e-05, + "loss": 6.2414, + "step": 660 + }, + { + "epoch": 0.07, + "learning_rate": 1.998374153443286e-05, + "loss": 6.2742, + "step": 670 + }, + { + "epoch": 0.07, + "learning_rate": 1.998239323747502e-05, + "loss": 6.248, + "step": 680 + }, + { + "epoch": 0.07, + "learning_rate": 1.9980991287823076e-05, + "loss": 6.2629, + "step": 690 + }, + { + "epoch": 0.08, + "learning_rate": 1.997953569301214e-05, + "loss": 6.2496, + "step": 700 + }, + { + "epoch": 0.08, + "learning_rate": 1.9978026460865634e-05, + "loss": 6.2461, + "step": 710 + }, + { + "epoch": 0.08, + "learning_rate": 1.997646359949529e-05, + "loss": 6.2062, + "step": 720 + }, + { + "epoch": 0.08, + "learning_rate": 1.9974847117301062e-05, + "loss": 6.2539, + "step": 730 + }, + { + "epoch": 0.08, + "learning_rate": 1.997317702297111e-05, + "loss": 6.2402, + "step": 740 + }, + { + "epoch": 0.08, + "learning_rate": 1.997145332548175e-05, + "loss": 6.2105, + "step": 750 + }, + { + "epoch": 0.08, + "learning_rate": 1.9969676034097386e-05, + "loss": 6.1992, + "step": 760 + }, + { + "epoch": 0.08, + "learning_rate": 1.996784515837049e-05, + "loss": 6.2246, + "step": 770 + }, + { + "epoch": 0.08, + "learning_rate": 1.9965960708141532e-05, + "loss": 6.2129, + "step": 780 + }, + { + "epoch": 0.08, + "learning_rate": 1.996402269353892e-05, + "loss": 6.2012, + "step": 790 + }, + { + "epoch": 0.09, + "learning_rate": 1.9962031124978974e-05, + "loss": 6.1855, + "step": 800 + }, + { + "epoch": 0.09, + "learning_rate": 1.995998601316583e-05, + "loss": 6.1867, + "step": 810 + }, + { + "epoch": 0.09, + "learning_rate": 1.9957887369091427e-05, + "loss": 6.232, + "step": 820 + }, + { + "epoch": 0.09, + "learning_rate": 1.9955735204035412e-05, + "loss": 6.2332, + "step": 830 + }, + { + "epoch": 0.09, + "learning_rate": 1.9953529529565098e-05, + "loss": 6.1937, + "step": 840 + }, + { + "epoch": 0.09, + "learning_rate": 1.9951270357535397e-05, + "loss": 6.2062, + "step": 850 + }, + { + "epoch": 0.09, + "learning_rate": 1.9948957700088747e-05, + "loss": 6.2254, + "step": 860 + }, + { + "epoch": 0.09, + "learning_rate": 1.9946591569655073e-05, + "loss": 6.2078, + "step": 870 + }, + { + "epoch": 0.09, + "learning_rate": 1.9944171978951687e-05, + "loss": 6.1742, + "step": 880 + }, + { + "epoch": 0.1, + "learning_rate": 1.9941698940983243e-05, + "loss": 6.2, + "step": 890 + }, + { + "epoch": 0.1, + "learning_rate": 1.993917246904166e-05, + "loss": 6.1895, + "step": 900 + }, + { + "epoch": 0.1, + "learning_rate": 1.9936592576706048e-05, + "loss": 6.1953, + "step": 910 + }, + { + "epoch": 0.1, + "learning_rate": 1.993395927784264e-05, + "loss": 6.1914, + "step": 920 + }, + { + "epoch": 0.1, + "learning_rate": 1.9931272586604712e-05, + "loss": 6.1883, + "step": 930 + }, + { + "epoch": 0.1, + "learning_rate": 1.992853251743251e-05, + "loss": 6.202, + "step": 940 + }, + { + "epoch": 0.1, + "learning_rate": 1.9925739085053175e-05, + "loss": 6.193, + "step": 950 + }, + { + "epoch": 0.1, + "learning_rate": 1.9922892304480657e-05, + "loss": 6.177, + "step": 960 + }, + { + "epoch": 0.1, + "learning_rate": 1.991999219101564e-05, + "loss": 6.1566, + "step": 970 + }, + { + "epoch": 0.11, + "learning_rate": 1.9917038760245463e-05, + "loss": 6.1863, + "step": 980 + }, + { + "epoch": 0.11, + "learning_rate": 1.991403202804402e-05, + "loss": 6.1723, + "step": 990 + }, + { + "epoch": 0.11, + "learning_rate": 1.99109720105717e-05, + "loss": 6.1937, + "step": 1000 + }, + { + "epoch": 0.11, + "learning_rate": 1.9907858724275272e-05, + "loss": 6.1742, + "step": 1010 + }, + { + "epoch": 0.11, + "learning_rate": 1.990469218588782e-05, + "loss": 6.1789, + "step": 1020 + }, + { + "epoch": 0.11, + "learning_rate": 1.990147241242864e-05, + "loss": 6.1598, + "step": 1030 + }, + { + "epoch": 0.11, + "learning_rate": 1.989819942120315e-05, + "loss": 6.1691, + "step": 1040 + }, + { + "epoch": 0.11, + "learning_rate": 1.989487322980281e-05, + "loss": 6.1512, + "step": 1050 + }, + { + "epoch": 0.11, + "learning_rate": 1.9891493856105007e-05, + "loss": 6.1652, + "step": 1060 + }, + { + "epoch": 0.11, + "learning_rate": 1.988806131827297e-05, + "loss": 6.1574, + "step": 1070 + }, + { + "epoch": 0.12, + "learning_rate": 1.9884575634755667e-05, + "loss": 6.1645, + "step": 1080 + }, + { + "epoch": 0.12, + "learning_rate": 1.9881036824287724e-05, + "loss": 6.1684, + "step": 1090 + }, + { + "epoch": 0.12, + "learning_rate": 1.9877444905889293e-05, + "loss": 6.1473, + "step": 1100 + }, + { + "epoch": 0.12, + "learning_rate": 1.987379989886598e-05, + "loss": 6.1648, + "step": 1110 + }, + { + "epoch": 0.12, + "learning_rate": 1.9870101822808717e-05, + "loss": 6.1359, + "step": 1120 + }, + { + "epoch": 0.12, + "learning_rate": 1.9866350697593682e-05, + "loss": 6.184, + "step": 1130 + }, + { + "epoch": 0.12, + "learning_rate": 1.9862546543382163e-05, + "loss": 6.1445, + "step": 1140 + }, + { + "epoch": 0.12, + "learning_rate": 1.985868938062048e-05, + "loss": 6.098, + "step": 1150 + }, + { + "epoch": 0.12, + "learning_rate": 1.9854779230039838e-05, + "loss": 6.1434, + "step": 1160 + }, + { + "epoch": 0.13, + "learning_rate": 1.985081611265626e-05, + "loss": 6.157, + "step": 1170 + }, + { + "epoch": 0.13, + "learning_rate": 1.9846800049770444e-05, + "loss": 6.1484, + "step": 1180 + }, + { + "epoch": 0.13, + "learning_rate": 1.9842731062967647e-05, + "loss": 6.148, + "step": 1190 + }, + { + "epoch": 0.13, + "learning_rate": 1.9838609174117588e-05, + "loss": 6.1594, + "step": 1200 + }, + { + "epoch": 0.13, + "learning_rate": 1.983443440537432e-05, + "loss": 6.1492, + "step": 1210 + }, + { + "epoch": 0.13, + "learning_rate": 1.9830206779176103e-05, + "loss": 6.1266, + "step": 1220 + }, + { + "epoch": 0.13, + "learning_rate": 1.9825926318245302e-05, + "loss": 6.1281, + "step": 1230 + }, + { + "epoch": 0.13, + "learning_rate": 1.982159304558825e-05, + "loss": 6.1383, + "step": 1240 + }, + { + "epoch": 0.13, + "learning_rate": 1.9817206984495123e-05, + "loss": 6.1152, + "step": 1250 + }, + { + "epoch": 0.14, + "learning_rate": 1.981276815853983e-05, + "loss": 6.1406, + "step": 1260 + }, + { + "epoch": 0.14, + "learning_rate": 1.9808276591579875e-05, + "loss": 6.1707, + "step": 1270 + }, + { + "epoch": 0.14, + "learning_rate": 1.980373230775623e-05, + "loss": 6.143, + "step": 1280 + }, + { + "epoch": 0.14, + "learning_rate": 1.9799135331493202e-05, + "loss": 6.118, + "step": 1290 + }, + { + "epoch": 0.14, + "learning_rate": 1.979448568749831e-05, + "loss": 6.1461, + "step": 1300 + }, + { + "epoch": 0.14, + "learning_rate": 1.9789783400762148e-05, + "loss": 6.1363, + "step": 1310 + }, + { + "epoch": 0.14, + "learning_rate": 1.9785028496558247e-05, + "loss": 6.1434, + "step": 1320 + }, + { + "epoch": 0.14, + "learning_rate": 1.978022100044295e-05, + "loss": 6.1383, + "step": 1330 + }, + { + "epoch": 0.14, + "learning_rate": 1.977536093825526e-05, + "loss": 6.1434, + "step": 1340 + }, + { + "epoch": 0.14, + "learning_rate": 1.977044833611671e-05, + "loss": 6.1512, + "step": 1350 + }, + { + "epoch": 0.15, + "learning_rate": 1.9765483220431227e-05, + "loss": 6.1508, + "step": 1360 + }, + { + "epoch": 0.15, + "learning_rate": 1.9760465617884978e-05, + "loss": 6.1277, + "step": 1370 + }, + { + "epoch": 0.15, + "learning_rate": 1.9755395555446233e-05, + "loss": 6.1164, + "step": 1380 + }, + { + "epoch": 0.15, + "learning_rate": 1.9750273060365225e-05, + "loss": 6.1246, + "step": 1390 + }, + { + "epoch": 0.15, + "learning_rate": 1.974509816017399e-05, + "loss": 6.123, + "step": 1400 + }, + { + "epoch": 0.15, + "learning_rate": 1.973987088268624e-05, + "loss": 6.1355, + "step": 1410 + }, + { + "epoch": 0.15, + "learning_rate": 1.973459125599719e-05, + "loss": 6.1129, + "step": 1420 + }, + { + "epoch": 0.15, + "learning_rate": 1.9729259308483418e-05, + "loss": 6.1164, + "step": 1430 + }, + { + "epoch": 0.15, + "learning_rate": 1.9723875068802722e-05, + "loss": 6.1363, + "step": 1440 + }, + { + "epoch": 0.16, + "learning_rate": 1.971843856589395e-05, + "loss": 6.1133, + "step": 1450 + }, + { + "epoch": 0.16, + "learning_rate": 1.9712949828976844e-05, + "loss": 6.1063, + "step": 1460 + }, + { + "epoch": 0.16, + "learning_rate": 1.9707408887551906e-05, + "loss": 6.1172, + "step": 1470 + }, + { + "epoch": 0.16, + "learning_rate": 1.9701815771400206e-05, + "loss": 6.1316, + "step": 1480 + }, + { + "epoch": 0.16, + "learning_rate": 1.9696170510583255e-05, + "loss": 6.1344, + "step": 1490 + }, + { + "epoch": 0.16, + "learning_rate": 1.9690473135442815e-05, + "loss": 6.1324, + "step": 1500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9684723676600758e-05, + "loss": 6.1133, + "step": 1510 + }, + { + "epoch": 0.16, + "learning_rate": 1.9678922164958886e-05, + "loss": 6.1039, + "step": 1520 + }, + { + "epoch": 0.16, + "learning_rate": 1.9673068631698773e-05, + "loss": 6.1102, + "step": 1530 + }, + { + "epoch": 0.17, + "learning_rate": 1.9667163108281594e-05, + "loss": 6.1246, + "step": 1540 + }, + { + "epoch": 0.17, + "learning_rate": 1.9661205626447954e-05, + "loss": 6.1172, + "step": 1550 + }, + { + "epoch": 0.17, + "learning_rate": 1.9655196218217734e-05, + "loss": 6.118, + "step": 1560 + }, + { + "epoch": 0.17, + "learning_rate": 1.9649134915889886e-05, + "loss": 6.1008, + "step": 1570 + }, + { + "epoch": 0.17, + "learning_rate": 1.96430217520423e-05, + "loss": 6.1324, + "step": 1580 + }, + { + "epoch": 0.17, + "learning_rate": 1.9636856759531586e-05, + "loss": 6.1219, + "step": 1590 + }, + { + "epoch": 0.17, + "learning_rate": 1.9630639971492938e-05, + "loss": 6.1375, + "step": 1600 + }, + { + "epoch": 0.17, + "learning_rate": 1.9624371421339926e-05, + "loss": 6.0957, + "step": 1610 + }, + { + "epoch": 0.17, + "learning_rate": 1.961805114276433e-05, + "loss": 6.0938, + "step": 1620 + }, + { + "epoch": 0.18, + "learning_rate": 1.961167916973596e-05, + "loss": 6.1246, + "step": 1630 + }, + { + "epoch": 0.18, + "learning_rate": 1.9605255536502463e-05, + "loss": 6.1367, + "step": 1640 + }, + { + "epoch": 0.18, + "learning_rate": 1.959878027758915e-05, + "loss": 6.1066, + "step": 1650 + }, + { + "epoch": 0.18, + "learning_rate": 1.959225342779881e-05, + "loss": 6.1473, + "step": 1660 + }, + { + "epoch": 0.18, + "learning_rate": 1.9585675022211514e-05, + "loss": 6.118, + "step": 1670 + }, + { + "epoch": 0.18, + "learning_rate": 1.9579045096184433e-05, + "loss": 6.1016, + "step": 1680 + }, + { + "epoch": 0.18, + "learning_rate": 1.9572363685351642e-05, + "loss": 6.116, + "step": 1690 + }, + { + "epoch": 0.18, + "learning_rate": 1.9565630825623945e-05, + "loss": 6.1184, + "step": 1700 + }, + { + "epoch": 0.18, + "learning_rate": 1.955884655318865e-05, + "loss": 6.1043, + "step": 1710 + }, + { + "epoch": 0.18, + "learning_rate": 1.9552010904509424e-05, + "loss": 6.1344, + "step": 1720 + }, + { + "epoch": 0.19, + "learning_rate": 1.954512391632604e-05, + "loss": 6.1055, + "step": 1730 + }, + { + "epoch": 0.19, + "learning_rate": 1.9538185625654216e-05, + "loss": 6.0887, + "step": 1740 + }, + { + "epoch": 0.19, + "learning_rate": 1.9531196069785414e-05, + "loss": 6.107, + "step": 1750 + }, + { + "epoch": 0.19, + "learning_rate": 1.952415528628663e-05, + "loss": 6.1047, + "step": 1760 + }, + { + "epoch": 0.19, + "learning_rate": 1.9517063313000184e-05, + "loss": 6.0969, + "step": 1770 + }, + { + "epoch": 0.19, + "learning_rate": 1.950992018804354e-05, + "loss": 6.1191, + "step": 1780 + }, + { + "epoch": 0.19, + "learning_rate": 1.9502725949809086e-05, + "loss": 6.1414, + "step": 1790 + }, + { + "epoch": 0.19, + "learning_rate": 1.949548063696393e-05, + "loss": 6.1145, + "step": 1800 + }, + { + "epoch": 0.19, + "learning_rate": 1.9488184288449684e-05, + "loss": 6.1063, + "step": 1810 + }, + { + "epoch": 0.2, + "learning_rate": 1.9480836943482275e-05, + "loss": 6.0973, + "step": 1820 + }, + { + "epoch": 0.2, + "learning_rate": 1.9473438641551715e-05, + "loss": 6.1227, + "step": 1830 + }, + { + "epoch": 0.2, + "learning_rate": 1.9465989422421903e-05, + "loss": 6.1027, + "step": 1840 + }, + { + "epoch": 0.2, + "learning_rate": 1.9458489326130395e-05, + "loss": 6.0707, + "step": 1850 + }, + { + "epoch": 0.2, + "learning_rate": 1.9450938392988208e-05, + "loss": 6.1297, + "step": 1860 + }, + { + "epoch": 0.2, + "learning_rate": 1.9443336663579583e-05, + "loss": 6.1074, + "step": 1870 + }, + { + "epoch": 0.2, + "learning_rate": 1.943568417876178e-05, + "loss": 6.134, + "step": 1880 + }, + { + "epoch": 0.2, + "learning_rate": 1.942798097966487e-05, + "loss": 6.1004, + "step": 1890 + }, + { + "epoch": 0.2, + "learning_rate": 1.942022710769148e-05, + "loss": 6.1297, + "step": 1900 + }, + { + "epoch": 0.21, + "learning_rate": 1.9412422604516594e-05, + "loss": 6.1051, + "step": 1910 + }, + { + "epoch": 0.21, + "learning_rate": 1.9404567512087338e-05, + "loss": 6.0984, + "step": 1920 + }, + { + "epoch": 0.21, + "learning_rate": 1.9396661872622728e-05, + "loss": 6.0879, + "step": 1930 + }, + { + "epoch": 0.21, + "learning_rate": 1.9388705728613465e-05, + "loss": 6.0891, + "step": 1940 + }, + { + "epoch": 0.21, + "learning_rate": 1.9380699122821698e-05, + "loss": 6.1137, + "step": 1950 + }, + { + "epoch": 0.21, + "learning_rate": 1.9372642098280785e-05, + "loss": 6.0914, + "step": 1960 + }, + { + "epoch": 0.21, + "learning_rate": 1.936453469829508e-05, + "loss": 6.1051, + "step": 1970 + }, + { + "epoch": 0.21, + "learning_rate": 1.9356376966439694e-05, + "loss": 6.1188, + "step": 1980 + }, + { + "epoch": 0.21, + "learning_rate": 1.934816894656025e-05, + "loss": 6.0949, + "step": 1990 + }, + { + "epoch": 0.21, + "learning_rate": 1.9339910682772664e-05, + "loss": 6.1266, + "step": 2000 + }, + { + "epoch": 0.22, + "learning_rate": 1.9331602219462888e-05, + "loss": 6.1113, + "step": 2010 + }, + { + "epoch": 0.22, + "learning_rate": 1.9323243601286696e-05, + "loss": 6.0949, + "step": 2020 + }, + { + "epoch": 0.22, + "learning_rate": 1.9314834873169426e-05, + "loss": 6.1105, + "step": 2030 + }, + { + "epoch": 0.22, + "learning_rate": 1.930637608030574e-05, + "loss": 6.1004, + "step": 2040 + }, + { + "epoch": 0.22, + "learning_rate": 1.9297867268159393e-05, + "loss": 6.0992, + "step": 2050 + }, + { + "epoch": 0.22, + "learning_rate": 1.9289308482462964e-05, + "loss": 6.0793, + "step": 2060 + }, + { + "epoch": 0.22, + "learning_rate": 1.9280699769217647e-05, + "loss": 6.0789, + "step": 2070 + }, + { + "epoch": 0.22, + "learning_rate": 1.927204117469297e-05, + "loss": 6.1199, + "step": 2080 + }, + { + "epoch": 0.22, + "learning_rate": 1.9263332745426564e-05, + "loss": 6.1172, + "step": 2090 + }, + { + "epoch": 0.23, + "learning_rate": 1.9254574528223907e-05, + "loss": 6.1051, + "step": 2100 + }, + { + "epoch": 0.23, + "learning_rate": 1.9245766570158072e-05, + "loss": 6.1023, + "step": 2110 + }, + { + "epoch": 0.23, + "learning_rate": 1.9236908918569485e-05, + "loss": 6.0949, + "step": 2120 + }, + { + "epoch": 0.23, + "learning_rate": 1.9228001621065644e-05, + "loss": 6.0934, + "step": 2130 + }, + { + "epoch": 0.23, + "learning_rate": 1.92190447255209e-05, + "loss": 6.0762, + "step": 2140 + }, + { + "epoch": 0.23, + "learning_rate": 1.921003828007617e-05, + "loss": 6.0863, + "step": 2150 + }, + { + "epoch": 0.23, + "learning_rate": 1.9200982333138684e-05, + "loss": 6.0848, + "step": 2160 + }, + { + "epoch": 0.23, + "learning_rate": 1.9191876933381742e-05, + "loss": 6.0895, + "step": 2170 + }, + { + "epoch": 0.23, + "learning_rate": 1.9182722129744426e-05, + "loss": 6.0594, + "step": 2180 + }, + { + "epoch": 0.24, + "learning_rate": 1.9173517971431362e-05, + "loss": 6.098, + "step": 2190 + }, + { + "epoch": 0.24, + "learning_rate": 1.916426450791244e-05, + "loss": 6.1074, + "step": 2200 + }, + { + "epoch": 0.24, + "learning_rate": 1.915496178892255e-05, + "loss": 6.1078, + "step": 2210 + }, + { + "epoch": 0.24, + "learning_rate": 1.9145609864461317e-05, + "loss": 6.0586, + "step": 2220 + }, + { + "epoch": 0.24, + "learning_rate": 1.9136208784792838e-05, + "loss": 6.0922, + "step": 2230 + }, + { + "epoch": 0.24, + "learning_rate": 1.91267586004454e-05, + "loss": 6.1285, + "step": 2240 + }, + { + "epoch": 0.24, + "learning_rate": 1.9117259362211212e-05, + "loss": 6.0855, + "step": 2250 + }, + { + "epoch": 0.24, + "learning_rate": 1.9107711121146152e-05, + "loss": 6.084, + "step": 2260 + }, + { + "epoch": 0.24, + "learning_rate": 1.9098113928569448e-05, + "loss": 6.1105, + "step": 2270 + }, + { + "epoch": 0.24, + "learning_rate": 1.9088467836063452e-05, + "loss": 6.0988, + "step": 2280 + }, + { + "epoch": 0.25, + "learning_rate": 1.9078772895473326e-05, + "loss": 6.0832, + "step": 2290 + }, + { + "epoch": 0.25, + "learning_rate": 1.9069029158906792e-05, + "loss": 6.0965, + "step": 2300 + }, + { + "epoch": 0.25, + "learning_rate": 1.9059236678733817e-05, + "loss": 6.1094, + "step": 2310 + }, + { + "epoch": 0.25, + "learning_rate": 1.904939550758637e-05, + "loss": 6.0738, + "step": 2320 + }, + { + "epoch": 0.25, + "learning_rate": 1.9039505698358116e-05, + "loss": 6.0941, + "step": 2330 + }, + { + "epoch": 0.25, + "learning_rate": 1.902956730420413e-05, + "loss": 6.1012, + "step": 2340 + }, + { + "epoch": 0.25, + "learning_rate": 1.9019580378540622e-05, + "loss": 6.0891, + "step": 2350 + }, + { + "epoch": 0.25, + "learning_rate": 1.9009544975044652e-05, + "loss": 6.1082, + "step": 2360 + }, + { + "epoch": 0.25, + "learning_rate": 1.8999461147653828e-05, + "loss": 6.1094, + "step": 2370 + }, + { + "epoch": 0.26, + "learning_rate": 1.8989328950566023e-05, + "loss": 6.077, + "step": 2380 + }, + { + "epoch": 0.26, + "learning_rate": 1.897914843823909e-05, + "loss": 6.0977, + "step": 2390 + }, + { + "epoch": 0.26, + "learning_rate": 1.8968919665390556e-05, + "loss": 6.1102, + "step": 2400 + }, + { + "epoch": 0.26, + "learning_rate": 1.8958642686997348e-05, + "loss": 6.084, + "step": 2410 + }, + { + "epoch": 0.26, + "learning_rate": 1.8948317558295464e-05, + "loss": 6.0949, + "step": 2420 + }, + { + "epoch": 0.26, + "learning_rate": 1.893794433477972e-05, + "loss": 6.0895, + "step": 2430 + }, + { + "epoch": 0.26, + "learning_rate": 1.8927523072203417e-05, + "loss": 6.0824, + "step": 2440 + }, + { + "epoch": 0.26, + "learning_rate": 1.8917053826578047e-05, + "loss": 6.0855, + "step": 2450 + }, + { + "epoch": 0.26, + "learning_rate": 1.8906536654173013e-05, + "loss": 6.1, + "step": 2460 + }, + { + "epoch": 0.27, + "learning_rate": 1.88959716115153e-05, + "loss": 6.0941, + "step": 2470 + }, + { + "epoch": 0.27, + "learning_rate": 1.8885358755389192e-05, + "loss": 6.0848, + "step": 2480 + }, + { + "epoch": 0.27, + "learning_rate": 1.887469814283595e-05, + "loss": 6.0926, + "step": 2490 + }, + { + "epoch": 0.27, + "learning_rate": 1.8863989831153513e-05, + "loss": 6.0637, + "step": 2500 + }, + { + "epoch": 0.27, + "learning_rate": 1.8853233877896197e-05, + "loss": 6.0922, + "step": 2510 + }, + { + "epoch": 0.27, + "learning_rate": 1.8842430340874366e-05, + "loss": 6.0785, + "step": 2520 + }, + { + "epoch": 0.27, + "learning_rate": 1.883157927815415e-05, + "loss": 6.0625, + "step": 2530 + }, + { + "epoch": 0.27, + "learning_rate": 1.8820680748057113e-05, + "loss": 6.073, + "step": 2540 + }, + { + "epoch": 0.27, + "learning_rate": 1.880973480915993e-05, + "loss": 6.0723, + "step": 2550 + }, + { + "epoch": 0.27, + "learning_rate": 1.8798741520294097e-05, + "loss": 6.0812, + "step": 2560 + }, + { + "epoch": 0.28, + "learning_rate": 1.8787700940545608e-05, + "loss": 6.082, + "step": 2570 + }, + { + "epoch": 0.28, + "learning_rate": 1.877661312925462e-05, + "loss": 6.0664, + "step": 2580 + }, + { + "epoch": 0.28, + "learning_rate": 1.8765478146015156e-05, + "loss": 6.0715, + "step": 2590 + }, + { + "epoch": 0.28, + "learning_rate": 1.8754296050674776e-05, + "loss": 6.0715, + "step": 2600 + }, + { + "epoch": 0.28, + "learning_rate": 1.8743066903334252e-05, + "loss": 6.0875, + "step": 2610 + }, + { + "epoch": 0.28, + "learning_rate": 1.873179076434724e-05, + "loss": 6.0754, + "step": 2620 + }, + { + "epoch": 0.28, + "learning_rate": 1.872046769431998e-05, + "loss": 6.0766, + "step": 2630 + }, + { + "epoch": 0.28, + "learning_rate": 1.8709097754110945e-05, + "loss": 6.0887, + "step": 2640 + }, + { + "epoch": 0.28, + "learning_rate": 1.8697681004830514e-05, + "loss": 6.0492, + "step": 2650 + }, + { + "epoch": 0.29, + "learning_rate": 1.868621750784067e-05, + "loss": 6.0684, + "step": 2660 + }, + { + "epoch": 0.29, + "learning_rate": 1.8674707324754643e-05, + "loss": 6.0809, + "step": 2670 + }, + { + "epoch": 0.29, + "learning_rate": 1.8663150517436586e-05, + "loss": 6.0875, + "step": 2680 + }, + { + "epoch": 0.29, + "learning_rate": 1.865154714800125e-05, + "loss": 6.0891, + "step": 2690 + }, + { + "epoch": 0.29, + "learning_rate": 1.863989727881364e-05, + "loss": 6.0816, + "step": 2700 + }, + { + "epoch": 0.29, + "learning_rate": 1.862820097248869e-05, + "loss": 6.082, + "step": 2710 + }, + { + "epoch": 0.29, + "learning_rate": 1.861645829189092e-05, + "loss": 6.0672, + "step": 2720 + }, + { + "epoch": 0.29, + "learning_rate": 1.8604669300134094e-05, + "loss": 6.0852, + "step": 2730 + }, + { + "epoch": 0.29, + "learning_rate": 1.859283406058089e-05, + "loss": 6.0879, + "step": 2740 + }, + { + "epoch": 0.3, + "learning_rate": 1.8580952636842557e-05, + "loss": 6.0723, + "step": 2750 + }, + { + "epoch": 0.3, + "learning_rate": 1.856902509277857e-05, + "loss": 6.0891, + "step": 2760 + }, + { + "epoch": 0.3, + "learning_rate": 1.855705149249629e-05, + "loss": 6.077, + "step": 2770 + }, + { + "epoch": 0.3, + "learning_rate": 1.854503190035062e-05, + "loss": 6.0484, + "step": 2780 + }, + { + "epoch": 0.3, + "learning_rate": 1.8532966380943643e-05, + "loss": 6.0797, + "step": 2790 + }, + { + "epoch": 0.3, + "learning_rate": 1.8520854999124308e-05, + "loss": 6.0738, + "step": 2800 + }, + { + "epoch": 0.3, + "learning_rate": 1.850869781998805e-05, + "loss": 6.0965, + "step": 2810 + }, + { + "epoch": 0.3, + "learning_rate": 1.8496494908876454e-05, + "loss": 6.0895, + "step": 2820 + }, + { + "epoch": 0.3, + "learning_rate": 1.8484246331376908e-05, + "loss": 6.0742, + "step": 2830 + }, + { + "epoch": 0.3, + "learning_rate": 1.8471952153322237e-05, + "loss": 6.0703, + "step": 2840 + }, + { + "epoch": 0.31, + "learning_rate": 1.8459612440790364e-05, + "loss": 6.0773, + "step": 2850 + }, + { + "epoch": 0.31, + "learning_rate": 1.8447227260103942e-05, + "loss": 6.0758, + "step": 2860 + }, + { + "epoch": 0.31, + "learning_rate": 1.843479667783e-05, + "loss": 6.0645, + "step": 2870 + }, + { + "epoch": 0.31, + "learning_rate": 1.8422320760779602e-05, + "loss": 6.0914, + "step": 2880 + }, + { + "epoch": 0.31, + "learning_rate": 1.8409799576007465e-05, + "loss": 6.0863, + "step": 2890 + }, + { + "epoch": 0.31, + "learning_rate": 1.8397233190811597e-05, + "loss": 6.0805, + "step": 2900 + }, + { + "epoch": 0.31, + "learning_rate": 1.8384621672732975e-05, + "loss": 6.0824, + "step": 2910 + }, + { + "epoch": 0.31, + "learning_rate": 1.837196508955512e-05, + "loss": 6.1023, + "step": 2920 + }, + { + "epoch": 0.31, + "learning_rate": 1.8359263509303792e-05, + "loss": 6.0867, + "step": 2930 + }, + { + "epoch": 0.32, + "learning_rate": 1.834651700024659e-05, + "loss": 6.107, + "step": 2940 + }, + { + "epoch": 0.32, + "learning_rate": 1.8333725630892584e-05, + "loss": 6.0941, + "step": 2950 + }, + { + "epoch": 0.32, + "learning_rate": 1.8320889469991965e-05, + "loss": 6.1039, + "step": 2960 + }, + { + "epoch": 0.32, + "learning_rate": 1.8308008586535666e-05, + "loss": 6.0805, + "step": 2970 + }, + { + "epoch": 0.32, + "learning_rate": 1.8295083049754994e-05, + "loss": 6.1336, + "step": 2980 + }, + { + "epoch": 0.32, + "learning_rate": 1.828211292912125e-05, + "loss": 6.0762, + "step": 2990 + }, + { + "epoch": 0.32, + "learning_rate": 1.826909829434536e-05, + "loss": 6.0906, + "step": 3000 + }, + { + "epoch": 0.32, + "learning_rate": 1.82586545833408e-05, + "loss": 6.0547, + "step": 3010 + }, + { + "epoch": 0.32, + "learning_rate": 1.8245559999544053e-05, + "loss": 6.1059, + "step": 3020 + }, + { + "epoch": 0.33, + "learning_rate": 1.8232421098067383e-05, + "loss": 6.0973, + "step": 3030 + }, + { + "epoch": 0.33, + "learning_rate": 1.8219237949528875e-05, + "loss": 6.0801, + "step": 3040 + }, + { + "epoch": 0.33, + "learning_rate": 1.8206010624784426e-05, + "loss": 6.082, + "step": 3050 + }, + { + "epoch": 0.33, + "learning_rate": 1.8192739194927366e-05, + "loss": 6.066, + "step": 3060 + }, + { + "epoch": 0.33, + "learning_rate": 1.8179423731288088e-05, + "loss": 6.0605, + "step": 3070 + }, + { + "epoch": 0.33, + "learning_rate": 1.816606430543364e-05, + "loss": 6.1137, + "step": 3080 + }, + { + "epoch": 0.33, + "learning_rate": 1.8152660989167373e-05, + "loss": 6.0871, + "step": 3090 + }, + { + "epoch": 0.33, + "learning_rate": 1.813921385452852e-05, + "loss": 6.0836, + "step": 3100 + }, + { + "epoch": 0.33, + "learning_rate": 1.8125722973791836e-05, + "loss": 6.0656, + "step": 3110 + }, + { + "epoch": 0.33, + "learning_rate": 1.81121884194672e-05, + "loss": 6.093, + "step": 3120 + }, + { + "epoch": 0.34, + "learning_rate": 1.8098610264299213e-05, + "loss": 6.0535, + "step": 3130 + }, + { + "epoch": 0.34, + "learning_rate": 1.8084988581266837e-05, + "loss": 6.0766, + "step": 3140 + }, + { + "epoch": 0.34, + "learning_rate": 1.8071323443582973e-05, + "loss": 6.0719, + "step": 3150 + }, + { + "epoch": 0.34, + "learning_rate": 1.805761492469408e-05, + "loss": 6.0809, + "step": 3160 + }, + { + "epoch": 0.34, + "learning_rate": 1.804386309827978e-05, + "loss": 6.0996, + "step": 3170 + }, + { + "epoch": 0.34, + "learning_rate": 1.803006803825247e-05, + "loss": 6.082, + "step": 3180 + }, + { + "epoch": 0.34, + "learning_rate": 1.80162298187569e-05, + "loss": 6.0656, + "step": 3190 + }, + { + "epoch": 0.34, + "learning_rate": 1.8002348514169802e-05, + "loss": 6.0848, + "step": 3200 + }, + { + "epoch": 0.34, + "learning_rate": 1.7988424199099476e-05, + "loss": 6.0855, + "step": 3210 + }, + { + "epoch": 0.35, + "learning_rate": 1.797445694838539e-05, + "loss": 6.0918, + "step": 3220 + }, + { + "epoch": 0.35, + "learning_rate": 1.7960446837097784e-05, + "loss": 6.0789, + "step": 3230 + }, + { + "epoch": 0.35, + "learning_rate": 1.7946393940537262e-05, + "loss": 6.0563, + "step": 3240 + }, + { + "epoch": 0.35, + "learning_rate": 1.793229833423438e-05, + "loss": 6.0473, + "step": 3250 + }, + { + "epoch": 0.35, + "learning_rate": 1.7918160093949254e-05, + "loss": 6.0777, + "step": 3260 + }, + { + "epoch": 0.35, + "learning_rate": 1.790397929567114e-05, + "loss": 6.077, + "step": 3270 + }, + { + "epoch": 0.35, + "learning_rate": 1.7889756015618047e-05, + "loss": 6.0852, + "step": 3280 + }, + { + "epoch": 0.35, + "learning_rate": 1.787549033023629e-05, + "loss": 6.0738, + "step": 3290 + }, + { + "epoch": 0.35, + "learning_rate": 1.786118231620012e-05, + "loss": 6.0914, + "step": 3300 + }, + { + "epoch": 0.36, + "learning_rate": 1.784683205041129e-05, + "loss": 6.0785, + "step": 3310 + }, + { + "epoch": 0.36, + "learning_rate": 1.783243960999863e-05, + "loss": 6.1156, + "step": 3320 + }, + { + "epoch": 0.36, + "learning_rate": 1.7818005072317665e-05, + "loss": 6.057, + "step": 3330 + }, + { + "epoch": 0.36, + "learning_rate": 1.7803528514950173e-05, + "loss": 6.0777, + "step": 3340 + }, + { + "epoch": 0.36, + "learning_rate": 1.778901001570378e-05, + "loss": 6.0508, + "step": 3350 + }, + { + "epoch": 0.36, + "learning_rate": 1.7774449652611538e-05, + "loss": 6.1152, + "step": 3360 + }, + { + "epoch": 0.36, + "learning_rate": 1.77598475039315e-05, + "loss": 6.0715, + "step": 3370 + }, + { + "epoch": 0.36, + "learning_rate": 1.7745203648146314e-05, + "loss": 6.0727, + "step": 3380 + }, + { + "epoch": 0.36, + "learning_rate": 1.7730518163962788e-05, + "loss": 6.066, + "step": 3390 + }, + { + "epoch": 0.37, + "learning_rate": 1.771579113031147e-05, + "loss": 6.107, + "step": 3400 + }, + { + "epoch": 0.37, + "learning_rate": 1.770102262634623e-05, + "loss": 6.1012, + "step": 3410 + }, + { + "epoch": 0.37, + "learning_rate": 1.768621273144383e-05, + "loss": 6.1059, + "step": 3420 + }, + { + "epoch": 0.37, + "learning_rate": 1.767136152520349e-05, + "loss": 6.0902, + "step": 3430 + }, + { + "epoch": 0.37, + "learning_rate": 1.765646908744647e-05, + "loss": 6.0848, + "step": 3440 + }, + { + "epoch": 0.37, + "learning_rate": 1.7641535498215645e-05, + "loss": 6.0773, + "step": 3450 + }, + { + "epoch": 0.37, + "learning_rate": 1.7626560837775062e-05, + "loss": 6.073, + "step": 3460 + }, + { + "epoch": 0.37, + "learning_rate": 1.7611545186609516e-05, + "loss": 6.0805, + "step": 3470 + }, + { + "epoch": 0.37, + "learning_rate": 1.7596488625424118e-05, + "loss": 6.0848, + "step": 3480 + }, + { + "epoch": 0.37, + "learning_rate": 1.7581391235143854e-05, + "loss": 6.0793, + "step": 3490 + }, + { + "epoch": 0.38, + "learning_rate": 1.7566253096913162e-05, + "loss": 6.0805, + "step": 3500 + }, + { + "epoch": 0.38, + "learning_rate": 1.755107429209549e-05, + "loss": 6.0691, + "step": 3510 + }, + { + "epoch": 0.38, + "learning_rate": 1.753585490227285e-05, + "loss": 6.0836, + "step": 3520 + }, + { + "epoch": 0.38, + "learning_rate": 1.7520595009245394e-05, + "loss": 6.0691, + "step": 3530 + }, + { + "epoch": 0.38, + "learning_rate": 1.7505294695030964e-05, + "loss": 6.0746, + "step": 3540 + }, + { + "epoch": 0.38, + "learning_rate": 1.748995404186466e-05, + "loss": 6.0852, + "step": 3550 + }, + { + "epoch": 0.38, + "learning_rate": 1.7474573132198387e-05, + "loss": 6.082, + "step": 3560 + }, + { + "epoch": 0.38, + "learning_rate": 1.7459152048700423e-05, + "loss": 6.0758, + "step": 3570 + }, + { + "epoch": 0.38, + "learning_rate": 1.744369087425497e-05, + "loss": 6.0777, + "step": 3580 + }, + { + "epoch": 0.39, + "learning_rate": 1.7428189691961703e-05, + "loss": 6.077, + "step": 3590 + }, + { + "epoch": 0.39, + "learning_rate": 1.741264858513533e-05, + "loss": 6.0918, + "step": 3600 + }, + { + "epoch": 0.39, + "learning_rate": 1.7397067637305153e-05, + "loss": 6.066, + "step": 3610 + }, + { + "epoch": 0.39, + "learning_rate": 1.7381446932214587e-05, + "loss": 6.0945, + "step": 3620 + }, + { + "epoch": 0.39, + "learning_rate": 1.7365786553820757e-05, + "loss": 6.075, + "step": 3630 + }, + { + "epoch": 0.39, + "learning_rate": 1.7350086586293997e-05, + "loss": 6.0746, + "step": 3640 + }, + { + "epoch": 0.39, + "learning_rate": 1.733434711401744e-05, + "loss": 6.0836, + "step": 3650 + }, + { + "epoch": 0.39, + "learning_rate": 1.7318568221586543e-05, + "loss": 6.0609, + "step": 3660 + }, + { + "epoch": 0.39, + "learning_rate": 1.7302749993808626e-05, + "loss": 6.0836, + "step": 3670 + }, + { + "epoch": 0.4, + "learning_rate": 1.728689251570244e-05, + "loss": 6.0742, + "step": 3680 + }, + { + "epoch": 0.4, + "learning_rate": 1.7270995872497686e-05, + "loss": 6.1008, + "step": 3690 + }, + { + "epoch": 0.4, + "learning_rate": 1.725506014963457e-05, + "loss": 6.0758, + "step": 3700 + }, + { + "epoch": 0.4, + "learning_rate": 1.723908543276334e-05, + "loss": 6.0637, + "step": 3710 + }, + { + "epoch": 0.4, + "learning_rate": 1.722307180774383e-05, + "loss": 6.0566, + "step": 3720 + }, + { + "epoch": 0.4, + "learning_rate": 1.720701936064499e-05, + "loss": 6.0754, + "step": 3730 + }, + { + "epoch": 0.4, + "learning_rate": 1.719092817774443e-05, + "loss": 6.0473, + "step": 3740 + }, + { + "epoch": 0.4, + "learning_rate": 1.7174798345527953e-05, + "loss": 6.0602, + "step": 3750 + }, + { + "epoch": 0.4, + "learning_rate": 1.71586299506891e-05, + "loss": 6.0664, + "step": 3760 + }, + { + "epoch": 0.4, + "learning_rate": 1.7142423080128666e-05, + "loss": 6.0672, + "step": 3770 + }, + { + "epoch": 0.41, + "learning_rate": 1.7126177820954242e-05, + "loss": 6.0629, + "step": 3780 + }, + { + "epoch": 0.41, + "learning_rate": 1.710989426047976e-05, + "loss": 6.0539, + "step": 3790 + }, + { + "epoch": 0.41, + "learning_rate": 1.709357248622499e-05, + "loss": 6.0848, + "step": 3800 + }, + { + "epoch": 0.41, + "learning_rate": 1.7077212585915118e-05, + "loss": 6.032, + "step": 3810 + }, + { + "epoch": 0.41, + "learning_rate": 1.7060814647480228e-05, + "loss": 6.0855, + "step": 3820 + }, + { + "epoch": 0.41, + "learning_rate": 1.7044378759054846e-05, + "loss": 6.0738, + "step": 3830 + }, + { + "epoch": 0.41, + "learning_rate": 1.702790500897749e-05, + "loss": 6.0582, + "step": 3840 + }, + { + "epoch": 0.41, + "learning_rate": 1.701139348579015e-05, + "loss": 6.0563, + "step": 3850 + }, + { + "epoch": 0.41, + "learning_rate": 1.6994844278237857e-05, + "loss": 6.1, + "step": 3860 + }, + { + "epoch": 0.42, + "learning_rate": 1.6978257475268173e-05, + "loss": 6.0598, + "step": 3870 + }, + { + "epoch": 0.42, + "learning_rate": 1.6961633166030723e-05, + "loss": 6.0801, + "step": 3880 + }, + { + "epoch": 0.42, + "learning_rate": 1.6944971439876727e-05, + "loss": 6.0754, + "step": 3890 + }, + { + "epoch": 0.42, + "learning_rate": 1.692827238635851e-05, + "loss": 6.0941, + "step": 3900 + }, + { + "epoch": 0.42, + "learning_rate": 1.691153609522901e-05, + "loss": 6.077, + "step": 3910 + }, + { + "epoch": 0.42, + "learning_rate": 1.6894762656441328e-05, + "loss": 6.0566, + "step": 3920 + }, + { + "epoch": 0.42, + "learning_rate": 1.6877952160148203e-05, + "loss": 6.0734, + "step": 3930 + }, + { + "epoch": 0.42, + "learning_rate": 1.686110469670156e-05, + "loss": 6.0445, + "step": 3940 + }, + { + "epoch": 0.42, + "learning_rate": 1.6844220356652013e-05, + "loss": 6.0613, + "step": 3950 + }, + { + "epoch": 0.43, + "learning_rate": 1.6827299230748368e-05, + "loss": 6.0809, + "step": 3960 + }, + { + "epoch": 0.43, + "learning_rate": 1.681034140993716e-05, + "loss": 6.059, + "step": 3970 + }, + { + "epoch": 0.43, + "learning_rate": 1.6793346985362128e-05, + "loss": 6.0805, + "step": 3980 + }, + { + "epoch": 0.43, + "learning_rate": 1.677631604836377e-05, + "loss": 6.0687, + "step": 3990 + }, + { + "epoch": 0.43, + "learning_rate": 1.6759248690478814e-05, + "loss": 6.0637, + "step": 4000 + }, + { + "epoch": 0.43, + "learning_rate": 1.6745568642770166e-05, + "loss": 6.0637, + "step": 4010 + }, + { + "epoch": 0.43, + "learning_rate": 1.6728435958586427e-05, + "loss": 6.0512, + "step": 4020 + }, + { + "epoch": 0.43, + "learning_rate": 1.671126711085877e-05, + "loss": 6.0793, + "step": 4030 + }, + { + "epoch": 0.43, + "learning_rate": 1.6694062191865163e-05, + "loss": 6.0711, + "step": 4040 + }, + { + "epoch": 0.43, + "learning_rate": 1.6676821294077435e-05, + "loss": 6.0551, + "step": 4050 + }, + { + "epoch": 0.44, + "learning_rate": 1.6659544510160808e-05, + "loss": 6.0746, + "step": 4060 + }, + { + "epoch": 0.44, + "learning_rate": 1.664223193297337e-05, + "loss": 6.0773, + "step": 4070 + }, + { + "epoch": 0.44, + "learning_rate": 1.6624883655565602e-05, + "loss": 6.0914, + "step": 4080 + }, + { + "epoch": 0.44, + "learning_rate": 1.6607499771179853e-05, + "loss": 6.0336, + "step": 4090 + }, + { + "epoch": 0.44, + "learning_rate": 1.6590080373249846e-05, + "loss": 6.0766, + "step": 4100 + }, + { + "epoch": 0.44, + "learning_rate": 1.6572625555400194e-05, + "loss": 6.068, + "step": 4110 + }, + { + "epoch": 0.44, + "learning_rate": 1.655513541144587e-05, + "loss": 6.0473, + "step": 4120 + }, + { + "epoch": 0.44, + "learning_rate": 1.6537610035391726e-05, + "loss": 6.0852, + "step": 4130 + }, + { + "epoch": 0.44, + "learning_rate": 1.6520049521431966e-05, + "loss": 6.0773, + "step": 4140 + }, + { + "epoch": 0.45, + "learning_rate": 1.6502453963949662e-05, + "loss": 6.0762, + "step": 4150 + }, + { + "epoch": 0.45, + "learning_rate": 1.6484823457516225e-05, + "loss": 6.0727, + "step": 4160 + }, + { + "epoch": 0.45, + "learning_rate": 1.6467158096890915e-05, + "loss": 6.0875, + "step": 4170 + }, + { + "epoch": 0.45, + "learning_rate": 1.6449457977020315e-05, + "loss": 6.0645, + "step": 4180 + }, + { + "epoch": 0.45, + "learning_rate": 1.6431723193037847e-05, + "loss": 6.0766, + "step": 4190 + }, + { + "epoch": 0.45, + "learning_rate": 1.641395384026322e-05, + "loss": 6.0719, + "step": 4200 + }, + { + "epoch": 0.45, + "learning_rate": 1.6396150014201965e-05, + "loss": 6.0652, + "step": 4210 + }, + { + "epoch": 0.45, + "learning_rate": 1.6378311810544877e-05, + "loss": 6.0836, + "step": 4220 + }, + { + "epoch": 0.45, + "learning_rate": 1.6360439325167536e-05, + "loss": 6.0828, + "step": 4230 + }, + { + "epoch": 0.46, + "learning_rate": 1.6342532654129764e-05, + "loss": 6.0891, + "step": 4240 + }, + { + "epoch": 0.46, + "learning_rate": 1.632459189367514e-05, + "loss": 6.0687, + "step": 4250 + }, + { + "epoch": 0.46, + "learning_rate": 1.6306617140230442e-05, + "loss": 6.0883, + "step": 4260 + }, + { + "epoch": 0.46, + "learning_rate": 1.6288608490405172e-05, + "loss": 6.0848, + "step": 4270 + }, + { + "epoch": 0.46, + "learning_rate": 1.6270566040991004e-05, + "loss": 6.0578, + "step": 4280 + }, + { + "epoch": 0.46, + "learning_rate": 1.6252489888961275e-05, + "loss": 6.075, + "step": 4290 + }, + { + "epoch": 0.46, + "learning_rate": 1.623438013147047e-05, + "loss": 6.073, + "step": 4300 + }, + { + "epoch": 0.46, + "learning_rate": 1.6216236865853695e-05, + "loss": 6.0512, + "step": 4310 + }, + { + "epoch": 0.46, + "learning_rate": 1.6198060189626147e-05, + "loss": 6.0789, + "step": 4320 + }, + { + "epoch": 0.46, + "learning_rate": 1.6179850200482606e-05, + "loss": 6.0766, + "step": 4330 + }, + { + "epoch": 0.47, + "learning_rate": 1.6161606996296888e-05, + "loss": 6.0711, + "step": 4340 + }, + { + "epoch": 0.47, + "learning_rate": 1.614333067512134e-05, + "loss": 6.0738, + "step": 4350 + }, + { + "epoch": 0.47, + "learning_rate": 1.6125021335186295e-05, + "loss": 6.0469, + "step": 4360 + }, + { + "epoch": 0.47, + "learning_rate": 1.6106679074899565e-05, + "loss": 6.0555, + "step": 4370 + }, + { + "epoch": 0.47, + "learning_rate": 1.608830399284589e-05, + "loss": 6.0801, + "step": 4380 + }, + { + "epoch": 0.47, + "learning_rate": 1.6069896187786428e-05, + "loss": 6.0465, + "step": 4390 + }, + { + "epoch": 0.47, + "learning_rate": 1.6051455758658202e-05, + "loss": 6.0773, + "step": 4400 + }, + { + "epoch": 0.47, + "learning_rate": 1.603298280457359e-05, + "loss": 6.075, + "step": 4410 + }, + { + "epoch": 0.47, + "learning_rate": 1.601447742481978e-05, + "loss": 6.0625, + "step": 4420 + }, + { + "epoch": 0.48, + "learning_rate": 1.5995939718858238e-05, + "loss": 6.0566, + "step": 4430 + }, + { + "epoch": 0.48, + "learning_rate": 1.5977369786324177e-05, + "loss": 6.0723, + "step": 4440 + }, + { + "epoch": 0.48, + "learning_rate": 1.595876772702602e-05, + "loss": 6.0676, + "step": 4450 + }, + { + "epoch": 0.48, + "learning_rate": 1.5940133640944858e-05, + "loss": 6.0609, + "step": 4460 + }, + { + "epoch": 0.48, + "learning_rate": 1.592146762823392e-05, + "loss": 6.0832, + "step": 4470 + }, + { + "epoch": 0.48, + "learning_rate": 1.5902769789218037e-05, + "loss": 6.0895, + "step": 4480 + }, + { + "epoch": 0.48, + "learning_rate": 1.588404022439309e-05, + "loss": 6.0391, + "step": 4490 + }, + { + "epoch": 0.48, + "learning_rate": 1.5865279034425484e-05, + "loss": 6.0934, + "step": 4500 + }, + { + "epoch": 0.48, + "learning_rate": 1.5846486320151593e-05, + "loss": 6.0816, + "step": 4510 + }, + { + "epoch": 0.49, + "learning_rate": 1.5827662182577234e-05, + "loss": 6.0523, + "step": 4520 + }, + { + "epoch": 0.49, + "learning_rate": 1.580880672287711e-05, + "loss": 6.0605, + "step": 4530 + }, + { + "epoch": 0.49, + "learning_rate": 1.578992004239428e-05, + "loss": 6.0941, + "step": 4540 + }, + { + "epoch": 0.49, + "learning_rate": 1.5771002242639595e-05, + "loss": 6.0656, + "step": 4550 + }, + { + "epoch": 0.49, + "learning_rate": 1.5752053425291173e-05, + "loss": 6.0797, + "step": 4560 + }, + { + "epoch": 0.49, + "learning_rate": 1.5733073692193833e-05, + "loss": 6.0359, + "step": 4570 + }, + { + "epoch": 0.49, + "learning_rate": 1.5714063145358577e-05, + "loss": 6.0625, + "step": 4580 + }, + { + "epoch": 0.49, + "learning_rate": 1.5695021886961998e-05, + "loss": 6.0734, + "step": 4590 + }, + { + "epoch": 0.49, + "learning_rate": 1.5675950019345775e-05, + "loss": 6.0371, + "step": 4600 + }, + { + "epoch": 0.49, + "learning_rate": 1.56568476450161e-05, + "loss": 6.075, + "step": 4610 + }, + { + "epoch": 0.5, + "learning_rate": 1.563771486664311e-05, + "loss": 6.0828, + "step": 4620 + }, + { + "epoch": 0.5, + "learning_rate": 1.561855178706039e-05, + "loss": 6.0777, + "step": 4630 + }, + { + "epoch": 0.5, + "learning_rate": 1.5599358509264363e-05, + "loss": 6.082, + "step": 4640 + }, + { + "epoch": 0.5, + "learning_rate": 1.5580135136413757e-05, + "loss": 6.0879, + "step": 4650 + }, + { + "epoch": 0.5, + "learning_rate": 1.556088177182907e-05, + "loss": 6.052, + "step": 4660 + }, + { + "epoch": 0.5, + "learning_rate": 1.5541598518991983e-05, + "loss": 6.0824, + "step": 4670 + }, + { + "epoch": 0.5, + "learning_rate": 1.552228548154482e-05, + "loss": 6.0477, + "step": 4680 + }, + { + "epoch": 0.5, + "learning_rate": 1.550294276329e-05, + "loss": 6.0664, + "step": 4690 + }, + { + "epoch": 0.5, + "learning_rate": 1.5483570468189455e-05, + "loss": 6.0559, + "step": 4700 + }, + { + "epoch": 0.51, + "learning_rate": 1.5464168700364093e-05, + "loss": 6.0566, + "step": 4710 + }, + { + "epoch": 0.51, + "learning_rate": 1.544473756409323e-05, + "loss": 6.059, + "step": 4720 + }, + { + "epoch": 0.51, + "learning_rate": 1.5425277163814017e-05, + "loss": 6.0934, + "step": 4730 + }, + { + "epoch": 0.51, + "learning_rate": 1.5405787604120915e-05, + "loss": 6.0863, + "step": 4740 + }, + { + "epoch": 0.51, + "learning_rate": 1.5386268989765085e-05, + "loss": 6.0492, + "step": 4750 + }, + { + "epoch": 0.51, + "learning_rate": 1.5366721425653867e-05, + "loss": 6.0723, + "step": 4760 + }, + { + "epoch": 0.51, + "learning_rate": 1.5347145016850183e-05, + "loss": 6.0586, + "step": 4770 + }, + { + "epoch": 0.51, + "learning_rate": 1.5327539868571998e-05, + "loss": 6.0461, + "step": 4780 + }, + { + "epoch": 0.51, + "learning_rate": 1.5307906086191744e-05, + "loss": 6.0434, + "step": 4790 + }, + { + "epoch": 0.52, + "learning_rate": 1.528824377523575e-05, + "loss": 6.1023, + "step": 4800 + }, + { + "epoch": 0.52, + "learning_rate": 1.5268553041383675e-05, + "loss": 6.0734, + "step": 4810 + }, + { + "epoch": 0.52, + "learning_rate": 1.524883399046795e-05, + "loss": 6.0918, + "step": 4820 + }, + { + "epoch": 0.52, + "learning_rate": 1.5229086728473203e-05, + "loss": 6.0586, + "step": 4830 + }, + { + "epoch": 0.52, + "learning_rate": 1.5209311361535682e-05, + "loss": 6.0738, + "step": 4840 + }, + { + "epoch": 0.52, + "learning_rate": 1.5189507995942698e-05, + "loss": 6.0602, + "step": 4850 + }, + { + "epoch": 0.52, + "learning_rate": 1.5169676738132046e-05, + "loss": 6.0789, + "step": 4860 + }, + { + "epoch": 0.52, + "learning_rate": 1.514981769469143e-05, + "loss": 6.0687, + "step": 4870 + }, + { + "epoch": 0.52, + "learning_rate": 1.5129930972357902e-05, + "loss": 6.0465, + "step": 4880 + }, + { + "epoch": 0.53, + "learning_rate": 1.5110016678017277e-05, + "loss": 6.0559, + "step": 4890 + }, + { + "epoch": 0.53, + "learning_rate": 1.5090074918703553e-05, + "loss": 6.0629, + "step": 4900 + }, + { + "epoch": 0.53, + "learning_rate": 1.5070105801598363e-05, + "loss": 6.059, + "step": 4910 + }, + { + "epoch": 0.53, + "learning_rate": 1.5050109434030366e-05, + "loss": 6.0633, + "step": 4920 + }, + { + "epoch": 0.53, + "learning_rate": 1.503008592347469e-05, + "loss": 6.0691, + "step": 4930 + }, + { + "epoch": 0.53, + "learning_rate": 1.5010035377552354e-05, + "loss": 6.0527, + "step": 4940 + }, + { + "epoch": 0.53, + "learning_rate": 1.4989957904029675e-05, + "loss": 6.0695, + "step": 4950 + }, + { + "epoch": 0.53, + "learning_rate": 1.4969853610817707e-05, + "loss": 6.0676, + "step": 4960 + }, + { + "epoch": 0.53, + "learning_rate": 1.4949722605971647e-05, + "loss": 6.0664, + "step": 4970 + }, + { + "epoch": 0.53, + "learning_rate": 1.4929564997690268e-05, + "loss": 6.0586, + "step": 4980 + }, + { + "epoch": 0.54, + "learning_rate": 1.4909380894315316e-05, + "loss": 6.0492, + "step": 4990 + }, + { + "epoch": 0.54, + "learning_rate": 1.4889170404330953e-05, + "loss": 6.0539, + "step": 5000 + }, + { + "epoch": 0.54, + "learning_rate": 1.4872983086976594e-05, + "loss": 6.0746, + "step": 5010 + }, + { + "epoch": 0.54, + "learning_rate": 1.485272537492769e-05, + "loss": 6.0453, + "step": 5020 + }, + { + "epoch": 0.54, + "learning_rate": 1.4832441580777642e-05, + "loss": 6.0875, + "step": 5030 + }, + { + "epoch": 0.54, + "learning_rate": 1.4812131813546416e-05, + "loss": 6.075, + "step": 5040 + }, + { + "epoch": 0.54, + "learning_rate": 1.4791796182393578e-05, + "loss": 6.077, + "step": 5050 + }, + { + "epoch": 0.54, + "learning_rate": 1.4771434796617716e-05, + "loss": 6.084, + "step": 5060 + }, + { + "epoch": 0.54, + "learning_rate": 1.4751047765655824e-05, + "loss": 6.0563, + "step": 5070 + }, + { + "epoch": 0.55, + "learning_rate": 1.4730635199082746e-05, + "loss": 6.1125, + "step": 5080 + }, + { + "epoch": 0.55, + "learning_rate": 1.4710197206610567e-05, + "loss": 6.0598, + "step": 5090 + }, + { + "epoch": 0.55, + "learning_rate": 1.4689733898088032e-05, + "loss": 6.0789, + "step": 5100 + }, + { + "epoch": 0.55, + "learning_rate": 1.4669245383499952e-05, + "loss": 6.0664, + "step": 5110 + }, + { + "epoch": 0.55, + "learning_rate": 1.4648731772966613e-05, + "loss": 6.0781, + "step": 5120 + }, + { + "epoch": 0.55, + "learning_rate": 1.4628193176743185e-05, + "loss": 6.073, + "step": 5130 + }, + { + "epoch": 0.55, + "learning_rate": 1.460762970521913e-05, + "loss": 6.0598, + "step": 5140 + }, + { + "epoch": 0.55, + "learning_rate": 1.4587041468917609e-05, + "loss": 6.0699, + "step": 5150 + }, + { + "epoch": 0.55, + "learning_rate": 1.4566428578494888e-05, + "loss": 6.068, + "step": 5160 + }, + { + "epoch": 0.56, + "learning_rate": 1.4545791144739737e-05, + "loss": 6.0422, + "step": 5170 + }, + { + "epoch": 0.56, + "learning_rate": 1.452512927857285e-05, + "loss": 6.0852, + "step": 5180 + }, + { + "epoch": 0.56, + "learning_rate": 1.4504443091046225e-05, + "loss": 6.0863, + "step": 5190 + }, + { + "epoch": 0.56, + "learning_rate": 1.448373269334259e-05, + "loss": 6.0699, + "step": 5200 + }, + { + "epoch": 0.56, + "learning_rate": 1.4462998196774796e-05, + "loss": 6.0621, + "step": 5210 + }, + { + "epoch": 0.56, + "learning_rate": 1.4442239712785215e-05, + "loss": 6.0805, + "step": 5220 + }, + { + "epoch": 0.56, + "learning_rate": 1.4421457352945148e-05, + "loss": 6.0809, + "step": 5230 + }, + { + "epoch": 0.56, + "learning_rate": 1.440065122895422e-05, + "loss": 6.0785, + "step": 5240 + }, + { + "epoch": 0.56, + "learning_rate": 1.4379821452639789e-05, + "loss": 6.0484, + "step": 5250 + }, + { + "epoch": 0.56, + "learning_rate": 1.4358968135956323e-05, + "loss": 6.0797, + "step": 5260 + }, + { + "epoch": 0.57, + "learning_rate": 1.4338091390984831e-05, + "loss": 6.0863, + "step": 5270 + }, + { + "epoch": 0.57, + "learning_rate": 1.431719132993223e-05, + "loss": 6.0324, + "step": 5280 + }, + { + "epoch": 0.57, + "learning_rate": 1.4296268065130759e-05, + "loss": 6.0422, + "step": 5290 + }, + { + "epoch": 0.57, + "learning_rate": 1.4275321709037371e-05, + "loss": 6.0672, + "step": 5300 + }, + { + "epoch": 0.57, + "learning_rate": 1.425435237423313e-05, + "loss": 6.0785, + "step": 5310 + }, + { + "epoch": 0.57, + "learning_rate": 1.4233360173422602e-05, + "loss": 6.0828, + "step": 5320 + }, + { + "epoch": 0.57, + "learning_rate": 1.421234521943325e-05, + "loss": 6.0781, + "step": 5330 + }, + { + "epoch": 0.57, + "learning_rate": 1.4191307625214834e-05, + "loss": 6.0445, + "step": 5340 + }, + { + "epoch": 0.57, + "learning_rate": 1.41702475038388e-05, + "loss": 6.0738, + "step": 5350 + }, + { + "epoch": 0.58, + "learning_rate": 1.4149164968497661e-05, + "loss": 6.0773, + "step": 5360 + }, + { + "epoch": 0.58, + "learning_rate": 1.4128060132504413e-05, + "loss": 6.0598, + "step": 5370 + }, + { + "epoch": 0.58, + "learning_rate": 1.4106933109291903e-05, + "loss": 6.0621, + "step": 5380 + }, + { + "epoch": 0.58, + "learning_rate": 1.4085784012412232e-05, + "loss": 6.0539, + "step": 5390 + }, + { + "epoch": 0.58, + "learning_rate": 1.4064612955536138e-05, + "loss": 6.0656, + "step": 5400 + }, + { + "epoch": 0.58, + "learning_rate": 1.4043420052452393e-05, + "loss": 6.0668, + "step": 5410 + }, + { + "epoch": 0.58, + "learning_rate": 1.4022205417067178e-05, + "loss": 6.098, + "step": 5420 + }, + { + "epoch": 0.58, + "learning_rate": 1.4000969163403491e-05, + "loss": 6.0449, + "step": 5430 + }, + { + "epoch": 0.58, + "learning_rate": 1.3979711405600512e-05, + "loss": 6.0687, + "step": 5440 + }, + { + "epoch": 0.59, + "learning_rate": 1.3958432257913005e-05, + "loss": 6.0707, + "step": 5450 + }, + { + "epoch": 0.59, + "learning_rate": 1.3937131834710702e-05, + "loss": 6.084, + "step": 5460 + }, + { + "epoch": 0.59, + "learning_rate": 1.3915810250477679e-05, + "loss": 6.066, + "step": 5470 + }, + { + "epoch": 0.59, + "learning_rate": 1.3894467619811746e-05, + "loss": 6.0598, + "step": 5480 + }, + { + "epoch": 0.59, + "learning_rate": 1.387310405742384e-05, + "loss": 6.0687, + "step": 5490 + }, + { + "epoch": 0.59, + "learning_rate": 1.3851719678137395e-05, + "loss": 6.0605, + "step": 5500 + }, + { + "epoch": 0.59, + "learning_rate": 1.3830314596887728e-05, + "loss": 6.0625, + "step": 5510 + }, + { + "epoch": 0.59, + "learning_rate": 1.380888892872143e-05, + "loss": 6.0684, + "step": 5520 + }, + { + "epoch": 0.59, + "learning_rate": 1.3787442788795733e-05, + "loss": 6.0762, + "step": 5530 + }, + { + "epoch": 0.59, + "learning_rate": 1.3765976292377907e-05, + "loss": 6.0555, + "step": 5540 + }, + { + "epoch": 0.6, + "learning_rate": 1.3744489554844633e-05, + "loss": 6.0848, + "step": 5550 + }, + { + "epoch": 0.6, + "learning_rate": 1.3722982691681372e-05, + "loss": 6.0367, + "step": 5560 + }, + { + "epoch": 0.6, + "learning_rate": 1.3701455818481767e-05, + "loss": 6.0672, + "step": 5570 + }, + { + "epoch": 0.6, + "learning_rate": 1.3679909050947003e-05, + "loss": 6.0445, + "step": 5580 + }, + { + "epoch": 0.6, + "learning_rate": 1.3658342504885192e-05, + "loss": 6.0809, + "step": 5590 + }, + { + "epoch": 0.6, + "learning_rate": 1.3636756296210751e-05, + "loss": 6.0789, + "step": 5600 + }, + { + "epoch": 0.6, + "learning_rate": 1.3615150540943785e-05, + "loss": 6.0805, + "step": 5610 + }, + { + "epoch": 0.6, + "learning_rate": 1.3593525355209444e-05, + "loss": 6.0609, + "step": 5620 + }, + { + "epoch": 0.6, + "learning_rate": 1.3571880855237324e-05, + "loss": 6.0566, + "step": 5630 + }, + { + "epoch": 0.61, + "learning_rate": 1.3550217157360816e-05, + "loss": 6.059, + "step": 5640 + }, + { + "epoch": 0.61, + "learning_rate": 1.3528534378016509e-05, + "loss": 6.0766, + "step": 5650 + }, + { + "epoch": 0.61, + "learning_rate": 1.3506832633743543e-05, + "loss": 6.0742, + "step": 5660 + }, + { + "epoch": 0.61, + "learning_rate": 1.3485112041182988e-05, + "loss": 6.0438, + "step": 5670 + }, + { + "epoch": 0.61, + "learning_rate": 1.3463372717077221e-05, + "loss": 6.0406, + "step": 5680 + }, + { + "epoch": 0.61, + "learning_rate": 1.3441614778269297e-05, + "loss": 6.0574, + "step": 5690 + }, + { + "epoch": 0.61, + "learning_rate": 1.3419838341702314e-05, + "loss": 6.0656, + "step": 5700 + }, + { + "epoch": 0.61, + "learning_rate": 1.33980435244188e-05, + "loss": 6.0535, + "step": 5710 + }, + { + "epoch": 0.61, + "learning_rate": 1.3376230443560066e-05, + "loss": 6.0488, + "step": 5720 + }, + { + "epoch": 0.62, + "learning_rate": 1.335439921636559e-05, + "loss": 6.0727, + "step": 5730 + }, + { + "epoch": 0.62, + "learning_rate": 1.333254996017238e-05, + "loss": 6.1, + "step": 5740 + }, + { + "epoch": 0.62, + "learning_rate": 1.3310682792414344e-05, + "loss": 6.0625, + "step": 5750 + }, + { + "epoch": 0.62, + "learning_rate": 1.3288797830621661e-05, + "loss": 6.0465, + "step": 5760 + }, + { + "epoch": 0.62, + "learning_rate": 1.3266895192420149e-05, + "loss": 6.0668, + "step": 5770 + }, + { + "epoch": 0.62, + "learning_rate": 1.324497499553063e-05, + "loss": 6.0781, + "step": 5780 + }, + { + "epoch": 0.62, + "learning_rate": 1.32230373577683e-05, + "loss": 6.0762, + "step": 5790 + }, + { + "epoch": 0.62, + "learning_rate": 1.3201082397042097e-05, + "loss": 6.0711, + "step": 5800 + }, + { + "epoch": 0.62, + "learning_rate": 1.3179110231354062e-05, + "loss": 6.0719, + "step": 5810 + }, + { + "epoch": 0.62, + "learning_rate": 1.315712097879871e-05, + "loss": 6.0715, + "step": 5820 + }, + { + "epoch": 0.63, + "learning_rate": 1.313511475756239e-05, + "loss": 6.0609, + "step": 5830 + }, + { + "epoch": 0.63, + "learning_rate": 1.3113091685922663e-05, + "loss": 6.0629, + "step": 5840 + }, + { + "epoch": 0.63, + "learning_rate": 1.3091051882247646e-05, + "loss": 6.0691, + "step": 5850 + }, + { + "epoch": 0.63, + "learning_rate": 1.3068995464995387e-05, + "loss": 6.0664, + "step": 5860 + }, + { + "epoch": 0.63, + "learning_rate": 1.3046922552713232e-05, + "loss": 6.0926, + "step": 5870 + }, + { + "epoch": 0.63, + "learning_rate": 1.3024833264037185e-05, + "loss": 6.0512, + "step": 5880 + }, + { + "epoch": 0.63, + "learning_rate": 1.300272771769126e-05, + "loss": 6.0441, + "step": 5890 + }, + { + "epoch": 0.63, + "learning_rate": 1.2980606032486862e-05, + "loss": 6.0664, + "step": 5900 + }, + { + "epoch": 0.63, + "learning_rate": 1.295846832732213e-05, + "loss": 6.0781, + "step": 5910 + }, + { + "epoch": 0.64, + "learning_rate": 1.2936314721181309e-05, + "loss": 6.0445, + "step": 5920 + }, + { + "epoch": 0.64, + "learning_rate": 1.291414533313411e-05, + "loss": 6.0512, + "step": 5930 + }, + { + "epoch": 0.64, + "learning_rate": 1.2891960282335063e-05, + "loss": 6.0586, + "step": 5940 + }, + { + "epoch": 0.64, + "learning_rate": 1.2869759688022882e-05, + "loss": 6.0816, + "step": 5950 + }, + { + "epoch": 0.64, + "learning_rate": 1.2847543669519828e-05, + "loss": 6.0602, + "step": 5960 + }, + { + "epoch": 0.64, + "learning_rate": 1.2825312346231058e-05, + "loss": 6.0816, + "step": 5970 + }, + { + "epoch": 0.64, + "learning_rate": 1.2803065837643987e-05, + "loss": 6.0789, + "step": 5980 + }, + { + "epoch": 0.64, + "learning_rate": 1.2780804263327653e-05, + "loss": 6.0754, + "step": 5990 + }, + { + "epoch": 0.64, + "learning_rate": 1.2758527742932061e-05, + "loss": 6.0805, + "step": 6000 + }, + { + "epoch": 0.65, + "learning_rate": 1.2736236396187554e-05, + "loss": 6.0617, + "step": 6010 + }, + { + "epoch": 0.65, + "learning_rate": 1.2718392724330404e-05, + "loss": 6.0637, + "step": 6020 + }, + { + "epoch": 0.65, + "learning_rate": 1.2696074992132255e-05, + "loss": 6.0383, + "step": 6030 + }, + { + "epoch": 0.65, + "learning_rate": 1.2673742769252024e-05, + "loss": 6.0625, + "step": 6040 + }, + { + "epoch": 0.65, + "learning_rate": 1.2651396175719437e-05, + "loss": 6.0523, + "step": 6050 + }, + { + "epoch": 0.65, + "learning_rate": 1.2629035331641457e-05, + "loss": 6.0496, + "step": 6060 + }, + { + "epoch": 0.65, + "learning_rate": 1.260666035720164e-05, + "loss": 6.0875, + "step": 6070 + }, + { + "epoch": 0.65, + "learning_rate": 1.2584271372659495e-05, + "loss": 6.0566, + "step": 6080 + }, + { + "epoch": 0.65, + "learning_rate": 1.256186849834982e-05, + "loss": 6.0273, + "step": 6090 + }, + { + "epoch": 0.65, + "learning_rate": 1.2539451854682078e-05, + "loss": 6.0637, + "step": 6100 + }, + { + "epoch": 0.66, + "learning_rate": 1.2517021562139732e-05, + "loss": 6.0664, + "step": 6110 + }, + { + "epoch": 0.66, + "learning_rate": 1.249457774127961e-05, + "loss": 6.0543, + "step": 6120 + }, + { + "epoch": 0.66, + "learning_rate": 1.2472120512731239e-05, + "loss": 6.0859, + "step": 6130 + }, + { + "epoch": 0.66, + "learning_rate": 1.2449649997196223e-05, + "loss": 6.0438, + "step": 6140 + }, + { + "epoch": 0.66, + "learning_rate": 1.2427166315447572e-05, + "loss": 6.0711, + "step": 6150 + }, + { + "epoch": 0.66, + "learning_rate": 1.240466958832906e-05, + "loss": 6.0512, + "step": 6160 + }, + { + "epoch": 0.66, + "learning_rate": 1.2382159936754583e-05, + "loss": 6.0273, + "step": 6170 + }, + { + "epoch": 0.66, + "learning_rate": 1.2359637481707499e-05, + "loss": 6.0586, + "step": 6180 + }, + { + "epoch": 0.66, + "learning_rate": 1.233710234423998e-05, + "loss": 6.0613, + "step": 6190 + }, + { + "epoch": 0.67, + "learning_rate": 1.231455464547236e-05, + "loss": 6.082, + "step": 6200 + }, + { + "epoch": 0.67, + "learning_rate": 1.2291994506592493e-05, + "loss": 6.0703, + "step": 6210 + }, + { + "epoch": 0.67, + "learning_rate": 1.2269422048855093e-05, + "loss": 6.1051, + "step": 6220 + }, + { + "epoch": 0.67, + "learning_rate": 1.2246837393581081e-05, + "loss": 6.066, + "step": 6230 + }, + { + "epoch": 0.67, + "learning_rate": 1.2224240662156943e-05, + "loss": 6.057, + "step": 6240 + }, + { + "epoch": 0.67, + "learning_rate": 1.2201631976034064e-05, + "loss": 6.0687, + "step": 6250 + }, + { + "epoch": 0.67, + "learning_rate": 1.217901145672809e-05, + "loss": 6.0809, + "step": 6260 + }, + { + "epoch": 0.67, + "learning_rate": 1.2156379225818257e-05, + "loss": 6.0367, + "step": 6270 + }, + { + "epoch": 0.67, + "learning_rate": 1.213373540494676e-05, + "loss": 6.0555, + "step": 6280 + }, + { + "epoch": 0.68, + "learning_rate": 1.2111080115818076e-05, + "loss": 6.052, + "step": 6290 + }, + { + "epoch": 0.68, + "learning_rate": 1.2088413480198326e-05, + "loss": 6.0613, + "step": 6300 + }, + { + "epoch": 0.68, + "learning_rate": 1.2065735619914618e-05, + "loss": 6.059, + "step": 6310 + }, + { + "epoch": 0.68, + "learning_rate": 1.2043046656854385e-05, + "loss": 6.0816, + "step": 6320 + }, + { + "epoch": 0.68, + "learning_rate": 1.2020346712964732e-05, + "loss": 6.0734, + "step": 6330 + }, + { + "epoch": 0.68, + "learning_rate": 1.1997635910251793e-05, + "loss": 6.0582, + "step": 6340 + }, + { + "epoch": 0.68, + "learning_rate": 1.1974914370780053e-05, + "loss": 6.0648, + "step": 6350 + }, + { + "epoch": 0.68, + "learning_rate": 1.1952182216671715e-05, + "loss": 6.0746, + "step": 6360 + }, + { + "epoch": 0.68, + "learning_rate": 1.1929439570106028e-05, + "loss": 6.0434, + "step": 6370 + }, + { + "epoch": 0.68, + "learning_rate": 1.1906686553318632e-05, + "loss": 6.084, + "step": 6380 + }, + { + "epoch": 0.69, + "learning_rate": 1.188392328860091e-05, + "loss": 6.0395, + "step": 6390 + }, + { + "epoch": 0.69, + "learning_rate": 1.186114989829932e-05, + "loss": 6.0074, + "step": 6400 + }, + { + "epoch": 0.69, + "learning_rate": 1.1838366504814749e-05, + "loss": 6.0477, + "step": 6410 + }, + { + "epoch": 0.69, + "learning_rate": 1.181557323060184e-05, + "loss": 6.0625, + "step": 6420 + }, + { + "epoch": 0.69, + "learning_rate": 1.1792770198168348e-05, + "loss": 6.0648, + "step": 6430 + }, + { + "epoch": 0.69, + "learning_rate": 1.1769957530074474e-05, + "loss": 6.0895, + "step": 6440 + }, + { + "epoch": 0.69, + "learning_rate": 1.1747135348932208e-05, + "loss": 6.0687, + "step": 6450 + }, + { + "epoch": 0.69, + "learning_rate": 1.1724303777404671e-05, + "loss": 6.066, + "step": 6460 + }, + { + "epoch": 0.69, + "learning_rate": 1.1701462938205455e-05, + "loss": 6.0695, + "step": 6470 + }, + { + "epoch": 0.7, + "learning_rate": 1.167861295409796e-05, + "loss": 6.059, + "step": 6480 + }, + { + "epoch": 0.7, + "learning_rate": 1.1655753947894743e-05, + "loss": 6.0645, + "step": 6490 + }, + { + "epoch": 0.7, + "learning_rate": 1.1632886042456847e-05, + "loss": 6.0977, + "step": 6500 + }, + { + "epoch": 0.7, + "learning_rate": 1.1610009360693151e-05, + "loss": 6.0563, + "step": 6510 + }, + { + "epoch": 0.7, + "learning_rate": 1.1587124025559697e-05, + "loss": 6.0621, + "step": 6520 + }, + { + "epoch": 0.7, + "learning_rate": 1.1564230160059044e-05, + "loss": 6.0812, + "step": 6530 + }, + { + "epoch": 0.7, + "learning_rate": 1.1541327887239597e-05, + "loss": 6.0484, + "step": 6540 + }, + { + "epoch": 0.7, + "learning_rate": 1.1518417330194949e-05, + "loss": 6.041, + "step": 6550 + }, + { + "epoch": 0.7, + "learning_rate": 1.1495498612063212e-05, + "loss": 6.0566, + "step": 6560 + }, + { + "epoch": 0.71, + "learning_rate": 1.147257185602637e-05, + "loss": 6.0738, + "step": 6570 + }, + { + "epoch": 0.71, + "learning_rate": 1.144963718530961e-05, + "loss": 6.0492, + "step": 6580 + }, + { + "epoch": 0.71, + "learning_rate": 1.1426694723180647e-05, + "loss": 6.0906, + "step": 6590 + }, + { + "epoch": 0.71, + "learning_rate": 1.1403744592949082e-05, + "loss": 6.0648, + "step": 6600 + }, + { + "epoch": 0.71, + "learning_rate": 1.1380786917965727e-05, + "loss": 6.0492, + "step": 6610 + }, + { + "epoch": 0.71, + "learning_rate": 1.1357821821621948e-05, + "loss": 6.0648, + "step": 6620 + }, + { + "epoch": 0.71, + "learning_rate": 1.1334849427348992e-05, + "loss": 6.0504, + "step": 6630 + }, + { + "epoch": 0.71, + "learning_rate": 1.1311869858617342e-05, + "loss": 6.057, + "step": 6640 + }, + { + "epoch": 0.71, + "learning_rate": 1.1288883238936026e-05, + "loss": 6.0496, + "step": 6650 + }, + { + "epoch": 0.72, + "learning_rate": 1.1265889691851981e-05, + "loss": 6.0852, + "step": 6660 + }, + { + "epoch": 0.72, + "learning_rate": 1.1242889340949376e-05, + "loss": 6.0457, + "step": 6670 + }, + { + "epoch": 0.72, + "learning_rate": 1.1219882309848945e-05, + "loss": 6.0379, + "step": 6680 + }, + { + "epoch": 0.72, + "learning_rate": 1.1196868722207323e-05, + "loss": 6.0691, + "step": 6690 + }, + { + "epoch": 0.72, + "learning_rate": 1.1173848701716396e-05, + "loss": 6.0727, + "step": 6700 + }, + { + "epoch": 0.72, + "learning_rate": 1.1150822372102612e-05, + "loss": 6.0672, + "step": 6710 + }, + { + "epoch": 0.72, + "learning_rate": 1.1127789857126341e-05, + "loss": 6.0758, + "step": 6720 + }, + { + "epoch": 0.72, + "learning_rate": 1.1104751280581187e-05, + "loss": 6.0453, + "step": 6730 + }, + { + "epoch": 0.72, + "learning_rate": 1.1081706766293339e-05, + "loss": 6.0609, + "step": 6740 + }, + { + "epoch": 0.72, + "learning_rate": 1.1058656438120898e-05, + "loss": 6.0738, + "step": 6750 + }, + { + "epoch": 0.73, + "learning_rate": 1.1035600419953216e-05, + "loss": 6.084, + "step": 6760 + }, + { + "epoch": 0.73, + "learning_rate": 1.1012538835710223e-05, + "loss": 6.0277, + "step": 6770 + }, + { + "epoch": 0.73, + "learning_rate": 1.0989471809341764e-05, + "loss": 6.0465, + "step": 6780 + }, + { + "epoch": 0.73, + "learning_rate": 1.0966399464826944e-05, + "loss": 6.0648, + "step": 6790 + }, + { + "epoch": 0.73, + "learning_rate": 1.0943321926173441e-05, + "loss": 6.0539, + "step": 6800 + }, + { + "epoch": 0.73, + "learning_rate": 1.0920239317416851e-05, + "loss": 6.0652, + "step": 6810 + }, + { + "epoch": 0.73, + "learning_rate": 1.0897151762620028e-05, + "loss": 6.0965, + "step": 6820 + }, + { + "epoch": 0.73, + "learning_rate": 1.0874059385872403e-05, + "loss": 6.0551, + "step": 6830 + }, + { + "epoch": 0.73, + "learning_rate": 1.0850962311289322e-05, + "loss": 6.0621, + "step": 6840 + }, + { + "epoch": 0.74, + "learning_rate": 1.082786066301139e-05, + "loss": 6.0477, + "step": 6850 + }, + { + "epoch": 0.74, + "learning_rate": 1.0804754565203784e-05, + "loss": 6.0488, + "step": 6860 + }, + { + "epoch": 0.74, + "learning_rate": 1.0781644142055603e-05, + "loss": 6.0551, + "step": 6870 + }, + { + "epoch": 0.74, + "learning_rate": 1.075852951777919e-05, + "loss": 6.0863, + "step": 6880 + }, + { + "epoch": 0.74, + "learning_rate": 1.0735410816609468e-05, + "loss": 6.0699, + "step": 6890 + }, + { + "epoch": 0.74, + "learning_rate": 1.0712288162803278e-05, + "loss": 6.0406, + "step": 6900 + }, + { + "epoch": 0.74, + "learning_rate": 1.0689161680638692e-05, + "loss": 6.0809, + "step": 6910 + }, + { + "epoch": 0.74, + "learning_rate": 1.0666031494414377e-05, + "loss": 6.0766, + "step": 6920 + }, + { + "epoch": 0.74, + "learning_rate": 1.0642897728448893e-05, + "loss": 6.0379, + "step": 6930 + }, + { + "epoch": 0.75, + "learning_rate": 1.0619760507080045e-05, + "loss": 6.0586, + "step": 6940 + }, + { + "epoch": 0.75, + "learning_rate": 1.059661995466421e-05, + "loss": 6.0617, + "step": 6950 + }, + { + "epoch": 0.75, + "learning_rate": 1.0573476195575673e-05, + "loss": 6.1004, + "step": 6960 + }, + { + "epoch": 0.75, + "learning_rate": 1.0550329354205948e-05, + "loss": 6.0496, + "step": 6970 + }, + { + "epoch": 0.75, + "learning_rate": 1.0527179554963116e-05, + "loss": 6.057, + "step": 6980 + }, + { + "epoch": 0.75, + "learning_rate": 1.0504026922271156e-05, + "loss": 6.0441, + "step": 6990 + }, + { + "epoch": 0.75, + "learning_rate": 1.0480871580569281e-05, + "loss": 6.0758, + "step": 7000 + }, + { + "epoch": 0.75, + "learning_rate": 1.0457713654311255e-05, + "loss": 6.0684, + "step": 7010 + }, + { + "epoch": 0.75, + "learning_rate": 1.0439185536066226e-05, + "loss": 6.066, + "step": 7020 + }, + { + "epoch": 0.75, + "learning_rate": 1.0416023271274547e-05, + "loss": 6.0633, + "step": 7030 + }, + { + "epoch": 0.76, + "learning_rate": 1.0392858770469041e-05, + "loss": 6.082, + "step": 7040 + }, + { + "epoch": 0.76, + "learning_rate": 1.0369692158152705e-05, + "loss": 6.0398, + "step": 7050 + }, + { + "epoch": 0.76, + "learning_rate": 1.0346523558839883e-05, + "loss": 6.0672, + "step": 7060 + }, + { + "epoch": 0.76, + "learning_rate": 1.0323353097055601e-05, + "loss": 6.0828, + "step": 7070 + }, + { + "epoch": 0.76, + "learning_rate": 1.0300180897334897e-05, + "loss": 6.05, + "step": 7080 + }, + { + "epoch": 0.76, + "learning_rate": 1.0277007084222145e-05, + "loss": 6.0773, + "step": 7090 + }, + { + "epoch": 0.76, + "learning_rate": 1.0253831782270395e-05, + "loss": 6.0387, + "step": 7100 + }, + { + "epoch": 0.76, + "learning_rate": 1.0230655116040695e-05, + "loss": 6.0566, + "step": 7110 + }, + { + "epoch": 0.76, + "learning_rate": 1.020747721010143e-05, + "loss": 6.0617, + "step": 7120 + }, + { + "epoch": 0.77, + "learning_rate": 1.0184298189027648e-05, + "loss": 6.0375, + "step": 7130 + }, + { + "epoch": 0.77, + "learning_rate": 1.0161118177400384e-05, + "loss": 6.0359, + "step": 7140 + }, + { + "epoch": 0.77, + "learning_rate": 1.0137937299806005e-05, + "loss": 6.0812, + "step": 7150 + }, + { + "epoch": 0.77, + "learning_rate": 1.0114755680835524e-05, + "loss": 6.0723, + "step": 7160 + }, + { + "epoch": 0.77, + "learning_rate": 1.0091573445083951e-05, + "loss": 6.0695, + "step": 7170 + }, + { + "epoch": 0.77, + "learning_rate": 1.0068390717149597e-05, + "loss": 6.0531, + "step": 7180 + }, + { + "epoch": 0.77, + "learning_rate": 1.004520762163343e-05, + "loss": 6.0613, + "step": 7190 + }, + { + "epoch": 0.77, + "learning_rate": 1.0022024283138385e-05, + "loss": 6.0469, + "step": 7200 + }, + { + "epoch": 0.77, + "learning_rate": 9.998840826268708e-06, + "loss": 6.0887, + "step": 7210 + }, + { + "epoch": 0.78, + "learning_rate": 9.975657375629279e-06, + "loss": 6.0621, + "step": 7220 + }, + { + "epoch": 0.78, + "learning_rate": 9.952474055824947e-06, + "loss": 6.084, + "step": 7230 + }, + { + "epoch": 0.78, + "learning_rate": 9.929290991459855e-06, + "loss": 6.0441, + "step": 7240 + }, + { + "epoch": 0.78, + "learning_rate": 9.906108307136776e-06, + "loss": 6.0609, + "step": 7250 + }, + { + "epoch": 0.78, + "learning_rate": 9.882926127456437e-06, + "loss": 6.0621, + "step": 7260 + }, + { + "epoch": 0.78, + "learning_rate": 9.859744577016852e-06, + "loss": 6.0531, + "step": 7270 + }, + { + "epoch": 0.78, + "learning_rate": 9.83656378041266e-06, + "loss": 6.0738, + "step": 7280 + }, + { + "epoch": 0.78, + "learning_rate": 9.813383862234441e-06, + "loss": 6.0812, + "step": 7290 + }, + { + "epoch": 0.78, + "learning_rate": 9.790204947068054e-06, + "loss": 6.0414, + "step": 7300 + }, + { + "epoch": 0.78, + "learning_rate": 9.767027159493975e-06, + "loss": 6.0625, + "step": 7310 + }, + { + "epoch": 0.79, + "learning_rate": 9.743850624086608e-06, + "loss": 6.057, + "step": 7320 + }, + { + "epoch": 0.79, + "learning_rate": 9.720675465413639e-06, + "loss": 6.0988, + "step": 7330 + }, + { + "epoch": 0.79, + "learning_rate": 9.697501808035341e-06, + "loss": 6.0418, + "step": 7340 + }, + { + "epoch": 0.79, + "learning_rate": 9.674329776503932e-06, + "loss": 6.0586, + "step": 7350 + }, + { + "epoch": 0.79, + "learning_rate": 9.65115949536288e-06, + "loss": 6.0746, + "step": 7360 + }, + { + "epoch": 0.79, + "learning_rate": 9.627991089146249e-06, + "loss": 6.0832, + "step": 7370 + }, + { + "epoch": 0.79, + "learning_rate": 9.604824682378032e-06, + "loss": 6.0473, + "step": 7380 + }, + { + "epoch": 0.79, + "learning_rate": 9.581660399571466e-06, + "loss": 6.0551, + "step": 7390 + }, + { + "epoch": 0.79, + "learning_rate": 9.558498365228379e-06, + "loss": 6.0707, + "step": 7400 + }, + { + "epoch": 0.8, + "learning_rate": 9.53533870383851e-06, + "loss": 6.0773, + "step": 7410 + }, + { + "epoch": 0.8, + "learning_rate": 9.512181539878843e-06, + "loss": 6.0504, + "step": 7420 + }, + { + "epoch": 0.8, + "learning_rate": 9.489026997812946e-06, + "loss": 6.0781, + "step": 7430 + }, + { + "epoch": 0.8, + "learning_rate": 9.465875202090288e-06, + "loss": 6.041, + "step": 7440 + }, + { + "epoch": 0.8, + "learning_rate": 9.442726277145578e-06, + "loss": 6.0574, + "step": 7450 + }, + { + "epoch": 0.8, + "learning_rate": 9.419580347398102e-06, + "loss": 6.0664, + "step": 7460 + }, + { + "epoch": 0.8, + "learning_rate": 9.396437537251038e-06, + "loss": 6.0613, + "step": 7470 + }, + { + "epoch": 0.8, + "learning_rate": 9.373297971090803e-06, + "loss": 6.0637, + "step": 7480 + }, + { + "epoch": 0.8, + "learning_rate": 9.350161773286377e-06, + "loss": 6.0715, + "step": 7490 + }, + { + "epoch": 0.81, + "learning_rate": 9.327029068188636e-06, + "loss": 6.0676, + "step": 7500 + }, + { + "epoch": 0.81, + "learning_rate": 9.303899980129682e-06, + "loss": 6.0578, + "step": 7510 + }, + { + "epoch": 0.81, + "learning_rate": 9.280774633422183e-06, + "loss": 6.0598, + "step": 7520 + }, + { + "epoch": 0.81, + "learning_rate": 9.257653152358687e-06, + "loss": 6.0488, + "step": 7530 + }, + { + "epoch": 0.81, + "learning_rate": 9.234535661210975e-06, + "loss": 6.0543, + "step": 7540 + }, + { + "epoch": 0.81, + "learning_rate": 9.21142228422938e-06, + "loss": 6.0477, + "step": 7550 + }, + { + "epoch": 0.81, + "learning_rate": 9.18831314564212e-06, + "loss": 6.0438, + "step": 7560 + }, + { + "epoch": 0.81, + "learning_rate": 9.165208369654636e-06, + "loss": 6.0848, + "step": 7570 + }, + { + "epoch": 0.81, + "learning_rate": 9.14210808044892e-06, + "loss": 6.0656, + "step": 7580 + }, + { + "epoch": 0.81, + "learning_rate": 9.119012402182851e-06, + "loss": 6.0582, + "step": 7590 + }, + { + "epoch": 0.82, + "learning_rate": 9.09592145898952e-06, + "loss": 6.0762, + "step": 7600 + }, + { + "epoch": 0.82, + "learning_rate": 9.072835374976573e-06, + "loss": 6.0777, + "step": 7610 + }, + { + "epoch": 0.82, + "learning_rate": 9.049754274225536e-06, + "loss": 6.0641, + "step": 7620 + }, + { + "epoch": 0.82, + "learning_rate": 9.026678280791157e-06, + "loss": 6.0488, + "step": 7630 + }, + { + "epoch": 0.82, + "learning_rate": 9.003607518700726e-06, + "loss": 6.0465, + "step": 7640 + }, + { + "epoch": 0.82, + "learning_rate": 8.98054211195342e-06, + "loss": 6.0781, + "step": 7650 + }, + { + "epoch": 0.82, + "learning_rate": 8.957482184519635e-06, + "loss": 6.0633, + "step": 7660 + }, + { + "epoch": 0.82, + "learning_rate": 8.93442786034031e-06, + "loss": 6.0531, + "step": 7670 + }, + { + "epoch": 0.82, + "learning_rate": 8.911379263326275e-06, + "loss": 6.0781, + "step": 7680 + }, + { + "epoch": 0.83, + "learning_rate": 8.888336517357574e-06, + "loss": 6.0863, + "step": 7690 + }, + { + "epoch": 0.83, + "learning_rate": 8.865299746282805e-06, + "loss": 6.0824, + "step": 7700 + }, + { + "epoch": 0.83, + "learning_rate": 8.84226907391845e-06, + "loss": 6.0797, + "step": 7710 + }, + { + "epoch": 0.83, + "learning_rate": 8.819244624048216e-06, + "loss": 6.0871, + "step": 7720 + }, + { + "epoch": 0.83, + "learning_rate": 8.796226520422364e-06, + "loss": 6.0461, + "step": 7730 + }, + { + "epoch": 0.83, + "learning_rate": 8.773214886757045e-06, + "loss": 6.0598, + "step": 7740 + }, + { + "epoch": 0.83, + "learning_rate": 8.750209846733634e-06, + "loss": 6.0531, + "step": 7750 + }, + { + "epoch": 0.83, + "learning_rate": 8.72721152399807e-06, + "loss": 6.0422, + "step": 7760 + }, + { + "epoch": 0.83, + "learning_rate": 8.704220042160188e-06, + "loss": 6.0687, + "step": 7770 + }, + { + "epoch": 0.84, + "learning_rate": 8.681235524793052e-06, + "loss": 6.0547, + "step": 7780 + }, + { + "epoch": 0.84, + "learning_rate": 8.658258095432302e-06, + "loss": 6.0434, + "step": 7790 + }, + { + "epoch": 0.84, + "learning_rate": 8.635287877575471e-06, + "loss": 6.0555, + "step": 7800 + }, + { + "epoch": 0.84, + "learning_rate": 8.61232499468134e-06, + "loss": 6.0824, + "step": 7810 + }, + { + "epoch": 0.84, + "learning_rate": 8.58936957016926e-06, + "loss": 6.057, + "step": 7820 + }, + { + "epoch": 0.84, + "learning_rate": 8.566421727418504e-06, + "loss": 6.0461, + "step": 7830 + }, + { + "epoch": 0.84, + "learning_rate": 8.543481589767586e-06, + "loss": 6.0723, + "step": 7840 + }, + { + "epoch": 0.84, + "learning_rate": 8.520549280513612e-06, + "loss": 6.0836, + "step": 7850 + }, + { + "epoch": 0.84, + "learning_rate": 8.497624922911613e-06, + "loss": 6.0816, + "step": 7860 + }, + { + "epoch": 0.84, + "learning_rate": 8.474708640173878e-06, + "loss": 6.0609, + "step": 7870 + }, + { + "epoch": 0.85, + "learning_rate": 8.4518005554693e-06, + "loss": 6.0523, + "step": 7880 + }, + { + "epoch": 0.85, + "learning_rate": 8.428900791922707e-06, + "loss": 6.0352, + "step": 7890 + }, + { + "epoch": 0.85, + "learning_rate": 8.406009472614205e-06, + "loss": 6.0711, + "step": 7900 + }, + { + "epoch": 0.85, + "learning_rate": 8.383126720578513e-06, + "loss": 6.0758, + "step": 7910 + }, + { + "epoch": 0.85, + "learning_rate": 8.360252658804304e-06, + "loss": 6.0695, + "step": 7920 + }, + { + "epoch": 0.85, + "learning_rate": 8.337387410233544e-06, + "loss": 6.05, + "step": 7930 + }, + { + "epoch": 0.85, + "learning_rate": 8.314531097760827e-06, + "loss": 6.0324, + "step": 7940 + }, + { + "epoch": 0.85, + "learning_rate": 8.291683844232721e-06, + "loss": 6.0859, + "step": 7950 + }, + { + "epoch": 0.85, + "learning_rate": 8.268845772447106e-06, + "loss": 6.0648, + "step": 7960 + }, + { + "epoch": 0.86, + "learning_rate": 8.246017005152508e-06, + "loss": 6.0598, + "step": 7970 + }, + { + "epoch": 0.86, + "learning_rate": 8.223197665047447e-06, + "loss": 6.0504, + "step": 7980 + }, + { + "epoch": 0.86, + "learning_rate": 8.200387874779771e-06, + "loss": 6.0684, + "step": 7990 + }, + { + "epoch": 0.86, + "learning_rate": 8.177587756946008e-06, + "loss": 6.0723, + "step": 8000 + }, + { + "epoch": 0.86, + "learning_rate": 8.15479743409069e-06, + "loss": 6.0941, + "step": 8010 + }, + { + "epoch": 0.86, + "learning_rate": 8.136572310507251e-06, + "loss": 6.073, + "step": 8020 + }, + { + "epoch": 0.86, + "learning_rate": 8.113799927256902e-06, + "loss": 6.0602, + "step": 8030 + }, + { + "epoch": 0.86, + "learning_rate": 8.091037681827527e-06, + "loss": 6.0617, + "step": 8040 + }, + { + "epoch": 0.86, + "learning_rate": 8.068285696560103e-06, + "loss": 6.0793, + "step": 8050 + }, + { + "epoch": 0.87, + "learning_rate": 8.045544093740472e-06, + "loss": 6.0555, + "step": 8060 + }, + { + "epoch": 0.87, + "learning_rate": 8.022812995598664e-06, + "loss": 6.0637, + "step": 8070 + }, + { + "epoch": 0.87, + "learning_rate": 8.000092524308252e-06, + "loss": 6.0867, + "step": 8080 + }, + { + "epoch": 0.87, + "learning_rate": 7.977382801985697e-06, + "loss": 6.0465, + "step": 8090 + }, + { + "epoch": 0.87, + "learning_rate": 7.95468395068968e-06, + "loss": 6.0605, + "step": 8100 + }, + { + "epoch": 0.87, + "learning_rate": 7.931996092420458e-06, + "loss": 6.0438, + "step": 8110 + }, + { + "epoch": 0.87, + "learning_rate": 7.9093193491192e-06, + "loss": 6.0602, + "step": 8120 + }, + { + "epoch": 0.87, + "learning_rate": 7.88665384266734e-06, + "loss": 6.0512, + "step": 8130 + }, + { + "epoch": 0.87, + "learning_rate": 7.863999694885911e-06, + "loss": 6.0555, + "step": 8140 + }, + { + "epoch": 0.88, + "learning_rate": 7.8413570275349e-06, + "loss": 6.0625, + "step": 8150 + }, + { + "epoch": 0.88, + "learning_rate": 7.81872596231259e-06, + "loss": 6.0555, + "step": 8160 + }, + { + "epoch": 0.88, + "learning_rate": 7.796106620854902e-06, + "loss": 6.0285, + "step": 8170 + }, + { + "epoch": 0.88, + "learning_rate": 7.77349912473475e-06, + "loss": 6.0684, + "step": 8180 + }, + { + "epoch": 0.88, + "learning_rate": 7.750903595461376e-06, + "loss": 6.0668, + "step": 8190 + }, + { + "epoch": 0.88, + "learning_rate": 7.728320154479712e-06, + "loss": 6.0996, + "step": 8200 + }, + { + "epoch": 0.88, + "learning_rate": 7.705748923169711e-06, + "loss": 6.084, + "step": 8210 + }, + { + "epoch": 0.88, + "learning_rate": 7.683190022845704e-06, + "loss": 6.0563, + "step": 8220 + }, + { + "epoch": 0.88, + "learning_rate": 7.660643574755751e-06, + "loss": 6.0223, + "step": 8230 + }, + { + "epoch": 0.88, + "learning_rate": 7.63810970008098e-06, + "loss": 6.0863, + "step": 8240 + }, + { + "epoch": 0.89, + "learning_rate": 7.615588519934938e-06, + "loss": 6.0617, + "step": 8250 + }, + { + "epoch": 0.89, + "learning_rate": 7.593080155362949e-06, + "loss": 6.0738, + "step": 8260 + }, + { + "epoch": 0.89, + "learning_rate": 7.570584727341451e-06, + "loss": 6.1051, + "step": 8270 + }, + { + "epoch": 0.89, + "learning_rate": 7.548102356777356e-06, + "loss": 6.0566, + "step": 8280 + }, + { + "epoch": 0.89, + "learning_rate": 7.52563316450739e-06, + "loss": 6.0719, + "step": 8290 + }, + { + "epoch": 0.89, + "learning_rate": 7.5031772712974575e-06, + "loss": 6.0516, + "step": 8300 + }, + { + "epoch": 0.89, + "learning_rate": 7.4807347978419754e-06, + "loss": 6.068, + "step": 8310 + }, + { + "epoch": 0.89, + "learning_rate": 7.458305864763238e-06, + "loss": 6.0652, + "step": 8320 + }, + { + "epoch": 0.89, + "learning_rate": 7.435890592610764e-06, + "loss": 6.0605, + "step": 8330 + }, + { + "epoch": 0.9, + "learning_rate": 7.413489101860642e-06, + "loss": 6.0855, + "step": 8340 + }, + { + "epoch": 0.9, + "learning_rate": 7.3911015129149e-06, + "loss": 6.0824, + "step": 8350 + }, + { + "epoch": 0.9, + "learning_rate": 7.368727946100837e-06, + "loss": 6.0805, + "step": 8360 + }, + { + "epoch": 0.9, + "learning_rate": 7.346368521670396e-06, + "loss": 6.0691, + "step": 8370 + }, + { + "epoch": 0.9, + "learning_rate": 7.3240233597995e-06, + "loss": 6.0656, + "step": 8380 + }, + { + "epoch": 0.9, + "learning_rate": 7.3016925805874196e-06, + "loss": 6.0633, + "step": 8390 + }, + { + "epoch": 0.9, + "learning_rate": 7.279376304056121e-06, + "loss": 6.0461, + "step": 8400 + }, + { + "epoch": 0.9, + "learning_rate": 7.257074650149622e-06, + "loss": 6.0367, + "step": 8410 + }, + { + "epoch": 0.9, + "learning_rate": 7.234787738733351e-06, + "loss": 6.0711, + "step": 8420 + }, + { + "epoch": 0.91, + "learning_rate": 7.2125156895934936e-06, + "loss": 6.0535, + "step": 8430 + }, + { + "epoch": 0.91, + "learning_rate": 7.190258622436359e-06, + "loss": 6.0566, + "step": 8440 + }, + { + "epoch": 0.91, + "learning_rate": 7.1680166568877304e-06, + "loss": 6.0527, + "step": 8450 + }, + { + "epoch": 0.91, + "learning_rate": 7.145789912492227e-06, + "loss": 6.0687, + "step": 8460 + }, + { + "epoch": 0.91, + "learning_rate": 7.123578508712652e-06, + "loss": 6.0715, + "step": 8470 + }, + { + "epoch": 0.91, + "learning_rate": 7.101382564929365e-06, + "loss": 6.0637, + "step": 8480 + }, + { + "epoch": 0.91, + "learning_rate": 7.079202200439625e-06, + "loss": 6.0809, + "step": 8490 + }, + { + "epoch": 0.91, + "learning_rate": 7.057037534456959e-06, + "loss": 6.0855, + "step": 8500 + }, + { + "epoch": 0.91, + "learning_rate": 7.034888686110523e-06, + "loss": 6.0664, + "step": 8510 + }, + { + "epoch": 0.91, + "learning_rate": 7.012755774444451e-06, + "loss": 6.0859, + "step": 8520 + }, + { + "epoch": 0.92, + "learning_rate": 6.990638918417224e-06, + "loss": 6.0438, + "step": 8530 + }, + { + "epoch": 0.92, + "learning_rate": 6.96853823690103e-06, + "loss": 6.0734, + "step": 8540 + }, + { + "epoch": 0.92, + "learning_rate": 6.946453848681121e-06, + "loss": 6.0523, + "step": 8550 + }, + { + "epoch": 0.92, + "learning_rate": 6.9243858724551774e-06, + "loss": 6.0426, + "step": 8560 + }, + { + "epoch": 0.92, + "learning_rate": 6.902334426832671e-06, + "loss": 6.0676, + "step": 8570 + }, + { + "epoch": 0.92, + "learning_rate": 6.880299630334224e-06, + "loss": 6.0551, + "step": 8580 + }, + { + "epoch": 0.92, + "learning_rate": 6.858281601390975e-06, + "loss": 6.0504, + "step": 8590 + }, + { + "epoch": 0.92, + "learning_rate": 6.83628045834394e-06, + "loss": 6.0348, + "step": 8600 + }, + { + "epoch": 0.92, + "learning_rate": 6.814296319443383e-06, + "loss": 6.0727, + "step": 8610 + }, + { + "epoch": 0.93, + "learning_rate": 6.792329302848169e-06, + "loss": 6.0426, + "step": 8620 + }, + { + "epoch": 0.93, + "learning_rate": 6.770379526625136e-06, + "loss": 6.0582, + "step": 8630 + }, + { + "epoch": 0.93, + "learning_rate": 6.7484471087484636e-06, + "loss": 6.0605, + "step": 8640 + }, + { + "epoch": 0.93, + "learning_rate": 6.726532167099034e-06, + "loss": 6.0586, + "step": 8650 + }, + { + "epoch": 0.93, + "learning_rate": 6.704634819463793e-06, + "loss": 6.0531, + "step": 8660 + }, + { + "epoch": 0.93, + "learning_rate": 6.682755183535135e-06, + "loss": 6.0531, + "step": 8670 + }, + { + "epoch": 0.93, + "learning_rate": 6.660893376910244e-06, + "loss": 6.0574, + "step": 8680 + }, + { + "epoch": 0.93, + "learning_rate": 6.639049517090491e-06, + "loss": 6.0445, + "step": 8690 + }, + { + "epoch": 0.93, + "learning_rate": 6.6172237214807775e-06, + "loss": 6.0668, + "step": 8700 + }, + { + "epoch": 0.94, + "learning_rate": 6.595416107388919e-06, + "loss": 6.0625, + "step": 8710 + }, + { + "epoch": 0.94, + "learning_rate": 6.573626792025009e-06, + "loss": 6.0789, + "step": 8720 + }, + { + "epoch": 0.94, + "learning_rate": 6.551855892500792e-06, + "loss": 6.0617, + "step": 8730 + }, + { + "epoch": 0.94, + "learning_rate": 6.530103525829024e-06, + "loss": 6.0758, + "step": 8740 + }, + { + "epoch": 0.94, + "learning_rate": 6.508369808922869e-06, + "loss": 6.0699, + "step": 8750 + }, + { + "epoch": 0.94, + "learning_rate": 6.4866548585952384e-06, + "loss": 6.0676, + "step": 8760 + }, + { + "epoch": 0.94, + "learning_rate": 6.464958791558182e-06, + "loss": 6.0633, + "step": 8770 + }, + { + "epoch": 0.94, + "learning_rate": 6.443281724422261e-06, + "loss": 6.0492, + "step": 8780 + }, + { + "epoch": 0.94, + "learning_rate": 6.421623773695913e-06, + "loss": 6.0652, + "step": 8790 + }, + { + "epoch": 0.94, + "learning_rate": 6.39998505578483e-06, + "loss": 6.0867, + "step": 8800 + }, + { + "epoch": 0.95, + "learning_rate": 6.3783656869913335e-06, + "loss": 6.0602, + "step": 8810 + }, + { + "epoch": 0.95, + "learning_rate": 6.356765783513752e-06, + "loss": 6.0492, + "step": 8820 + }, + { + "epoch": 0.95, + "learning_rate": 6.335185461445787e-06, + "loss": 6.0496, + "step": 8830 + }, + { + "epoch": 0.95, + "learning_rate": 6.313624836775902e-06, + "loss": 6.066, + "step": 8840 + }, + { + "epoch": 0.95, + "learning_rate": 6.292084025386685e-06, + "loss": 6.0574, + "step": 8850 + }, + { + "epoch": 0.95, + "learning_rate": 6.27056314305424e-06, + "loss": 6.0613, + "step": 8860 + }, + { + "epoch": 0.95, + "learning_rate": 6.249062305447553e-06, + "loss": 6.066, + "step": 8870 + }, + { + "epoch": 0.95, + "learning_rate": 6.227581628127877e-06, + "loss": 6.0906, + "step": 8880 + }, + { + "epoch": 0.95, + "learning_rate": 6.206121226548105e-06, + "loss": 6.082, + "step": 8890 + }, + { + "epoch": 0.96, + "learning_rate": 6.184681216052159e-06, + "loss": 6.091, + "step": 8900 + }, + { + "epoch": 0.96, + "learning_rate": 6.163261711874365e-06, + "loss": 6.0473, + "step": 8910 + }, + { + "epoch": 0.96, + "learning_rate": 6.141862829138823e-06, + "loss": 6.0504, + "step": 8920 + }, + { + "epoch": 0.96, + "learning_rate": 6.120484682858809e-06, + "loss": 6.0543, + "step": 8930 + }, + { + "epoch": 0.96, + "learning_rate": 6.099127387936141e-06, + "loss": 6.0477, + "step": 8940 + }, + { + "epoch": 0.96, + "learning_rate": 6.077791059160568e-06, + "loss": 6.0777, + "step": 8950 + }, + { + "epoch": 0.96, + "learning_rate": 6.056475811209153e-06, + "loss": 6.0684, + "step": 8960 + }, + { + "epoch": 0.96, + "learning_rate": 6.035181758645652e-06, + "loss": 6.0426, + "step": 8970 + }, + { + "epoch": 0.96, + "learning_rate": 6.013909015919901e-06, + "loss": 6.0434, + "step": 8980 + }, + { + "epoch": 0.97, + "learning_rate": 5.992657697367208e-06, + "loss": 6.0734, + "step": 8990 + }, + { + "epoch": 0.97, + "learning_rate": 5.971427917207722e-06, + "loss": 6.0742, + "step": 9000 + }, + { + "epoch": 0.97, + "learning_rate": 5.950219789545838e-06, + "loss": 6.073, + "step": 9010 + }, + { + "epoch": 0.97, + "learning_rate": 5.933268953818473e-06, + "loss": 6.0773, + "step": 9020 + }, + { + "epoch": 0.97, + "learning_rate": 5.912100087823469e-06, + "loss": 6.0406, + "step": 9030 + }, + { + "epoch": 0.97, + "learning_rate": 5.890953193197284e-06, + "loss": 6.0602, + "step": 9040 + }, + { + "epoch": 0.97, + "learning_rate": 5.869828383598818e-06, + "loss": 6.0809, + "step": 9050 + }, + { + "epoch": 0.97, + "learning_rate": 5.8487257725682756e-06, + "loss": 6.0312, + "step": 9060 + }, + { + "epoch": 0.97, + "learning_rate": 5.827645473526549e-06, + "loss": 6.0629, + "step": 9070 + }, + { + "epoch": 0.97, + "learning_rate": 5.806587599774602e-06, + "loss": 6.0398, + "step": 9080 + }, + { + "epoch": 0.98, + "learning_rate": 5.785552264492884e-06, + "loss": 6.0652, + "step": 9090 + }, + { + "epoch": 0.98, + "learning_rate": 5.764539580740687e-06, + "loss": 6.0402, + "step": 9100 + }, + { + "epoch": 0.98, + "learning_rate": 5.7435496614555764e-06, + "loss": 6.0566, + "step": 9110 + }, + { + "epoch": 0.98, + "learning_rate": 5.722582619452746e-06, + "loss": 6.0563, + "step": 9120 + }, + { + "epoch": 0.98, + "learning_rate": 5.701638567424447e-06, + "loss": 6.0449, + "step": 9130 + }, + { + "epoch": 0.98, + "learning_rate": 5.680717617939351e-06, + "loss": 6.0469, + "step": 9140 + }, + { + "epoch": 0.98, + "learning_rate": 5.659819883441975e-06, + "loss": 6.059, + "step": 9150 + }, + { + "epoch": 0.98, + "learning_rate": 5.638945476252044e-06, + "loss": 6.0656, + "step": 9160 + }, + { + "epoch": 0.98, + "learning_rate": 5.618094508563923e-06, + "loss": 6.0656, + "step": 9170 + }, + { + "epoch": 0.99, + "learning_rate": 5.597267092445979e-06, + "loss": 6.0695, + "step": 9180 + }, + { + "epoch": 0.99, + "learning_rate": 5.576463339840013e-06, + "loss": 6.034, + "step": 9190 + }, + { + "epoch": 0.99, + "learning_rate": 5.555683362560622e-06, + "loss": 6.0699, + "step": 9200 + }, + { + "epoch": 0.99, + "learning_rate": 5.534927272294637e-06, + "loss": 6.0695, + "step": 9210 + }, + { + "epoch": 0.99, + "learning_rate": 5.5141951806004815e-06, + "loss": 6.043, + "step": 9220 + }, + { + "epoch": 0.99, + "learning_rate": 5.493487198907615e-06, + "loss": 6.0656, + "step": 9230 + }, + { + "epoch": 0.99, + "learning_rate": 5.47280343851589e-06, + "loss": 6.0617, + "step": 9240 + }, + { + "epoch": 0.99, + "learning_rate": 5.452144010594998e-06, + "loss": 6.0727, + "step": 9250 + }, + { + "epoch": 0.99, + "learning_rate": 5.431509026183831e-06, + "loss": 6.0746, + "step": 9260 + }, + { + "epoch": 1.0, + "learning_rate": 5.41089859618992e-06, + "loss": 6.0449, + "step": 9270 + }, + { + "epoch": 1.0, + "learning_rate": 5.390312831388805e-06, + "loss": 6.0375, + "step": 9280 + }, + { + "epoch": 1.0, + "learning_rate": 5.369751842423474e-06, + "loss": 6.0453, + "step": 9290 + }, + { + "epoch": 1.0, + "learning_rate": 5.349215739803735e-06, + "loss": 6.0547, + "step": 9300 + }, + { + "epoch": 1.0, + "learning_rate": 5.328704633905662e-06, + "loss": 6.0473, + "step": 9310 + }, + { + "epoch": 1.0, + "learning_rate": 5.3082186349709495e-06, + "loss": 6.066, + "step": 9320 + }, + { + "epoch": 1.0, + "learning_rate": 5.287757853106377e-06, + "loss": 6.0684, + "step": 9330 + }, + { + "epoch": 1.0, + "learning_rate": 5.267322398283164e-06, + "loss": 6.073, + "step": 9340 + }, + { + "epoch": 1.0, + "learning_rate": 5.246912380336422e-06, + "loss": 6.0559, + "step": 9350 + }, + { + "epoch": 1.0, + "learning_rate": 5.226527908964534e-06, + "loss": 6.0539, + "step": 9360 + }, + { + "epoch": 1.01, + "learning_rate": 5.206169093728588e-06, + "loss": 6.0598, + "step": 9370 + }, + { + "epoch": 1.01, + "learning_rate": 5.185836044051767e-06, + "loss": 6.0469, + "step": 9380 + }, + { + "epoch": 1.01, + "learning_rate": 5.165528869218776e-06, + "loss": 6.0734, + "step": 9390 + }, + { + "epoch": 1.01, + "learning_rate": 5.145247678375251e-06, + "loss": 6.1031, + "step": 9400 + }, + { + "epoch": 1.01, + "learning_rate": 5.1249925805271686e-06, + "loss": 6.1199, + "step": 9410 + }, + { + "epoch": 1.01, + "learning_rate": 5.1047636845402594e-06, + "loss": 6.0594, + "step": 9420 + }, + { + "epoch": 1.01, + "learning_rate": 5.084561099139438e-06, + "loss": 6.0523, + "step": 9430 + }, + { + "epoch": 1.01, + "learning_rate": 5.064384932908186e-06, + "loss": 6.0441, + "step": 9440 + }, + { + "epoch": 1.01, + "learning_rate": 5.044235294288014e-06, + "loss": 6.0598, + "step": 9450 + }, + { + "epoch": 1.02, + "learning_rate": 5.024112291577832e-06, + "loss": 6.0555, + "step": 9460 + }, + { + "epoch": 1.02, + "learning_rate": 5.004016032933403e-06, + "loss": 6.048, + "step": 9470 + }, + { + "epoch": 1.02, + "learning_rate": 4.983946626366739e-06, + "loss": 6.0832, + "step": 9480 + }, + { + "epoch": 1.02, + "learning_rate": 4.963904179745538e-06, + "loss": 6.0852, + "step": 9490 + }, + { + "epoch": 1.02, + "learning_rate": 4.943888800792586e-06, + "loss": 6.0695, + "step": 9500 + }, + { + "epoch": 1.02, + "learning_rate": 4.923900597085196e-06, + "loss": 6.0523, + "step": 9510 + }, + { + "epoch": 1.02, + "learning_rate": 4.903939676054614e-06, + "loss": 6.0629, + "step": 9520 + }, + { + "epoch": 1.02, + "learning_rate": 4.884006144985457e-06, + "loss": 6.0723, + "step": 9530 + }, + { + "epoch": 1.02, + "learning_rate": 4.8641001110151185e-06, + "loss": 6.0551, + "step": 9540 + }, + { + "epoch": 1.03, + "learning_rate": 4.844221681133213e-06, + "loss": 6.0723, + "step": 9550 + }, + { + "epoch": 1.03, + "learning_rate": 4.82437096218098e-06, + "loss": 6.084, + "step": 9560 + }, + { + "epoch": 1.03, + "learning_rate": 4.804548060850731e-06, + "loss": 6.0527, + "step": 9570 + }, + { + "epoch": 1.03, + "learning_rate": 4.784753083685253e-06, + "loss": 6.0852, + "step": 9580 + }, + { + "epoch": 1.03, + "learning_rate": 4.764986137077261e-06, + "loss": 6.0668, + "step": 9590 + }, + { + "epoch": 1.03, + "learning_rate": 4.745247327268799e-06, + "loss": 6.0766, + "step": 9600 + }, + { + "epoch": 1.03, + "learning_rate": 4.725536760350701e-06, + "loss": 6.0684, + "step": 9610 + }, + { + "epoch": 1.03, + "learning_rate": 4.705854542261983e-06, + "loss": 6.059, + "step": 9620 + }, + { + "epoch": 1.03, + "learning_rate": 4.686200778789313e-06, + "loss": 6.0395, + "step": 9630 + }, + { + "epoch": 1.04, + "learning_rate": 4.666575575566405e-06, + "loss": 6.0379, + "step": 9640 + }, + { + "epoch": 1.04, + "learning_rate": 4.646979038073486e-06, + "loss": 6.0262, + "step": 9650 + }, + { + "epoch": 1.04, + "learning_rate": 4.627411271636697e-06, + "loss": 6.0336, + "step": 9660 + }, + { + "epoch": 1.04, + "learning_rate": 4.607872381427557e-06, + "loss": 6.0793, + "step": 9670 + }, + { + "epoch": 1.04, + "learning_rate": 4.588362472462368e-06, + "loss": 6.0629, + "step": 9680 + }, + { + "epoch": 1.04, + "learning_rate": 4.568881649601682e-06, + "loss": 6.0723, + "step": 9690 + }, + { + "epoch": 1.04, + "learning_rate": 4.549430017549703e-06, + "loss": 6.0797, + "step": 9700 + }, + { + "epoch": 1.04, + "learning_rate": 4.530007680853756e-06, + "loss": 6.0531, + "step": 9710 + }, + { + "epoch": 1.04, + "learning_rate": 4.51061474390371e-06, + "loss": 6.0703, + "step": 9720 + }, + { + "epoch": 1.04, + "learning_rate": 4.491251310931407e-06, + "loss": 6.0699, + "step": 9730 + }, + { + "epoch": 1.05, + "learning_rate": 4.47191748601013e-06, + "loss": 6.0664, + "step": 9740 + }, + { + "epoch": 1.05, + "learning_rate": 4.45261337305401e-06, + "loss": 6.0828, + "step": 9750 + }, + { + "epoch": 1.05, + "learning_rate": 4.433339075817498e-06, + "loss": 6.0355, + "step": 9760 + }, + { + "epoch": 1.05, + "learning_rate": 4.414094697894779e-06, + "loss": 6.0551, + "step": 9770 + }, + { + "epoch": 1.05, + "learning_rate": 4.394880342719248e-06, + "loss": 6.0555, + "step": 9780 + }, + { + "epoch": 1.05, + "learning_rate": 4.375696113562915e-06, + "loss": 6.0559, + "step": 9790 + }, + { + "epoch": 1.05, + "learning_rate": 4.356542113535892e-06, + "loss": 6.0434, + "step": 9800 + }, + { + "epoch": 1.05, + "learning_rate": 4.337418445585797e-06, + "loss": 6.0785, + "step": 9810 + }, + { + "epoch": 1.05, + "learning_rate": 4.3183252124972365e-06, + "loss": 6.0602, + "step": 9820 + }, + { + "epoch": 1.06, + "learning_rate": 4.299262516891235e-06, + "loss": 6.0578, + "step": 9830 + }, + { + "epoch": 1.06, + "learning_rate": 4.280230461224676e-06, + "loss": 6.0754, + "step": 9840 + }, + { + "epoch": 1.06, + "learning_rate": 4.261229147789777e-06, + "loss": 6.0383, + "step": 9850 + }, + { + "epoch": 1.06, + "learning_rate": 4.242258678713509e-06, + "loss": 6.0645, + "step": 9860 + }, + { + "epoch": 1.06, + "learning_rate": 4.223319155957078e-06, + "loss": 6.0598, + "step": 9870 + }, + { + "epoch": 1.06, + "learning_rate": 4.204410681315344e-06, + "loss": 6.0629, + "step": 9880 + }, + { + "epoch": 1.06, + "learning_rate": 4.185533356416311e-06, + "loss": 6.0445, + "step": 9890 + }, + { + "epoch": 1.06, + "learning_rate": 4.166687282720545e-06, + "loss": 6.0281, + "step": 9900 + }, + { + "epoch": 1.06, + "learning_rate": 4.147872561520658e-06, + "loss": 6.0914, + "step": 9910 + }, + { + "epoch": 1.07, + "learning_rate": 4.12908929394074e-06, + "loss": 6.0781, + "step": 9920 + }, + { + "epoch": 1.07, + "learning_rate": 4.110337580935836e-06, + "loss": 6.0656, + "step": 9930 + }, + { + "epoch": 1.07, + "learning_rate": 4.091617523291381e-06, + "loss": 6.0648, + "step": 9940 + }, + { + "epoch": 1.07, + "learning_rate": 4.072929221622689e-06, + "loss": 6.0781, + "step": 9950 + }, + { + "epoch": 1.07, + "learning_rate": 4.054272776374374e-06, + "loss": 6.0648, + "step": 9960 + }, + { + "epoch": 1.07, + "learning_rate": 4.035648287819848e-06, + "loss": 6.0438, + "step": 9970 + }, + { + "epoch": 1.07, + "learning_rate": 4.017055856060748e-06, + "loss": 6.0199, + "step": 9980 + }, + { + "epoch": 1.07, + "learning_rate": 3.9984955810264315e-06, + "loss": 6.0652, + "step": 9990 + }, + { + "epoch": 1.07, + "learning_rate": 3.979967562473406e-06, + "loss": 6.0758, + "step": 10000 + }, + { + "epoch": 1.07, + "learning_rate": 3.961471899984822e-06, + "loss": 6.0762, + "step": 10010 + }, + { + "epoch": 1.08, + "learning_rate": 3.946698733169213e-06, + "loss": 6.0438, + "step": 10020 + }, + { + "epoch": 1.08, + "learning_rate": 3.928261561990697e-06, + "loss": 6.0422, + "step": 10030 + }, + { + "epoch": 1.08, + "learning_rate": 3.909857024782542e-06, + "loss": 6.0523, + "step": 10040 + }, + { + "epoch": 1.08, + "learning_rate": 3.891485220464221e-06, + "loss": 6.0848, + "step": 10050 + }, + { + "epoch": 1.08, + "learning_rate": 3.873146247779259e-06, + "loss": 6.077, + "step": 10060 + }, + { + "epoch": 1.08, + "learning_rate": 3.854840205294741e-06, + "loss": 6.073, + "step": 10070 + }, + { + "epoch": 1.08, + "learning_rate": 3.83656719140074e-06, + "loss": 6.0477, + "step": 10080 + }, + { + "epoch": 1.08, + "learning_rate": 3.818327304309827e-06, + "loss": 6.0641, + "step": 10090 + }, + { + "epoch": 1.08, + "learning_rate": 3.8001206420565093e-06, + "loss": 6.0656, + "step": 10100 + }, + { + "epoch": 1.09, + "learning_rate": 3.781947302496737e-06, + "loss": 6.0543, + "step": 10110 + }, + { + "epoch": 1.09, + "learning_rate": 3.763807383307341e-06, + "loss": 6.0824, + "step": 10120 + }, + { + "epoch": 1.09, + "learning_rate": 3.7457009819855428e-06, + "loss": 6.016, + "step": 10130 + }, + { + "epoch": 1.09, + "learning_rate": 3.7276281958484018e-06, + "loss": 6.0512, + "step": 10140 + }, + { + "epoch": 1.09, + "learning_rate": 3.7095891220323166e-06, + "loss": 6.0512, + "step": 10150 + }, + { + "epoch": 1.09, + "learning_rate": 3.691583857492478e-06, + "loss": 6.0605, + "step": 10160 + }, + { + "epoch": 1.09, + "learning_rate": 3.673612499002374e-06, + "loss": 6.0547, + "step": 10170 + }, + { + "epoch": 1.09, + "learning_rate": 3.6556751431532445e-06, + "loss": 6.0281, + "step": 10180 + }, + { + "epoch": 1.09, + "learning_rate": 3.637771886353587e-06, + "loss": 6.0656, + "step": 10190 + }, + { + "epoch": 1.1, + "learning_rate": 3.6199028248286116e-06, + "loss": 6.0617, + "step": 10200 + }, + { + "epoch": 1.1, + "learning_rate": 3.602068054619754e-06, + "loss": 6.048, + "step": 10210 + }, + { + "epoch": 1.1, + "learning_rate": 3.5842676715841252e-06, + "loss": 6.0641, + "step": 10220 + }, + { + "epoch": 1.1, + "learning_rate": 3.566501771394032e-06, + "loss": 6.0449, + "step": 10230 + }, + { + "epoch": 1.1, + "learning_rate": 3.5487704495364294e-06, + "loss": 6.0367, + "step": 10240 + }, + { + "epoch": 1.1, + "learning_rate": 3.531073801312438e-06, + "loss": 6.0535, + "step": 10250 + }, + { + "epoch": 1.1, + "learning_rate": 3.5134119218368034e-06, + "loss": 6.0859, + "step": 10260 + }, + { + "epoch": 1.1, + "learning_rate": 3.495784906037406e-06, + "loss": 6.0684, + "step": 10270 + }, + { + "epoch": 1.1, + "learning_rate": 3.4781928486547458e-06, + "loss": 6.0687, + "step": 10280 + }, + { + "epoch": 1.1, + "learning_rate": 3.4606358442414213e-06, + "loss": 6.0574, + "step": 10290 + }, + { + "epoch": 1.11, + "learning_rate": 3.4431139871616403e-06, + "loss": 6.0543, + "step": 10300 + }, + { + "epoch": 1.11, + "learning_rate": 3.425627371590695e-06, + "loss": 6.0414, + "step": 10310 + }, + { + "epoch": 1.11, + "learning_rate": 3.408176091514469e-06, + "loss": 6.0793, + "step": 10320 + }, + { + "epoch": 1.11, + "learning_rate": 3.39076024072893e-06, + "loss": 6.0566, + "step": 10330 + }, + { + "epoch": 1.11, + "learning_rate": 3.3733799128396106e-06, + "loss": 6.0461, + "step": 10340 + }, + { + "epoch": 1.11, + "learning_rate": 3.356035201261133e-06, + "loss": 6.05, + "step": 10350 + }, + { + "epoch": 1.11, + "learning_rate": 3.3387261992166776e-06, + "loss": 6.0539, + "step": 10360 + }, + { + "epoch": 1.11, + "learning_rate": 3.3214529997375067e-06, + "loss": 6.048, + "step": 10370 + }, + { + "epoch": 1.11, + "learning_rate": 3.3042156956624415e-06, + "loss": 6.0641, + "step": 10380 + }, + { + "epoch": 1.12, + "learning_rate": 3.28701437963739e-06, + "loss": 6.0387, + "step": 10390 + }, + { + "epoch": 1.12, + "learning_rate": 3.2698491441148183e-06, + "loss": 6.052, + "step": 10400 + }, + { + "epoch": 1.12, + "learning_rate": 3.2527200813532823e-06, + "loss": 6.0738, + "step": 10410 + }, + { + "epoch": 1.12, + "learning_rate": 3.2356272834169087e-06, + "loss": 6.0867, + "step": 10420 + }, + { + "epoch": 1.12, + "learning_rate": 3.2185708421749207e-06, + "loss": 6.0539, + "step": 10430 + }, + { + "epoch": 1.12, + "learning_rate": 3.2015508493011226e-06, + "loss": 6.0617, + "step": 10440 + }, + { + "epoch": 1.12, + "learning_rate": 3.1845673962734314e-06, + "loss": 6.0508, + "step": 10450 + }, + { + "epoch": 1.12, + "learning_rate": 3.167620574373359e-06, + "loss": 6.0344, + "step": 10460 + }, + { + "epoch": 1.12, + "learning_rate": 3.1507104746855464e-06, + "loss": 6.048, + "step": 10470 + }, + { + "epoch": 1.13, + "learning_rate": 3.1338371880972506e-06, + "loss": 6.0645, + "step": 10480 + }, + { + "epoch": 1.13, + "learning_rate": 3.1170008052978827e-06, + "loss": 6.0695, + "step": 10490 + }, + { + "epoch": 1.13, + "learning_rate": 3.100201416778491e-06, + "loss": 6.0512, + "step": 10500 + }, + { + "epoch": 1.13, + "learning_rate": 3.0834391128313014e-06, + "loss": 6.0305, + "step": 10510 + }, + { + "epoch": 1.13, + "learning_rate": 3.06671398354921e-06, + "loss": 6.0707, + "step": 10520 + }, + { + "epoch": 1.13, + "learning_rate": 3.050026118825319e-06, + "loss": 6.0586, + "step": 10530 + }, + { + "epoch": 1.13, + "learning_rate": 3.0333756083524335e-06, + "loss": 6.0375, + "step": 10540 + }, + { + "epoch": 1.13, + "learning_rate": 3.016762541622599e-06, + "loss": 6.0598, + "step": 10550 + }, + { + "epoch": 1.13, + "learning_rate": 3.0001870079266016e-06, + "loss": 6.048, + "step": 10560 + }, + { + "epoch": 1.13, + "learning_rate": 2.9836490963535083e-06, + "loss": 6.0422, + "step": 10570 + }, + { + "epoch": 1.14, + "learning_rate": 2.9671488957901652e-06, + "loss": 6.0711, + "step": 10580 + }, + { + "epoch": 1.14, + "learning_rate": 2.9506864949207425e-06, + "loss": 6.0664, + "step": 10590 + }, + { + "epoch": 1.14, + "learning_rate": 2.9342619822262374e-06, + "loss": 6.0543, + "step": 10600 + }, + { + "epoch": 1.14, + "learning_rate": 2.9178754459840196e-06, + "loss": 6.0805, + "step": 10610 + }, + { + "epoch": 1.14, + "learning_rate": 2.9015269742673324e-06, + "loss": 6.0648, + "step": 10620 + }, + { + "epoch": 1.14, + "learning_rate": 2.8852166549448458e-06, + "loss": 6.077, + "step": 10630 + }, + { + "epoch": 1.14, + "learning_rate": 2.8689445756801583e-06, + "loss": 6.0559, + "step": 10640 + }, + { + "epoch": 1.14, + "learning_rate": 2.8527108239313506e-06, + "loss": 6.052, + "step": 10650 + }, + { + "epoch": 1.14, + "learning_rate": 2.8365154869504895e-06, + "loss": 6.0586, + "step": 10660 + }, + { + "epoch": 1.15, + "learning_rate": 2.820358651783186e-06, + "loss": 6.0676, + "step": 10670 + }, + { + "epoch": 1.15, + "learning_rate": 2.804240405268108e-06, + "loss": 6.0453, + "step": 10680 + }, + { + "epoch": 1.15, + "learning_rate": 2.7881608340365176e-06, + "loss": 6.0551, + "step": 10690 + }, + { + "epoch": 1.15, + "learning_rate": 2.7721200245118128e-06, + "loss": 6.0504, + "step": 10700 + }, + { + "epoch": 1.15, + "learning_rate": 2.7561180629090513e-06, + "loss": 6.0453, + "step": 10710 + }, + { + "epoch": 1.15, + "learning_rate": 2.740155035234503e-06, + "loss": 6.0605, + "step": 10720 + }, + { + "epoch": 1.15, + "learning_rate": 2.7242310272851656e-06, + "loss": 6.0258, + "step": 10730 + }, + { + "epoch": 1.15, + "learning_rate": 2.7083461246483313e-06, + "loss": 6.0469, + "step": 10740 + }, + { + "epoch": 1.15, + "learning_rate": 2.692500412701096e-06, + "loss": 6.0734, + "step": 10750 + }, + { + "epoch": 1.16, + "learning_rate": 2.6766939766099297e-06, + "loss": 6.0605, + "step": 10760 + }, + { + "epoch": 1.16, + "learning_rate": 2.660926901330194e-06, + "loss": 6.0586, + "step": 10770 + }, + { + "epoch": 1.16, + "learning_rate": 2.6451992716057096e-06, + "loss": 6.05, + "step": 10780 + }, + { + "epoch": 1.16, + "learning_rate": 2.629511171968271e-06, + "loss": 6.0668, + "step": 10790 + }, + { + "epoch": 1.16, + "learning_rate": 2.6138626867372274e-06, + "loss": 6.0887, + "step": 10800 + }, + { + "epoch": 1.16, + "learning_rate": 2.5982539000189945e-06, + "loss": 6.0582, + "step": 10810 + }, + { + "epoch": 1.16, + "learning_rate": 2.582684895706632e-06, + "loss": 6.0406, + "step": 10820 + }, + { + "epoch": 1.16, + "learning_rate": 2.5671557574793703e-06, + "loss": 6.057, + "step": 10830 + }, + { + "epoch": 1.16, + "learning_rate": 2.5516665688021804e-06, + "loss": 6.0586, + "step": 10840 + }, + { + "epoch": 1.16, + "learning_rate": 2.5362174129253014e-06, + "loss": 6.0723, + "step": 10850 + }, + { + "epoch": 1.17, + "learning_rate": 2.520808372883823e-06, + "loss": 6.0563, + "step": 10860 + }, + { + "epoch": 1.17, + "learning_rate": 2.5054395314972068e-06, + "loss": 6.066, + "step": 10870 + }, + { + "epoch": 1.17, + "learning_rate": 2.4901109713688686e-06, + "loss": 6.0977, + "step": 10880 + }, + { + "epoch": 1.17, + "learning_rate": 2.4748227748857235e-06, + "loss": 6.0535, + "step": 10890 + }, + { + "epoch": 1.17, + "learning_rate": 2.459575024217733e-06, + "loss": 6.0781, + "step": 10900 + }, + { + "epoch": 1.17, + "learning_rate": 2.4443678013174843e-06, + "loss": 6.0625, + "step": 10910 + }, + { + "epoch": 1.17, + "learning_rate": 2.4292011879197284e-06, + "loss": 6.0766, + "step": 10920 + }, + { + "epoch": 1.17, + "learning_rate": 2.4140752655409625e-06, + "loss": 6.0574, + "step": 10930 + }, + { + "epoch": 1.17, + "learning_rate": 2.3989901154789684e-06, + "loss": 6.0797, + "step": 10940 + }, + { + "epoch": 1.18, + "learning_rate": 2.3839458188124e-06, + "loss": 6.0488, + "step": 10950 + }, + { + "epoch": 1.18, + "learning_rate": 2.3689424564003206e-06, + "loss": 6.0523, + "step": 10960 + }, + { + "epoch": 1.18, + "learning_rate": 2.353980108881799e-06, + "loss": 6.0395, + "step": 10970 + }, + { + "epoch": 1.18, + "learning_rate": 2.3390588566754457e-06, + "loss": 6.0938, + "step": 10980 + }, + { + "epoch": 1.18, + "learning_rate": 2.324178779979005e-06, + "loss": 6.0621, + "step": 10990 + }, + { + "epoch": 1.18, + "learning_rate": 2.309339958768906e-06, + "loss": 6.0187, + "step": 11000 + }, + { + "epoch": 1.18, + "learning_rate": 2.2945424727998487e-06, + "loss": 6.0496, + "step": 11010 + }, + { + "epoch": 1.18, + "learning_rate": 2.279786401604359e-06, + "loss": 6.0656, + "step": 11020 + }, + { + "epoch": 1.18, + "learning_rate": 2.2680114165887057e-06, + "loss": 6.0383, + "step": 11030 + }, + { + "epoch": 1.19, + "learning_rate": 2.2533300916968257e-06, + "loss": 6.0406, + "step": 11040 + }, + { + "epoch": 1.19, + "learning_rate": 2.238690403084045e-06, + "loss": 6.0652, + "step": 11050 + }, + { + "epoch": 1.19, + "learning_rate": 2.2240924294347697e-06, + "loss": 6.0559, + "step": 11060 + }, + { + "epoch": 1.19, + "learning_rate": 2.2095362492092085e-06, + "loss": 6.043, + "step": 11070 + }, + { + "epoch": 1.19, + "learning_rate": 2.195021940642934e-06, + "loss": 6.0832, + "step": 11080 + }, + { + "epoch": 1.19, + "learning_rate": 2.1805495817464773e-06, + "loss": 6.052, + "step": 11090 + }, + { + "epoch": 1.19, + "learning_rate": 2.1661192503048913e-06, + "loss": 6.0715, + "step": 11100 + }, + { + "epoch": 1.19, + "learning_rate": 2.151731023877356e-06, + "loss": 6.057, + "step": 11110 + }, + { + "epoch": 1.19, + "learning_rate": 2.1373849797967326e-06, + "loss": 6.0594, + "step": 11120 + }, + { + "epoch": 1.19, + "learning_rate": 2.123081195169179e-06, + "loss": 6.0684, + "step": 11130 + }, + { + "epoch": 1.2, + "learning_rate": 2.108819746873706e-06, + "loss": 6.0496, + "step": 11140 + }, + { + "epoch": 1.2, + "learning_rate": 2.0946007115617895e-06, + "loss": 6.0328, + "step": 11150 + }, + { + "epoch": 1.2, + "learning_rate": 2.0804241656569366e-06, + "loss": 6.0742, + "step": 11160 + }, + { + "epoch": 1.2, + "learning_rate": 2.0662901853542973e-06, + "loss": 6.0895, + "step": 11170 + }, + { + "epoch": 1.2, + "learning_rate": 2.052198846620228e-06, + "loss": 6.0766, + "step": 11180 + }, + { + "epoch": 1.2, + "learning_rate": 2.0381502251919127e-06, + "loss": 6.0469, + "step": 11190 + }, + { + "epoch": 1.2, + "learning_rate": 2.0241443965769293e-06, + "loss": 6.0355, + "step": 11200 + }, + { + "epoch": 1.2, + "learning_rate": 2.010181436052866e-06, + "loss": 6.0438, + "step": 11210 + }, + { + "epoch": 1.2, + "learning_rate": 1.996261418666896e-06, + "loss": 6.0629, + "step": 11220 + }, + { + "epoch": 1.21, + "learning_rate": 1.9823844192353936e-06, + "loss": 6.0422, + "step": 11230 + }, + { + "epoch": 1.21, + "learning_rate": 1.9685505123435224e-06, + "loss": 6.0852, + "step": 11240 + }, + { + "epoch": 1.21, + "learning_rate": 1.9547597723448264e-06, + "loss": 6.0723, + "step": 11250 + }, + { + "epoch": 1.21, + "learning_rate": 1.9410122733608505e-06, + "loss": 6.0531, + "step": 11260 + }, + { + "epoch": 1.21, + "learning_rate": 1.9273080892807205e-06, + "loss": 6.0398, + "step": 11270 + }, + { + "epoch": 1.21, + "learning_rate": 1.9136472937607666e-06, + "loss": 6.0648, + "step": 11280 + }, + { + "epoch": 1.21, + "learning_rate": 1.9000299602241047e-06, + "loss": 6.0777, + "step": 11290 + }, + { + "epoch": 1.21, + "learning_rate": 1.8864561618602672e-06, + "loss": 6.0738, + "step": 11300 + }, + { + "epoch": 1.21, + "learning_rate": 1.872925971624785e-06, + "loss": 6.0734, + "step": 11310 + }, + { + "epoch": 1.22, + "learning_rate": 1.859439462238818e-06, + "loss": 6.0305, + "step": 11320 + }, + { + "epoch": 1.22, + "learning_rate": 1.8459967061887406e-06, + "loss": 6.0574, + "step": 11330 + }, + { + "epoch": 1.22, + "learning_rate": 1.8325977757257784e-06, + "loss": 6.0664, + "step": 11340 + }, + { + "epoch": 1.22, + "learning_rate": 1.8192427428655945e-06, + "loss": 6.0687, + "step": 11350 + }, + { + "epoch": 1.22, + "learning_rate": 1.8059316793879233e-06, + "loss": 6.0414, + "step": 11360 + }, + { + "epoch": 1.22, + "learning_rate": 1.792664656836166e-06, + "loss": 6.0719, + "step": 11370 + }, + { + "epoch": 1.22, + "learning_rate": 1.7794417465170233e-06, + "loss": 6.0297, + "step": 11380 + }, + { + "epoch": 1.22, + "learning_rate": 1.7662630195001051e-06, + "loss": 6.0582, + "step": 11390 + }, + { + "epoch": 1.22, + "learning_rate": 1.7531285466175408e-06, + "loss": 6.0719, + "step": 11400 + }, + { + "epoch": 1.23, + "learning_rate": 1.7400383984636127e-06, + "loss": 6.052, + "step": 11410 + }, + { + "epoch": 1.23, + "learning_rate": 1.7269926453943619e-06, + "loss": 6.0406, + "step": 11420 + }, + { + "epoch": 1.23, + "learning_rate": 1.7139913575272282e-06, + "loss": 6.091, + "step": 11430 + }, + { + "epoch": 1.23, + "learning_rate": 1.7010346047406522e-06, + "loss": 6.0695, + "step": 11440 + }, + { + "epoch": 1.23, + "learning_rate": 1.6881224566737187e-06, + "loss": 6.0621, + "step": 11450 + }, + { + "epoch": 1.23, + "learning_rate": 1.6752549827257669e-06, + "loss": 6.048, + "step": 11460 + }, + { + "epoch": 1.23, + "learning_rate": 1.6624322520560321e-06, + "loss": 6.0758, + "step": 11470 + }, + { + "epoch": 1.23, + "learning_rate": 1.6496543335832583e-06, + "loss": 6.0355, + "step": 11480 + }, + { + "epoch": 1.23, + "learning_rate": 1.6369212959853441e-06, + "loss": 6.0785, + "step": 11490 + }, + { + "epoch": 1.23, + "learning_rate": 1.6242332076989586e-06, + "loss": 6.0641, + "step": 11500 + }, + { + "epoch": 1.24, + "learning_rate": 1.6115901369191855e-06, + "loss": 6.0555, + "step": 11510 + }, + { + "epoch": 1.24, + "learning_rate": 1.598992151599147e-06, + "loss": 6.0336, + "step": 11520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5864393194496474e-06, + "loss": 6.0848, + "step": 11530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5739317079387994e-06, + "loss": 6.0563, + "step": 11540 + }, + { + "epoch": 1.24, + "learning_rate": 1.561469384291674e-06, + "loss": 6.0695, + "step": 11550 + }, + { + "epoch": 1.24, + "learning_rate": 1.5490524154899234e-06, + "loss": 6.0297, + "step": 11560 + }, + { + "epoch": 1.24, + "learning_rate": 1.5366808682714396e-06, + "loss": 6.0676, + "step": 11570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5243548091299753e-06, + "loss": 6.0496, + "step": 11580 + }, + { + "epoch": 1.24, + "learning_rate": 1.5120743043148066e-06, + "loss": 6.0711, + "step": 11590 + }, + { + "epoch": 1.25, + "learning_rate": 1.4998394198303589e-06, + "loss": 6.1078, + "step": 11600 + }, + { + "epoch": 1.25, + "learning_rate": 1.4876502214358678e-06, + "loss": 6.0664, + "step": 11610 + }, + { + "epoch": 1.25, + "learning_rate": 1.4755067746450113e-06, + "loss": 6.0578, + "step": 11620 + }, + { + "epoch": 1.25, + "learning_rate": 1.4634091447255705e-06, + "loss": 6.0715, + "step": 11630 + }, + { + "epoch": 1.25, + "learning_rate": 1.4513573966990735e-06, + "loss": 6.0664, + "step": 11640 + }, + { + "epoch": 1.25, + "learning_rate": 1.439351595340437e-06, + "loss": 6.0563, + "step": 11650 + }, + { + "epoch": 1.25, + "learning_rate": 1.4273918051776392e-06, + "loss": 6.0371, + "step": 11660 + }, + { + "epoch": 1.25, + "learning_rate": 1.415478090491348e-06, + "loss": 6.0574, + "step": 11670 + }, + { + "epoch": 1.25, + "learning_rate": 1.4036105153145996e-06, + "loss": 6.084, + "step": 11680 + }, + { + "epoch": 1.26, + "learning_rate": 1.3917891434324305e-06, + "loss": 6.0582, + "step": 11690 + }, + { + "epoch": 1.26, + "learning_rate": 1.3800140383815585e-06, + "loss": 6.0516, + "step": 11700 + }, + { + "epoch": 1.26, + "learning_rate": 1.368285263450021e-06, + "loss": 6.034, + "step": 11710 + }, + { + "epoch": 1.26, + "learning_rate": 1.3566028816768494e-06, + "loss": 6.0633, + "step": 11720 + }, + { + "epoch": 1.26, + "learning_rate": 1.3449669558517187e-06, + "loss": 6.0625, + "step": 11730 + }, + { + "epoch": 1.26, + "learning_rate": 1.3333775485146217e-06, + "loss": 6.034, + "step": 11740 + }, + { + "epoch": 1.26, + "learning_rate": 1.3218347219555195e-06, + "loss": 6.0598, + "step": 11750 + }, + { + "epoch": 1.26, + "learning_rate": 1.3103385382140222e-06, + "loss": 6.0699, + "step": 11760 + }, + { + "epoch": 1.26, + "learning_rate": 1.2988890590790394e-06, + "loss": 6.0492, + "step": 11770 + }, + { + "epoch": 1.26, + "learning_rate": 1.2874863460884635e-06, + "loss": 6.048, + "step": 11780 + }, + { + "epoch": 1.27, + "learning_rate": 1.2761304605288216e-06, + "loss": 6.0598, + "step": 11790 + }, + { + "epoch": 1.27, + "learning_rate": 1.2648214634349688e-06, + "loss": 6.073, + "step": 11800 + }, + { + "epoch": 1.27, + "learning_rate": 1.2535594155897346e-06, + "loss": 6.0625, + "step": 11810 + }, + { + "epoch": 1.27, + "learning_rate": 1.2423443775236177e-06, + "loss": 6.0449, + "step": 11820 + }, + { + "epoch": 1.27, + "learning_rate": 1.231176409514445e-06, + "loss": 6.0402, + "step": 11830 + }, + { + "epoch": 1.27, + "learning_rate": 1.2200555715870631e-06, + "loss": 6.0574, + "step": 11840 + }, + { + "epoch": 1.27, + "learning_rate": 1.2089819235129964e-06, + "loss": 6.084, + "step": 11850 + }, + { + "epoch": 1.27, + "learning_rate": 1.197955524810146e-06, + "loss": 6.0508, + "step": 11860 + }, + { + "epoch": 1.27, + "learning_rate": 1.186976434742454e-06, + "loss": 6.0547, + "step": 11870 + }, + { + "epoch": 1.28, + "learning_rate": 1.176044712319595e-06, + "loss": 6.0789, + "step": 11880 + }, + { + "epoch": 1.28, + "learning_rate": 1.1651604162966511e-06, + "loss": 6.0516, + "step": 11890 + }, + { + "epoch": 1.28, + "learning_rate": 1.154323605173806e-06, + "loss": 6.0617, + "step": 11900 + }, + { + "epoch": 1.28, + "learning_rate": 1.1435343371960183e-06, + "loss": 6.0625, + "step": 11910 + }, + { + "epoch": 1.28, + "learning_rate": 1.1327926703527203e-06, + "loss": 6.0047, + "step": 11920 + }, + { + "epoch": 1.28, + "learning_rate": 1.1220986623774966e-06, + "loss": 6.0684, + "step": 11930 + }, + { + "epoch": 1.28, + "learning_rate": 1.1114523707477809e-06, + "loss": 6.0711, + "step": 11940 + }, + { + "epoch": 1.28, + "learning_rate": 1.1008538526845468e-06, + "loss": 6.0625, + "step": 11950 + }, + { + "epoch": 1.28, + "learning_rate": 1.0903031651519902e-06, + "loss": 6.0687, + "step": 11960 + }, + { + "epoch": 1.29, + "learning_rate": 1.0798003648572387e-06, + "loss": 6.0473, + "step": 11970 + }, + { + "epoch": 1.29, + "learning_rate": 1.0693455082500303e-06, + "loss": 6.0625, + "step": 11980 + }, + { + "epoch": 1.29, + "learning_rate": 1.0589386515224286e-06, + "loss": 6.052, + "step": 11990 + }, + { + "epoch": 1.29, + "learning_rate": 1.048579850608502e-06, + "loss": 6.0332, + "step": 12000 + }, + { + "epoch": 1.29, + "learning_rate": 1.0382691611840367e-06, + "loss": 6.059, + "step": 12010 + }, + { + "epoch": 1.29, + "learning_rate": 1.0280066386662303e-06, + "loss": 6.0367, + "step": 12020 + }, + { + "epoch": 1.29, + "learning_rate": 1.0198313378998736e-06, + "loss": 6.0578, + "step": 12030 + }, + { + "epoch": 1.29, + "learning_rate": 1.009655654638846e-06, + "loss": 6.0637, + "step": 12040 + }, + { + "epoch": 1.29, + "learning_rate": 9.995282920744242e-07, + "loss": 6.0613, + "step": 12050 + }, + { + "epoch": 1.29, + "learning_rate": 9.894493046384711e-07, + "loss": 6.0344, + "step": 12060 + }, + { + "epoch": 1.3, + "learning_rate": 9.794187465028527e-07, + "loss": 6.0652, + "step": 12070 + }, + { + "epoch": 1.3, + "learning_rate": 9.694366715791327e-07, + "loss": 6.0383, + "step": 12080 + }, + { + "epoch": 1.3, + "learning_rate": 9.595031335182968e-07, + "loss": 6.0664, + "step": 12090 + }, + { + "epoch": 1.3, + "learning_rate": 9.496181857104536e-07, + "loss": 6.0648, + "step": 12100 + }, + { + "epoch": 1.3, + "learning_rate": 9.397818812845571e-07, + "loss": 6.0461, + "step": 12110 + }, + { + "epoch": 1.3, + "learning_rate": 9.299942731081091e-07, + "loss": 6.0785, + "step": 12120 + }, + { + "epoch": 1.3, + "learning_rate": 9.202554137868913e-07, + "loss": 6.0707, + "step": 12130 + }, + { + "epoch": 1.3, + "learning_rate": 9.105653556646621e-07, + "loss": 6.0617, + "step": 12140 + }, + { + "epoch": 1.3, + "learning_rate": 9.009241508228972e-07, + "loss": 6.0738, + "step": 12150 + }, + { + "epoch": 1.31, + "learning_rate": 8.913318510804914e-07, + "loss": 6.0594, + "step": 12160 + }, + { + "epoch": 1.31, + "learning_rate": 8.81788507993494e-07, + "loss": 6.0766, + "step": 12170 + }, + { + "epoch": 1.31, + "learning_rate": 8.722941728548173e-07, + "loss": 6.0461, + "step": 12180 + }, + { + "epoch": 1.31, + "learning_rate": 8.628488966939785e-07, + "loss": 6.0582, + "step": 12190 + }, + { + "epoch": 1.31, + "learning_rate": 8.534527302768058e-07, + "loss": 6.0766, + "step": 12200 + }, + { + "epoch": 1.31, + "learning_rate": 8.441057241051842e-07, + "loss": 6.0531, + "step": 12210 + }, + { + "epoch": 1.31, + "learning_rate": 8.348079284167743e-07, + "loss": 6.0723, + "step": 12220 + }, + { + "epoch": 1.31, + "learning_rate": 8.255593931847372e-07, + "loss": 6.0355, + "step": 12230 + }, + { + "epoch": 1.31, + "learning_rate": 8.163601681174793e-07, + "loss": 6.0605, + "step": 12240 + }, + { + "epoch": 1.32, + "learning_rate": 8.072103026583722e-07, + "loss": 6.0363, + "step": 12250 + }, + { + "epoch": 1.32, + "learning_rate": 7.981098459855008e-07, + "loss": 6.0711, + "step": 12260 + }, + { + "epoch": 1.32, + "learning_rate": 7.890588470113802e-07, + "loss": 6.0754, + "step": 12270 + }, + { + "epoch": 1.32, + "learning_rate": 7.800573543827139e-07, + "loss": 6.0641, + "step": 12280 + }, + { + "epoch": 1.32, + "learning_rate": 7.711054164801147e-07, + "loss": 6.0539, + "step": 12290 + }, + { + "epoch": 1.32, + "learning_rate": 7.622030814178582e-07, + "loss": 6.0367, + "step": 12300 + }, + { + "epoch": 1.32, + "learning_rate": 7.533503970436096e-07, + "loss": 6.0805, + "step": 12310 + }, + { + "epoch": 1.32, + "learning_rate": 7.445474109381856e-07, + "loss": 6.0582, + "step": 12320 + }, + { + "epoch": 1.32, + "learning_rate": 7.357941704152771e-07, + "loss": 6.0387, + "step": 12330 + }, + { + "epoch": 1.32, + "learning_rate": 7.270907225212154e-07, + "loss": 6.0566, + "step": 12340 + }, + { + "epoch": 1.33, + "learning_rate": 7.184371140347024e-07, + "loss": 6.0488, + "step": 12350 + }, + { + "epoch": 1.33, + "learning_rate": 7.098333914665723e-07, + "loss": 6.0426, + "step": 12360 + }, + { + "epoch": 1.33, + "learning_rate": 7.012796010595302e-07, + "loss": 6.0699, + "step": 12370 + }, + { + "epoch": 1.33, + "learning_rate": 6.927757887879139e-07, + "loss": 6.0508, + "step": 12380 + }, + { + "epoch": 1.33, + "learning_rate": 6.84322000357438e-07, + "loss": 6.0426, + "step": 12390 + }, + { + "epoch": 1.33, + "learning_rate": 6.75918281204958e-07, + "loss": 6.0449, + "step": 12400 + }, + { + "epoch": 1.33, + "learning_rate": 6.675646764982147e-07, + "loss": 6.0672, + "step": 12410 + }, + { + "epoch": 1.33, + "learning_rate": 6.592612311355994e-07, + "loss": 6.0793, + "step": 12420 + }, + { + "epoch": 1.33, + "learning_rate": 6.510079897459109e-07, + "loss": 6.0563, + "step": 12430 + }, + { + "epoch": 1.34, + "learning_rate": 6.428049966881156e-07, + "loss": 6.0484, + "step": 12440 + }, + { + "epoch": 1.34, + "learning_rate": 6.346522960511048e-07, + "loss": 6.0473, + "step": 12450 + }, + { + "epoch": 1.34, + "learning_rate": 6.265499316534651e-07, + "loss": 6.0539, + "step": 12460 + }, + { + "epoch": 1.34, + "learning_rate": 6.184979470432407e-07, + "loss": 6.0727, + "step": 12470 + }, + { + "epoch": 1.34, + "learning_rate": 6.104963854976931e-07, + "loss": 6.091, + "step": 12480 + }, + { + "epoch": 1.34, + "learning_rate": 6.025452900230777e-07, + "loss": 6.0512, + "step": 12490 + }, + { + "epoch": 1.34, + "learning_rate": 5.946447033544034e-07, + "loss": 6.0816, + "step": 12500 + }, + { + "epoch": 1.34, + "learning_rate": 5.867946679552138e-07, + "loss": 6.0738, + "step": 12510 + }, + { + "epoch": 1.34, + "learning_rate": 5.789952260173481e-07, + "loss": 6.0605, + "step": 12520 + }, + { + "epoch": 1.35, + "learning_rate": 5.712464194607214e-07, + "loss": 6.0344, + "step": 12530 + }, + { + "epoch": 1.35, + "learning_rate": 5.635482899330968e-07, + "loss": 6.048, + "step": 12540 + }, + { + "epoch": 1.35, + "learning_rate": 5.559008788098619e-07, + "loss": 6.0762, + "step": 12550 + }, + { + "epoch": 1.35, + "learning_rate": 5.483042271938055e-07, + "loss": 6.0738, + "step": 12560 + }, + { + "epoch": 1.35, + "learning_rate": 5.407583759149005e-07, + "loss": 6.0574, + "step": 12570 + }, + { + "epoch": 1.35, + "learning_rate": 5.332633655300767e-07, + "loss": 6.0637, + "step": 12580 + }, + { + "epoch": 1.35, + "learning_rate": 5.258192363230141e-07, + "loss": 6.0281, + "step": 12590 + }, + { + "epoch": 1.35, + "learning_rate": 5.184260283039133e-07, + "loss": 6.0754, + "step": 12600 + }, + { + "epoch": 1.35, + "learning_rate": 5.110837812092906e-07, + "loss": 6.0586, + "step": 12610 + }, + { + "epoch": 1.35, + "learning_rate": 5.037925345017635e-07, + "loss": 6.0395, + "step": 12620 + }, + { + "epoch": 1.36, + "learning_rate": 4.965523273698292e-07, + "loss": 6.0422, + "step": 12630 + }, + { + "epoch": 1.36, + "learning_rate": 4.893631987276682e-07, + "loss": 6.0191, + "step": 12640 + }, + { + "epoch": 1.36, + "learning_rate": 4.822251872149219e-07, + "loss": 6.0445, + "step": 12650 + }, + { + "epoch": 1.36, + "learning_rate": 4.7513833119649633e-07, + "loss": 6.075, + "step": 12660 + }, + { + "epoch": 1.36, + "learning_rate": 4.6810266876234247e-07, + "loss": 6.0559, + "step": 12670 + }, + { + "epoch": 1.36, + "learning_rate": 4.611182377272705e-07, + "loss": 6.034, + "step": 12680 + }, + { + "epoch": 1.36, + "learning_rate": 4.541850756307231e-07, + "loss": 6.0461, + "step": 12690 + }, + { + "epoch": 1.36, + "learning_rate": 4.4730321973659787e-07, + "loss": 6.0617, + "step": 12700 + }, + { + "epoch": 1.36, + "learning_rate": 4.4047270703302613e-07, + "loss": 6.0371, + "step": 12710 + }, + { + "epoch": 1.37, + "learning_rate": 4.3369357423219016e-07, + "loss": 6.0852, + "step": 12720 + }, + { + "epoch": 1.37, + "learning_rate": 4.26965857770113e-07, + "loss": 6.0535, + "step": 12730 + }, + { + "epoch": 1.37, + "learning_rate": 4.2028959380647327e-07, + "loss": 6.032, + "step": 12740 + }, + { + "epoch": 1.37, + "learning_rate": 4.1366481822440186e-07, + "loss": 6.0559, + "step": 12750 + }, + { + "epoch": 1.37, + "learning_rate": 4.070915666302999e-07, + "loss": 6.0488, + "step": 12760 + }, + { + "epoch": 1.37, + "learning_rate": 4.0056987435363346e-07, + "loss": 6.0687, + "step": 12770 + }, + { + "epoch": 1.37, + "learning_rate": 3.9409977644675577e-07, + "loss": 6.0582, + "step": 12780 + }, + { + "epoch": 1.37, + "learning_rate": 3.87681307684713e-07, + "loss": 6.0512, + "step": 12790 + }, + { + "epoch": 1.37, + "learning_rate": 3.8131450256505773e-07, + "loss": 6.0551, + "step": 12800 + }, + { + "epoch": 1.38, + "learning_rate": 3.749993953076647e-07, + "loss": 6.0609, + "step": 12810 + }, + { + "epoch": 1.38, + "learning_rate": 3.6873601985454863e-07, + "loss": 6.0809, + "step": 12820 + }, + { + "epoch": 1.38, + "learning_rate": 3.625244098696734e-07, + "loss": 6.0723, + "step": 12830 + }, + { + "epoch": 1.38, + "learning_rate": 3.563645987387865e-07, + "loss": 6.0578, + "step": 12840 + }, + { + "epoch": 1.38, + "learning_rate": 3.502566195692214e-07, + "loss": 6.0758, + "step": 12850 + }, + { + "epoch": 1.38, + "learning_rate": 3.442005051897357e-07, + "loss": 6.0406, + "step": 12860 + }, + { + "epoch": 1.38, + "learning_rate": 3.38196288150322e-07, + "loss": 6.0484, + "step": 12870 + }, + { + "epoch": 1.38, + "learning_rate": 3.322440007220429e-07, + "loss": 6.084, + "step": 12880 + }, + { + "epoch": 1.38, + "learning_rate": 3.263436748968507e-07, + "loss": 6.0461, + "step": 12890 + }, + { + "epoch": 1.39, + "learning_rate": 3.204953423874202e-07, + "loss": 6.0746, + "step": 12900 + }, + { + "epoch": 1.39, + "learning_rate": 3.146990346269729e-07, + "loss": 6.0555, + "step": 12910 + }, + { + "epoch": 1.39, + "learning_rate": 3.089547827691142e-07, + "loss": 6.0582, + "step": 12920 + }, + { + "epoch": 1.39, + "learning_rate": 3.0326261768766073e-07, + "loss": 6.0473, + "step": 12930 + }, + { + "epoch": 1.39, + "learning_rate": 2.97622569976479e-07, + "loss": 6.0652, + "step": 12940 + }, + { + "epoch": 1.39, + "learning_rate": 2.920346699493137e-07, + "loss": 6.0508, + "step": 12950 + }, + { + "epoch": 1.39, + "learning_rate": 2.864989476396385e-07, + "loss": 6.066, + "step": 12960 + }, + { + "epoch": 1.39, + "learning_rate": 2.810154328004755e-07, + "loss": 6.0574, + "step": 12970 + }, + { + "epoch": 1.39, + "learning_rate": 2.7558415490425375e-07, + "loss": 6.0547, + "step": 12980 + }, + { + "epoch": 1.39, + "learning_rate": 2.702051431426367e-07, + "loss": 6.0547, + "step": 12990 + }, + { + "epoch": 1.4, + "learning_rate": 2.6487842642637704e-07, + "loss": 6.0609, + "step": 13000 + }, + { + "epoch": 1.4, + "learning_rate": 2.5960403338515016e-07, + "loss": 6.0945, + "step": 13010 + }, + { + "epoch": 1.4, + "learning_rate": 2.5438199236740955e-07, + "loss": 6.0516, + "step": 13020 + }, + { + "epoch": 1.4, + "learning_rate": 2.502420718806597e-07, + "loss": 6.0559, + "step": 13030 + }, + { + "epoch": 1.4, + "learning_rate": 2.45114335045038e-07, + "loss": 6.0613, + "step": 13040 + }, + { + "epoch": 1.4, + "learning_rate": 2.4003902811115644e-07, + "loss": 6.057, + "step": 13050 + }, + { + "epoch": 1.4, + "learning_rate": 2.3501617835743007e-07, + "loss": 6.0691, + "step": 13060 + }, + { + "epoch": 1.4, + "learning_rate": 2.3004581278033734e-07, + "loss": 6.0527, + "step": 13070 + }, + { + "epoch": 1.4, + "learning_rate": 2.2512795809426003e-07, + "loss": 6.0363, + "step": 13080 + }, + { + "epoch": 1.41, + "learning_rate": 2.2026264073135462e-07, + "loss": 6.0641, + "step": 13090 + }, + { + "epoch": 1.41, + "learning_rate": 2.15449886841399e-07, + "loss": 6.0711, + "step": 13100 + }, + { + "epoch": 1.41, + "learning_rate": 2.106897222916593e-07, + "loss": 6.0664, + "step": 13110 + }, + { + "epoch": 1.41, + "learning_rate": 2.0598217266674658e-07, + "loss": 6.0656, + "step": 13120 + }, + { + "epoch": 1.41, + "learning_rate": 2.013272632684815e-07, + "loss": 6.0711, + "step": 13130 + }, + { + "epoch": 1.41, + "learning_rate": 1.9672501911575658e-07, + "loss": 6.0543, + "step": 13140 + }, + { + "epoch": 1.41, + "learning_rate": 1.921754649444041e-07, + "loss": 6.0691, + "step": 13150 + }, + { + "epoch": 1.41, + "learning_rate": 1.876786252070606e-07, + "loss": 6.0379, + "step": 13160 + }, + { + "epoch": 1.41, + "learning_rate": 1.832345240730371e-07, + "loss": 6.0695, + "step": 13170 + }, + { + "epoch": 1.42, + "learning_rate": 1.7884318542818912e-07, + "loss": 6.0922, + "step": 13180 + }, + { + "epoch": 1.42, + "learning_rate": 1.7450463287478792e-07, + "loss": 6.0777, + "step": 13190 + }, + { + "epoch": 1.42, + "learning_rate": 1.7021888973139166e-07, + "loss": 6.0594, + "step": 13200 + }, + { + "epoch": 1.42, + "learning_rate": 1.659859790327245e-07, + "loss": 6.0664, + "step": 13210 + }, + { + "epoch": 1.42, + "learning_rate": 1.6180592352955105e-07, + "loss": 6.0535, + "step": 13220 + }, + { + "epoch": 1.42, + "learning_rate": 1.5767874568854868e-07, + "loss": 6.068, + "step": 13230 + }, + { + "epoch": 1.42, + "learning_rate": 1.5360446769219663e-07, + "loss": 6.066, + "step": 13240 + }, + { + "epoch": 1.42, + "learning_rate": 1.4958311143864922e-07, + "loss": 6.0391, + "step": 13250 + }, + { + "epoch": 1.42, + "learning_rate": 1.456146985416207e-07, + "loss": 6.05, + "step": 13260 + }, + { + "epoch": 1.42, + "learning_rate": 1.4169925033026832e-07, + "loss": 6.0711, + "step": 13270 + }, + { + "epoch": 1.43, + "learning_rate": 1.3783678784908162e-07, + "loss": 6.0781, + "step": 13280 + }, + { + "epoch": 1.43, + "learning_rate": 1.3402733185776006e-07, + "loss": 6.0809, + "step": 13290 + }, + { + "epoch": 1.43, + "learning_rate": 1.3027090283111442e-07, + "loss": 6.0605, + "step": 13300 + }, + { + "epoch": 1.43, + "learning_rate": 1.2656752095894342e-07, + "loss": 6.0758, + "step": 13310 + }, + { + "epoch": 1.43, + "learning_rate": 1.2291720614593493e-07, + "loss": 6.059, + "step": 13320 + }, + { + "epoch": 1.43, + "learning_rate": 1.1931997801155614e-07, + "loss": 6.0414, + "step": 13330 + }, + { + "epoch": 1.43, + "learning_rate": 1.1577585588994466e-07, + "loss": 6.0504, + "step": 13340 + }, + { + "epoch": 1.43, + "learning_rate": 1.1228485882980977e-07, + "loss": 6.0492, + "step": 13350 + }, + { + "epoch": 1.43, + "learning_rate": 1.0884700559432693e-07, + "loss": 6.0301, + "step": 13360 + }, + { + "epoch": 1.44, + "learning_rate": 1.0546231466103785e-07, + "loss": 6.048, + "step": 13370 + }, + { + "epoch": 1.44, + "learning_rate": 1.0213080422175281e-07, + "loss": 6.0687, + "step": 13380 + }, + { + "epoch": 1.44, + "learning_rate": 9.88524921824463e-08, + "loss": 6.0645, + "step": 13390 + }, + { + "epoch": 1.44, + "learning_rate": 9.562739616317152e-08, + "loss": 6.0953, + "step": 13400 + }, + { + "epoch": 1.44, + "learning_rate": 9.2455533497956e-08, + "loss": 6.0484, + "step": 13410 + }, + { + "epoch": 1.44, + "learning_rate": 8.933692123471282e-08, + "loss": 6.041, + "step": 13420 + }, + { + "epoch": 1.44, + "learning_rate": 8.627157613514958e-08, + "loss": 6.0555, + "step": 13430 + }, + { + "epoch": 1.44, + "learning_rate": 8.32595146746773e-08, + "loss": 6.0844, + "step": 13440 + }, + { + "epoch": 1.44, + "learning_rate": 8.030075304231944e-08, + "loss": 6.0934, + "step": 13450 + }, + { + "epoch": 1.45, + "learning_rate": 7.73953071406297e-08, + "loss": 6.0422, + "step": 13460 + }, + { + "epoch": 1.45, + "learning_rate": 7.454319258560105e-08, + "loss": 6.0812, + "step": 13470 + }, + { + "epoch": 1.45, + "learning_rate": 7.174442470658794e-08, + "loss": 6.0656, + "step": 13480 + }, + { + "epoch": 1.45, + "learning_rate": 6.89990185462186e-08, + "loss": 6.0656, + "step": 13490 + }, + { + "epoch": 1.45, + "learning_rate": 6.630698886031738e-08, + "loss": 6.0613, + "step": 13500 + }, + { + "epoch": 1.45, + "learning_rate": 6.366835011782368e-08, + "loss": 6.0633, + "step": 13510 + }, + { + "epoch": 1.45, + "learning_rate": 6.10831165007153e-08, + "loss": 6.0691, + "step": 13520 + }, + { + "epoch": 1.45, + "learning_rate": 5.855130190393188e-08, + "loss": 6.0508, + "step": 13530 + }, + { + "epoch": 1.45, + "learning_rate": 5.6072919935298286e-08, + "loss": 6.0883, + "step": 13540 + }, + { + "epoch": 1.45, + "learning_rate": 5.3647983915456894e-08, + "loss": 6.032, + "step": 13550 + }, + { + "epoch": 1.46, + "learning_rate": 5.127650687778873e-08, + "loss": 6.0875, + "step": 13560 + }, + { + "epoch": 1.46, + "learning_rate": 4.895850156834914e-08, + "loss": 6.0285, + "step": 13570 + }, + { + "epoch": 1.46, + "learning_rate": 4.669398044579776e-08, + "loss": 6.0605, + "step": 13580 + }, + { + "epoch": 1.46, + "learning_rate": 4.448295568132866e-08, + "loss": 6.0648, + "step": 13590 + }, + { + "epoch": 1.46, + "learning_rate": 4.2325439158609204e-08, + "loss": 6.0863, + "step": 13600 + }, + { + "epoch": 1.46, + "learning_rate": 4.0221442473713514e-08, + "loss": 6.0613, + "step": 13610 + }, + { + "epoch": 1.46, + "learning_rate": 3.8170976935062445e-08, + "loss": 6.0828, + "step": 13620 + }, + { + "epoch": 1.46, + "learning_rate": 3.617405356335701e-08, + "loss": 6.0883, + "step": 13630 + }, + { + "epoch": 1.46, + "learning_rate": 3.4230683091529545e-08, + "loss": 6.0465, + "step": 13640 + }, + { + "epoch": 1.47, + "learning_rate": 3.2340875964674833e-08, + "loss": 6.0465, + "step": 13650 + }, + { + "epoch": 1.47, + "learning_rate": 3.050464234000017e-08, + "loss": 6.0324, + "step": 13660 + }, + { + "epoch": 1.47, + "learning_rate": 2.8721992086772065e-08, + "loss": 6.0449, + "step": 13670 + }, + { + "epoch": 1.47, + "learning_rate": 2.6992934786257418e-08, + "loss": 6.0422, + "step": 13680 + }, + { + "epoch": 1.47, + "learning_rate": 2.5317479731677973e-08, + "loss": 6.0754, + "step": 13690 + }, + { + "epoch": 1.47, + "learning_rate": 2.3695635928155935e-08, + "loss": 6.084, + "step": 13700 + }, + { + "epoch": 1.47, + "learning_rate": 2.2127412092668444e-08, + "loss": 6.0457, + "step": 13710 + }, + { + "epoch": 1.47, + "learning_rate": 2.0612816653998723e-08, + "loss": 6.0687, + "step": 13720 + }, + { + "epoch": 1.47, + "learning_rate": 1.9151857752691684e-08, + "loss": 6.0527, + "step": 13730 + }, + { + "epoch": 1.48, + "learning_rate": 1.7744543241012823e-08, + "loss": 6.059, + "step": 13740 + }, + { + "epoch": 1.48, + "learning_rate": 1.639088068289829e-08, + "loss": 6.0598, + "step": 13750 + }, + { + "epoch": 1.48, + "learning_rate": 1.509087735392489e-08, + "loss": 6.0668, + "step": 13760 + }, + { + "epoch": 1.48, + "learning_rate": 1.3844540241261251e-08, + "loss": 6.0816, + "step": 13770 + }, + { + "epoch": 1.48, + "learning_rate": 1.2651876043637822e-08, + "loss": 6.0895, + "step": 13780 + }, + { + "epoch": 1.48, + "learning_rate": 1.1512891171303608e-08, + "loss": 6.0895, + "step": 13790 + }, + { + "epoch": 1.48, + "learning_rate": 1.0427591745999499e-08, + "loss": 6.0285, + "step": 13800 + }, + { + "epoch": 1.48, + "learning_rate": 9.395983600918313e-09, + "loss": 6.0953, + "step": 13810 + }, + { + "epoch": 1.48, + "learning_rate": 8.418072280679257e-09, + "loss": 6.0426, + "step": 13820 + }, + { + "epoch": 1.48, + "learning_rate": 7.493863041292405e-09, + "loss": 6.0602, + "step": 13830 + }, + { + "epoch": 1.49, + "learning_rate": 6.623360850136484e-09, + "loss": 6.0742, + "step": 13840 + }, + { + "epoch": 1.49, + "learning_rate": 5.8065703859278014e-09, + "loss": 6.0805, + "step": 13850 + }, + { + "epoch": 1.49, + "learning_rate": 5.0434960386969154e-09, + "loss": 6.0574, + "step": 13860 + }, + { + "epoch": 1.49, + "learning_rate": 4.334141909764223e-09, + "loss": 6.0777, + "step": 13870 + }, + { + "epoch": 1.49, + "learning_rate": 3.6785118117188544e-09, + "loss": 6.05, + "step": 13880 + }, + { + "epoch": 1.49, + "learning_rate": 3.0766092683953697e-09, + "loss": 6.0445, + "step": 13890 + }, + { + "epoch": 1.49, + "learning_rate": 2.5284375148615368e-09, + "loss": 6.0648, + "step": 13900 + }, + { + "epoch": 1.49, + "learning_rate": 2.033999497391692e-09, + "loss": 6.0672, + "step": 13910 + }, + { + "epoch": 1.49, + "learning_rate": 1.5932978734600757e-09, + "loss": 6.0727, + "step": 13920 + }, + { + "epoch": 1.5, + "learning_rate": 1.2063350117175188e-09, + "loss": 6.0816, + "step": 13930 + }, + { + "epoch": 1.5, + "learning_rate": 8.731129919892223e-10, + "loss": 6.066, + "step": 13940 + }, + { + "epoch": 1.5, + "learning_rate": 5.936336052514424e-10, + "loss": 6.0367, + "step": 13950 + }, + { + "epoch": 1.5, + "learning_rate": 3.6789835363260087e-10, + "loss": 6.0438, + "step": 13960 + }, + { + "epoch": 1.5, + "learning_rate": 1.9590845039885175e-10, + "loss": 6.0586, + "step": 13970 + }, + { + "epoch": 1.5, + "step": 13971, + "total_flos": 4.912468616465613e+16, + "train_loss": 6.204258486328824, + "train_runtime": 12473.8331, + "train_samples_per_second": 17.92, + "train_steps_per_second": 1.12 + } + ], + "max_steps": 13971, + "num_train_epochs": 2, + "total_flos": 4.912468616465613e+16, + "trial_name": null, + "trial_params": null +}