instruct-bodo / trainer_state.json
Sanjib Narzary
initial bodo english instruction based machine translation
540381a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.5,
"global_step": 13971,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.8571428571428575e-07,
"loss": 10.6891,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 7.61904761904762e-07,
"loss": 10.6539,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 1.2380952380952382e-06,
"loss": 10.6617,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 1.7142857142857145e-06,
"loss": 10.6156,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 2.1904761904761908e-06,
"loss": 10.6289,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 2.666666666666667e-06,
"loss": 10.6148,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 3.142857142857143e-06,
"loss": 10.5672,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 3.6190476190476194e-06,
"loss": 10.5367,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 4.095238095238096e-06,
"loss": 10.5062,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 4.571428571428572e-06,
"loss": 10.4492,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 5.047619047619048e-06,
"loss": 10.4242,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 5.523809523809525e-06,
"loss": 10.3461,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 6e-06,
"loss": 10.2758,
"step": 130
},
{
"epoch": 0.02,
"learning_rate": 6.476190476190477e-06,
"loss": 10.2016,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 6.952380952380952e-06,
"loss": 10.132,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 7.428571428571429e-06,
"loss": 10.0852,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 7.904761904761904e-06,
"loss": 10.0258,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 8.380952380952382e-06,
"loss": 10.0055,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 8.857142857142858e-06,
"loss": 9.9734,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 9.333333333333334e-06,
"loss": 9.9578,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 9.80952380952381e-06,
"loss": 9.9516,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 1.0285714285714285e-05,
"loss": 9.9172,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 1.0761904761904763e-05,
"loss": 9.8992,
"step": 230
},
{
"epoch": 0.03,
"learning_rate": 1.1238095238095239e-05,
"loss": 9.8641,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 1.1714285714285716e-05,
"loss": 9.8609,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 1.2190476190476192e-05,
"loss": 9.8406,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 1.2666666666666667e-05,
"loss": 9.7977,
"step": 270
},
{
"epoch": 0.03,
"learning_rate": 1.3142857142857145e-05,
"loss": 9.7937,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 1.361904761904762e-05,
"loss": 9.7711,
"step": 290
},
{
"epoch": 0.03,
"learning_rate": 1.4095238095238097e-05,
"loss": 9.7352,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 1.4571428571428573e-05,
"loss": 9.7195,
"step": 310
},
{
"epoch": 0.03,
"learning_rate": 1.5047619047619049e-05,
"loss": 9.7,
"step": 320
},
{
"epoch": 0.04,
"learning_rate": 1.5523809523809525e-05,
"loss": 9.6523,
"step": 330
},
{
"epoch": 0.04,
"learning_rate": 1.6000000000000003e-05,
"loss": 9.593,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 1.6476190476190477e-05,
"loss": 9.5703,
"step": 350
},
{
"epoch": 0.04,
"learning_rate": 1.6952380952380955e-05,
"loss": 9.5336,
"step": 360
},
{
"epoch": 0.04,
"learning_rate": 1.742857142857143e-05,
"loss": 9.5039,
"step": 370
},
{
"epoch": 0.04,
"learning_rate": 1.7904761904761907e-05,
"loss": 9.418,
"step": 380
},
{
"epoch": 0.04,
"learning_rate": 1.838095238095238e-05,
"loss": 9.3305,
"step": 390
},
{
"epoch": 0.04,
"learning_rate": 1.885714285714286e-05,
"loss": 9.3203,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 1.9333333333333333e-05,
"loss": 9.207,
"step": 410
},
{
"epoch": 0.05,
"learning_rate": 1.980952380952381e-05,
"loss": 9.1086,
"step": 420
},
{
"epoch": 0.05,
"learning_rate": 1.9999990325478594e-05,
"loss": 8.9594,
"step": 430
},
{
"epoch": 0.05,
"learning_rate": 1.9999931203471123e-05,
"loss": 8.5867,
"step": 440
},
{
"epoch": 0.05,
"learning_rate": 1.9999818334507674e-05,
"loss": 8.1969,
"step": 450
},
{
"epoch": 0.05,
"learning_rate": 1.9999651719194886e-05,
"loss": 7.7641,
"step": 460
},
{
"epoch": 0.05,
"learning_rate": 1.9999431358428275e-05,
"loss": 7.366,
"step": 470
},
{
"epoch": 0.05,
"learning_rate": 1.999915725339222e-05,
"loss": 7.082,
"step": 480
},
{
"epoch": 0.05,
"learning_rate": 1.9998829405559963e-05,
"loss": 6.9242,
"step": 490
},
{
"epoch": 0.05,
"learning_rate": 1.9998447816693596e-05,
"loss": 6.7367,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 1.999801248884406e-05,
"loss": 6.6734,
"step": 510
},
{
"epoch": 0.06,
"learning_rate": 1.9997523424351122e-05,
"loss": 6.5676,
"step": 520
},
{
"epoch": 0.06,
"learning_rate": 1.999698062584338e-05,
"loss": 6.4836,
"step": 530
},
{
"epoch": 0.06,
"learning_rate": 1.999638409623822e-05,
"loss": 6.4262,
"step": 540
},
{
"epoch": 0.06,
"learning_rate": 1.999573383874184e-05,
"loss": 6.4371,
"step": 550
},
{
"epoch": 0.06,
"learning_rate": 1.9995029856849192e-05,
"loss": 6.3914,
"step": 560
},
{
"epoch": 0.06,
"learning_rate": 1.9994272154343995e-05,
"loss": 6.3473,
"step": 570
},
{
"epoch": 0.06,
"learning_rate": 1.9993460735298695e-05,
"loss": 6.3812,
"step": 580
},
{
"epoch": 0.06,
"learning_rate": 1.9992595604074457e-05,
"loss": 6.3656,
"step": 590
},
{
"epoch": 0.06,
"learning_rate": 1.9991676765321124e-05,
"loss": 6.3387,
"step": 600
},
{
"epoch": 0.07,
"learning_rate": 1.999070422397721e-05,
"loss": 6.2977,
"step": 610
},
{
"epoch": 0.07,
"learning_rate": 1.998967798526987e-05,
"loss": 6.3191,
"step": 620
},
{
"epoch": 0.07,
"learning_rate": 1.9988598054714854e-05,
"loss": 6.2926,
"step": 630
},
{
"epoch": 0.07,
"learning_rate": 1.9987464438116506e-05,
"loss": 6.2914,
"step": 640
},
{
"epoch": 0.07,
"learning_rate": 1.998627714156771e-05,
"loss": 6.2887,
"step": 650
},
{
"epoch": 0.07,
"learning_rate": 1.9985036171449868e-05,
"loss": 6.2414,
"step": 660
},
{
"epoch": 0.07,
"learning_rate": 1.998374153443286e-05,
"loss": 6.2742,
"step": 670
},
{
"epoch": 0.07,
"learning_rate": 1.998239323747502e-05,
"loss": 6.248,
"step": 680
},
{
"epoch": 0.07,
"learning_rate": 1.9980991287823076e-05,
"loss": 6.2629,
"step": 690
},
{
"epoch": 0.08,
"learning_rate": 1.997953569301214e-05,
"loss": 6.2496,
"step": 700
},
{
"epoch": 0.08,
"learning_rate": 1.9978026460865634e-05,
"loss": 6.2461,
"step": 710
},
{
"epoch": 0.08,
"learning_rate": 1.997646359949529e-05,
"loss": 6.2062,
"step": 720
},
{
"epoch": 0.08,
"learning_rate": 1.9974847117301062e-05,
"loss": 6.2539,
"step": 730
},
{
"epoch": 0.08,
"learning_rate": 1.997317702297111e-05,
"loss": 6.2402,
"step": 740
},
{
"epoch": 0.08,
"learning_rate": 1.997145332548175e-05,
"loss": 6.2105,
"step": 750
},
{
"epoch": 0.08,
"learning_rate": 1.9969676034097386e-05,
"loss": 6.1992,
"step": 760
},
{
"epoch": 0.08,
"learning_rate": 1.996784515837049e-05,
"loss": 6.2246,
"step": 770
},
{
"epoch": 0.08,
"learning_rate": 1.9965960708141532e-05,
"loss": 6.2129,
"step": 780
},
{
"epoch": 0.08,
"learning_rate": 1.996402269353892e-05,
"loss": 6.2012,
"step": 790
},
{
"epoch": 0.09,
"learning_rate": 1.9962031124978974e-05,
"loss": 6.1855,
"step": 800
},
{
"epoch": 0.09,
"learning_rate": 1.995998601316583e-05,
"loss": 6.1867,
"step": 810
},
{
"epoch": 0.09,
"learning_rate": 1.9957887369091427e-05,
"loss": 6.232,
"step": 820
},
{
"epoch": 0.09,
"learning_rate": 1.9955735204035412e-05,
"loss": 6.2332,
"step": 830
},
{
"epoch": 0.09,
"learning_rate": 1.9953529529565098e-05,
"loss": 6.1937,
"step": 840
},
{
"epoch": 0.09,
"learning_rate": 1.9951270357535397e-05,
"loss": 6.2062,
"step": 850
},
{
"epoch": 0.09,
"learning_rate": 1.9948957700088747e-05,
"loss": 6.2254,
"step": 860
},
{
"epoch": 0.09,
"learning_rate": 1.9946591569655073e-05,
"loss": 6.2078,
"step": 870
},
{
"epoch": 0.09,
"learning_rate": 1.9944171978951687e-05,
"loss": 6.1742,
"step": 880
},
{
"epoch": 0.1,
"learning_rate": 1.9941698940983243e-05,
"loss": 6.2,
"step": 890
},
{
"epoch": 0.1,
"learning_rate": 1.993917246904166e-05,
"loss": 6.1895,
"step": 900
},
{
"epoch": 0.1,
"learning_rate": 1.9936592576706048e-05,
"loss": 6.1953,
"step": 910
},
{
"epoch": 0.1,
"learning_rate": 1.993395927784264e-05,
"loss": 6.1914,
"step": 920
},
{
"epoch": 0.1,
"learning_rate": 1.9931272586604712e-05,
"loss": 6.1883,
"step": 930
},
{
"epoch": 0.1,
"learning_rate": 1.992853251743251e-05,
"loss": 6.202,
"step": 940
},
{
"epoch": 0.1,
"learning_rate": 1.9925739085053175e-05,
"loss": 6.193,
"step": 950
},
{
"epoch": 0.1,
"learning_rate": 1.9922892304480657e-05,
"loss": 6.177,
"step": 960
},
{
"epoch": 0.1,
"learning_rate": 1.991999219101564e-05,
"loss": 6.1566,
"step": 970
},
{
"epoch": 0.11,
"learning_rate": 1.9917038760245463e-05,
"loss": 6.1863,
"step": 980
},
{
"epoch": 0.11,
"learning_rate": 1.991403202804402e-05,
"loss": 6.1723,
"step": 990
},
{
"epoch": 0.11,
"learning_rate": 1.99109720105717e-05,
"loss": 6.1937,
"step": 1000
},
{
"epoch": 0.11,
"learning_rate": 1.9907858724275272e-05,
"loss": 6.1742,
"step": 1010
},
{
"epoch": 0.11,
"learning_rate": 1.990469218588782e-05,
"loss": 6.1789,
"step": 1020
},
{
"epoch": 0.11,
"learning_rate": 1.990147241242864e-05,
"loss": 6.1598,
"step": 1030
},
{
"epoch": 0.11,
"learning_rate": 1.989819942120315e-05,
"loss": 6.1691,
"step": 1040
},
{
"epoch": 0.11,
"learning_rate": 1.989487322980281e-05,
"loss": 6.1512,
"step": 1050
},
{
"epoch": 0.11,
"learning_rate": 1.9891493856105007e-05,
"loss": 6.1652,
"step": 1060
},
{
"epoch": 0.11,
"learning_rate": 1.988806131827297e-05,
"loss": 6.1574,
"step": 1070
},
{
"epoch": 0.12,
"learning_rate": 1.9884575634755667e-05,
"loss": 6.1645,
"step": 1080
},
{
"epoch": 0.12,
"learning_rate": 1.9881036824287724e-05,
"loss": 6.1684,
"step": 1090
},
{
"epoch": 0.12,
"learning_rate": 1.9877444905889293e-05,
"loss": 6.1473,
"step": 1100
},
{
"epoch": 0.12,
"learning_rate": 1.987379989886598e-05,
"loss": 6.1648,
"step": 1110
},
{
"epoch": 0.12,
"learning_rate": 1.9870101822808717e-05,
"loss": 6.1359,
"step": 1120
},
{
"epoch": 0.12,
"learning_rate": 1.9866350697593682e-05,
"loss": 6.184,
"step": 1130
},
{
"epoch": 0.12,
"learning_rate": 1.9862546543382163e-05,
"loss": 6.1445,
"step": 1140
},
{
"epoch": 0.12,
"learning_rate": 1.985868938062048e-05,
"loss": 6.098,
"step": 1150
},
{
"epoch": 0.12,
"learning_rate": 1.9854779230039838e-05,
"loss": 6.1434,
"step": 1160
},
{
"epoch": 0.13,
"learning_rate": 1.985081611265626e-05,
"loss": 6.157,
"step": 1170
},
{
"epoch": 0.13,
"learning_rate": 1.9846800049770444e-05,
"loss": 6.1484,
"step": 1180
},
{
"epoch": 0.13,
"learning_rate": 1.9842731062967647e-05,
"loss": 6.148,
"step": 1190
},
{
"epoch": 0.13,
"learning_rate": 1.9838609174117588e-05,
"loss": 6.1594,
"step": 1200
},
{
"epoch": 0.13,
"learning_rate": 1.983443440537432e-05,
"loss": 6.1492,
"step": 1210
},
{
"epoch": 0.13,
"learning_rate": 1.9830206779176103e-05,
"loss": 6.1266,
"step": 1220
},
{
"epoch": 0.13,
"learning_rate": 1.9825926318245302e-05,
"loss": 6.1281,
"step": 1230
},
{
"epoch": 0.13,
"learning_rate": 1.982159304558825e-05,
"loss": 6.1383,
"step": 1240
},
{
"epoch": 0.13,
"learning_rate": 1.9817206984495123e-05,
"loss": 6.1152,
"step": 1250
},
{
"epoch": 0.14,
"learning_rate": 1.981276815853983e-05,
"loss": 6.1406,
"step": 1260
},
{
"epoch": 0.14,
"learning_rate": 1.9808276591579875e-05,
"loss": 6.1707,
"step": 1270
},
{
"epoch": 0.14,
"learning_rate": 1.980373230775623e-05,
"loss": 6.143,
"step": 1280
},
{
"epoch": 0.14,
"learning_rate": 1.9799135331493202e-05,
"loss": 6.118,
"step": 1290
},
{
"epoch": 0.14,
"learning_rate": 1.979448568749831e-05,
"loss": 6.1461,
"step": 1300
},
{
"epoch": 0.14,
"learning_rate": 1.9789783400762148e-05,
"loss": 6.1363,
"step": 1310
},
{
"epoch": 0.14,
"learning_rate": 1.9785028496558247e-05,
"loss": 6.1434,
"step": 1320
},
{
"epoch": 0.14,
"learning_rate": 1.978022100044295e-05,
"loss": 6.1383,
"step": 1330
},
{
"epoch": 0.14,
"learning_rate": 1.977536093825526e-05,
"loss": 6.1434,
"step": 1340
},
{
"epoch": 0.14,
"learning_rate": 1.977044833611671e-05,
"loss": 6.1512,
"step": 1350
},
{
"epoch": 0.15,
"learning_rate": 1.9765483220431227e-05,
"loss": 6.1508,
"step": 1360
},
{
"epoch": 0.15,
"learning_rate": 1.9760465617884978e-05,
"loss": 6.1277,
"step": 1370
},
{
"epoch": 0.15,
"learning_rate": 1.9755395555446233e-05,
"loss": 6.1164,
"step": 1380
},
{
"epoch": 0.15,
"learning_rate": 1.9750273060365225e-05,
"loss": 6.1246,
"step": 1390
},
{
"epoch": 0.15,
"learning_rate": 1.974509816017399e-05,
"loss": 6.123,
"step": 1400
},
{
"epoch": 0.15,
"learning_rate": 1.973987088268624e-05,
"loss": 6.1355,
"step": 1410
},
{
"epoch": 0.15,
"learning_rate": 1.973459125599719e-05,
"loss": 6.1129,
"step": 1420
},
{
"epoch": 0.15,
"learning_rate": 1.9729259308483418e-05,
"loss": 6.1164,
"step": 1430
},
{
"epoch": 0.15,
"learning_rate": 1.9723875068802722e-05,
"loss": 6.1363,
"step": 1440
},
{
"epoch": 0.16,
"learning_rate": 1.971843856589395e-05,
"loss": 6.1133,
"step": 1450
},
{
"epoch": 0.16,
"learning_rate": 1.9712949828976844e-05,
"loss": 6.1063,
"step": 1460
},
{
"epoch": 0.16,
"learning_rate": 1.9707408887551906e-05,
"loss": 6.1172,
"step": 1470
},
{
"epoch": 0.16,
"learning_rate": 1.9701815771400206e-05,
"loss": 6.1316,
"step": 1480
},
{
"epoch": 0.16,
"learning_rate": 1.9696170510583255e-05,
"loss": 6.1344,
"step": 1490
},
{
"epoch": 0.16,
"learning_rate": 1.9690473135442815e-05,
"loss": 6.1324,
"step": 1500
},
{
"epoch": 0.16,
"learning_rate": 1.9684723676600758e-05,
"loss": 6.1133,
"step": 1510
},
{
"epoch": 0.16,
"learning_rate": 1.9678922164958886e-05,
"loss": 6.1039,
"step": 1520
},
{
"epoch": 0.16,
"learning_rate": 1.9673068631698773e-05,
"loss": 6.1102,
"step": 1530
},
{
"epoch": 0.17,
"learning_rate": 1.9667163108281594e-05,
"loss": 6.1246,
"step": 1540
},
{
"epoch": 0.17,
"learning_rate": 1.9661205626447954e-05,
"loss": 6.1172,
"step": 1550
},
{
"epoch": 0.17,
"learning_rate": 1.9655196218217734e-05,
"loss": 6.118,
"step": 1560
},
{
"epoch": 0.17,
"learning_rate": 1.9649134915889886e-05,
"loss": 6.1008,
"step": 1570
},
{
"epoch": 0.17,
"learning_rate": 1.96430217520423e-05,
"loss": 6.1324,
"step": 1580
},
{
"epoch": 0.17,
"learning_rate": 1.9636856759531586e-05,
"loss": 6.1219,
"step": 1590
},
{
"epoch": 0.17,
"learning_rate": 1.9630639971492938e-05,
"loss": 6.1375,
"step": 1600
},
{
"epoch": 0.17,
"learning_rate": 1.9624371421339926e-05,
"loss": 6.0957,
"step": 1610
},
{
"epoch": 0.17,
"learning_rate": 1.961805114276433e-05,
"loss": 6.0938,
"step": 1620
},
{
"epoch": 0.18,
"learning_rate": 1.961167916973596e-05,
"loss": 6.1246,
"step": 1630
},
{
"epoch": 0.18,
"learning_rate": 1.9605255536502463e-05,
"loss": 6.1367,
"step": 1640
},
{
"epoch": 0.18,
"learning_rate": 1.959878027758915e-05,
"loss": 6.1066,
"step": 1650
},
{
"epoch": 0.18,
"learning_rate": 1.959225342779881e-05,
"loss": 6.1473,
"step": 1660
},
{
"epoch": 0.18,
"learning_rate": 1.9585675022211514e-05,
"loss": 6.118,
"step": 1670
},
{
"epoch": 0.18,
"learning_rate": 1.9579045096184433e-05,
"loss": 6.1016,
"step": 1680
},
{
"epoch": 0.18,
"learning_rate": 1.9572363685351642e-05,
"loss": 6.116,
"step": 1690
},
{
"epoch": 0.18,
"learning_rate": 1.9565630825623945e-05,
"loss": 6.1184,
"step": 1700
},
{
"epoch": 0.18,
"learning_rate": 1.955884655318865e-05,
"loss": 6.1043,
"step": 1710
},
{
"epoch": 0.18,
"learning_rate": 1.9552010904509424e-05,
"loss": 6.1344,
"step": 1720
},
{
"epoch": 0.19,
"learning_rate": 1.954512391632604e-05,
"loss": 6.1055,
"step": 1730
},
{
"epoch": 0.19,
"learning_rate": 1.9538185625654216e-05,
"loss": 6.0887,
"step": 1740
},
{
"epoch": 0.19,
"learning_rate": 1.9531196069785414e-05,
"loss": 6.107,
"step": 1750
},
{
"epoch": 0.19,
"learning_rate": 1.952415528628663e-05,
"loss": 6.1047,
"step": 1760
},
{
"epoch": 0.19,
"learning_rate": 1.9517063313000184e-05,
"loss": 6.0969,
"step": 1770
},
{
"epoch": 0.19,
"learning_rate": 1.950992018804354e-05,
"loss": 6.1191,
"step": 1780
},
{
"epoch": 0.19,
"learning_rate": 1.9502725949809086e-05,
"loss": 6.1414,
"step": 1790
},
{
"epoch": 0.19,
"learning_rate": 1.949548063696393e-05,
"loss": 6.1145,
"step": 1800
},
{
"epoch": 0.19,
"learning_rate": 1.9488184288449684e-05,
"loss": 6.1063,
"step": 1810
},
{
"epoch": 0.2,
"learning_rate": 1.9480836943482275e-05,
"loss": 6.0973,
"step": 1820
},
{
"epoch": 0.2,
"learning_rate": 1.9473438641551715e-05,
"loss": 6.1227,
"step": 1830
},
{
"epoch": 0.2,
"learning_rate": 1.9465989422421903e-05,
"loss": 6.1027,
"step": 1840
},
{
"epoch": 0.2,
"learning_rate": 1.9458489326130395e-05,
"loss": 6.0707,
"step": 1850
},
{
"epoch": 0.2,
"learning_rate": 1.9450938392988208e-05,
"loss": 6.1297,
"step": 1860
},
{
"epoch": 0.2,
"learning_rate": 1.9443336663579583e-05,
"loss": 6.1074,
"step": 1870
},
{
"epoch": 0.2,
"learning_rate": 1.943568417876178e-05,
"loss": 6.134,
"step": 1880
},
{
"epoch": 0.2,
"learning_rate": 1.942798097966487e-05,
"loss": 6.1004,
"step": 1890
},
{
"epoch": 0.2,
"learning_rate": 1.942022710769148e-05,
"loss": 6.1297,
"step": 1900
},
{
"epoch": 0.21,
"learning_rate": 1.9412422604516594e-05,
"loss": 6.1051,
"step": 1910
},
{
"epoch": 0.21,
"learning_rate": 1.9404567512087338e-05,
"loss": 6.0984,
"step": 1920
},
{
"epoch": 0.21,
"learning_rate": 1.9396661872622728e-05,
"loss": 6.0879,
"step": 1930
},
{
"epoch": 0.21,
"learning_rate": 1.9388705728613465e-05,
"loss": 6.0891,
"step": 1940
},
{
"epoch": 0.21,
"learning_rate": 1.9380699122821698e-05,
"loss": 6.1137,
"step": 1950
},
{
"epoch": 0.21,
"learning_rate": 1.9372642098280785e-05,
"loss": 6.0914,
"step": 1960
},
{
"epoch": 0.21,
"learning_rate": 1.936453469829508e-05,
"loss": 6.1051,
"step": 1970
},
{
"epoch": 0.21,
"learning_rate": 1.9356376966439694e-05,
"loss": 6.1188,
"step": 1980
},
{
"epoch": 0.21,
"learning_rate": 1.934816894656025e-05,
"loss": 6.0949,
"step": 1990
},
{
"epoch": 0.21,
"learning_rate": 1.9339910682772664e-05,
"loss": 6.1266,
"step": 2000
},
{
"epoch": 0.22,
"learning_rate": 1.9331602219462888e-05,
"loss": 6.1113,
"step": 2010
},
{
"epoch": 0.22,
"learning_rate": 1.9323243601286696e-05,
"loss": 6.0949,
"step": 2020
},
{
"epoch": 0.22,
"learning_rate": 1.9314834873169426e-05,
"loss": 6.1105,
"step": 2030
},
{
"epoch": 0.22,
"learning_rate": 1.930637608030574e-05,
"loss": 6.1004,
"step": 2040
},
{
"epoch": 0.22,
"learning_rate": 1.9297867268159393e-05,
"loss": 6.0992,
"step": 2050
},
{
"epoch": 0.22,
"learning_rate": 1.9289308482462964e-05,
"loss": 6.0793,
"step": 2060
},
{
"epoch": 0.22,
"learning_rate": 1.9280699769217647e-05,
"loss": 6.0789,
"step": 2070
},
{
"epoch": 0.22,
"learning_rate": 1.927204117469297e-05,
"loss": 6.1199,
"step": 2080
},
{
"epoch": 0.22,
"learning_rate": 1.9263332745426564e-05,
"loss": 6.1172,
"step": 2090
},
{
"epoch": 0.23,
"learning_rate": 1.9254574528223907e-05,
"loss": 6.1051,
"step": 2100
},
{
"epoch": 0.23,
"learning_rate": 1.9245766570158072e-05,
"loss": 6.1023,
"step": 2110
},
{
"epoch": 0.23,
"learning_rate": 1.9236908918569485e-05,
"loss": 6.0949,
"step": 2120
},
{
"epoch": 0.23,
"learning_rate": 1.9228001621065644e-05,
"loss": 6.0934,
"step": 2130
},
{
"epoch": 0.23,
"learning_rate": 1.92190447255209e-05,
"loss": 6.0762,
"step": 2140
},
{
"epoch": 0.23,
"learning_rate": 1.921003828007617e-05,
"loss": 6.0863,
"step": 2150
},
{
"epoch": 0.23,
"learning_rate": 1.9200982333138684e-05,
"loss": 6.0848,
"step": 2160
},
{
"epoch": 0.23,
"learning_rate": 1.9191876933381742e-05,
"loss": 6.0895,
"step": 2170
},
{
"epoch": 0.23,
"learning_rate": 1.9182722129744426e-05,
"loss": 6.0594,
"step": 2180
},
{
"epoch": 0.24,
"learning_rate": 1.9173517971431362e-05,
"loss": 6.098,
"step": 2190
},
{
"epoch": 0.24,
"learning_rate": 1.916426450791244e-05,
"loss": 6.1074,
"step": 2200
},
{
"epoch": 0.24,
"learning_rate": 1.915496178892255e-05,
"loss": 6.1078,
"step": 2210
},
{
"epoch": 0.24,
"learning_rate": 1.9145609864461317e-05,
"loss": 6.0586,
"step": 2220
},
{
"epoch": 0.24,
"learning_rate": 1.9136208784792838e-05,
"loss": 6.0922,
"step": 2230
},
{
"epoch": 0.24,
"learning_rate": 1.91267586004454e-05,
"loss": 6.1285,
"step": 2240
},
{
"epoch": 0.24,
"learning_rate": 1.9117259362211212e-05,
"loss": 6.0855,
"step": 2250
},
{
"epoch": 0.24,
"learning_rate": 1.9107711121146152e-05,
"loss": 6.084,
"step": 2260
},
{
"epoch": 0.24,
"learning_rate": 1.9098113928569448e-05,
"loss": 6.1105,
"step": 2270
},
{
"epoch": 0.24,
"learning_rate": 1.9088467836063452e-05,
"loss": 6.0988,
"step": 2280
},
{
"epoch": 0.25,
"learning_rate": 1.9078772895473326e-05,
"loss": 6.0832,
"step": 2290
},
{
"epoch": 0.25,
"learning_rate": 1.9069029158906792e-05,
"loss": 6.0965,
"step": 2300
},
{
"epoch": 0.25,
"learning_rate": 1.9059236678733817e-05,
"loss": 6.1094,
"step": 2310
},
{
"epoch": 0.25,
"learning_rate": 1.904939550758637e-05,
"loss": 6.0738,
"step": 2320
},
{
"epoch": 0.25,
"learning_rate": 1.9039505698358116e-05,
"loss": 6.0941,
"step": 2330
},
{
"epoch": 0.25,
"learning_rate": 1.902956730420413e-05,
"loss": 6.1012,
"step": 2340
},
{
"epoch": 0.25,
"learning_rate": 1.9019580378540622e-05,
"loss": 6.0891,
"step": 2350
},
{
"epoch": 0.25,
"learning_rate": 1.9009544975044652e-05,
"loss": 6.1082,
"step": 2360
},
{
"epoch": 0.25,
"learning_rate": 1.8999461147653828e-05,
"loss": 6.1094,
"step": 2370
},
{
"epoch": 0.26,
"learning_rate": 1.8989328950566023e-05,
"loss": 6.077,
"step": 2380
},
{
"epoch": 0.26,
"learning_rate": 1.897914843823909e-05,
"loss": 6.0977,
"step": 2390
},
{
"epoch": 0.26,
"learning_rate": 1.8968919665390556e-05,
"loss": 6.1102,
"step": 2400
},
{
"epoch": 0.26,
"learning_rate": 1.8958642686997348e-05,
"loss": 6.084,
"step": 2410
},
{
"epoch": 0.26,
"learning_rate": 1.8948317558295464e-05,
"loss": 6.0949,
"step": 2420
},
{
"epoch": 0.26,
"learning_rate": 1.893794433477972e-05,
"loss": 6.0895,
"step": 2430
},
{
"epoch": 0.26,
"learning_rate": 1.8927523072203417e-05,
"loss": 6.0824,
"step": 2440
},
{
"epoch": 0.26,
"learning_rate": 1.8917053826578047e-05,
"loss": 6.0855,
"step": 2450
},
{
"epoch": 0.26,
"learning_rate": 1.8906536654173013e-05,
"loss": 6.1,
"step": 2460
},
{
"epoch": 0.27,
"learning_rate": 1.88959716115153e-05,
"loss": 6.0941,
"step": 2470
},
{
"epoch": 0.27,
"learning_rate": 1.8885358755389192e-05,
"loss": 6.0848,
"step": 2480
},
{
"epoch": 0.27,
"learning_rate": 1.887469814283595e-05,
"loss": 6.0926,
"step": 2490
},
{
"epoch": 0.27,
"learning_rate": 1.8863989831153513e-05,
"loss": 6.0637,
"step": 2500
},
{
"epoch": 0.27,
"learning_rate": 1.8853233877896197e-05,
"loss": 6.0922,
"step": 2510
},
{
"epoch": 0.27,
"learning_rate": 1.8842430340874366e-05,
"loss": 6.0785,
"step": 2520
},
{
"epoch": 0.27,
"learning_rate": 1.883157927815415e-05,
"loss": 6.0625,
"step": 2530
},
{
"epoch": 0.27,
"learning_rate": 1.8820680748057113e-05,
"loss": 6.073,
"step": 2540
},
{
"epoch": 0.27,
"learning_rate": 1.880973480915993e-05,
"loss": 6.0723,
"step": 2550
},
{
"epoch": 0.27,
"learning_rate": 1.8798741520294097e-05,
"loss": 6.0812,
"step": 2560
},
{
"epoch": 0.28,
"learning_rate": 1.8787700940545608e-05,
"loss": 6.082,
"step": 2570
},
{
"epoch": 0.28,
"learning_rate": 1.877661312925462e-05,
"loss": 6.0664,
"step": 2580
},
{
"epoch": 0.28,
"learning_rate": 1.8765478146015156e-05,
"loss": 6.0715,
"step": 2590
},
{
"epoch": 0.28,
"learning_rate": 1.8754296050674776e-05,
"loss": 6.0715,
"step": 2600
},
{
"epoch": 0.28,
"learning_rate": 1.8743066903334252e-05,
"loss": 6.0875,
"step": 2610
},
{
"epoch": 0.28,
"learning_rate": 1.873179076434724e-05,
"loss": 6.0754,
"step": 2620
},
{
"epoch": 0.28,
"learning_rate": 1.872046769431998e-05,
"loss": 6.0766,
"step": 2630
},
{
"epoch": 0.28,
"learning_rate": 1.8709097754110945e-05,
"loss": 6.0887,
"step": 2640
},
{
"epoch": 0.28,
"learning_rate": 1.8697681004830514e-05,
"loss": 6.0492,
"step": 2650
},
{
"epoch": 0.29,
"learning_rate": 1.868621750784067e-05,
"loss": 6.0684,
"step": 2660
},
{
"epoch": 0.29,
"learning_rate": 1.8674707324754643e-05,
"loss": 6.0809,
"step": 2670
},
{
"epoch": 0.29,
"learning_rate": 1.8663150517436586e-05,
"loss": 6.0875,
"step": 2680
},
{
"epoch": 0.29,
"learning_rate": 1.865154714800125e-05,
"loss": 6.0891,
"step": 2690
},
{
"epoch": 0.29,
"learning_rate": 1.863989727881364e-05,
"loss": 6.0816,
"step": 2700
},
{
"epoch": 0.29,
"learning_rate": 1.862820097248869e-05,
"loss": 6.082,
"step": 2710
},
{
"epoch": 0.29,
"learning_rate": 1.861645829189092e-05,
"loss": 6.0672,
"step": 2720
},
{
"epoch": 0.29,
"learning_rate": 1.8604669300134094e-05,
"loss": 6.0852,
"step": 2730
},
{
"epoch": 0.29,
"learning_rate": 1.859283406058089e-05,
"loss": 6.0879,
"step": 2740
},
{
"epoch": 0.3,
"learning_rate": 1.8580952636842557e-05,
"loss": 6.0723,
"step": 2750
},
{
"epoch": 0.3,
"learning_rate": 1.856902509277857e-05,
"loss": 6.0891,
"step": 2760
},
{
"epoch": 0.3,
"learning_rate": 1.855705149249629e-05,
"loss": 6.077,
"step": 2770
},
{
"epoch": 0.3,
"learning_rate": 1.854503190035062e-05,
"loss": 6.0484,
"step": 2780
},
{
"epoch": 0.3,
"learning_rate": 1.8532966380943643e-05,
"loss": 6.0797,
"step": 2790
},
{
"epoch": 0.3,
"learning_rate": 1.8520854999124308e-05,
"loss": 6.0738,
"step": 2800
},
{
"epoch": 0.3,
"learning_rate": 1.850869781998805e-05,
"loss": 6.0965,
"step": 2810
},
{
"epoch": 0.3,
"learning_rate": 1.8496494908876454e-05,
"loss": 6.0895,
"step": 2820
},
{
"epoch": 0.3,
"learning_rate": 1.8484246331376908e-05,
"loss": 6.0742,
"step": 2830
},
{
"epoch": 0.3,
"learning_rate": 1.8471952153322237e-05,
"loss": 6.0703,
"step": 2840
},
{
"epoch": 0.31,
"learning_rate": 1.8459612440790364e-05,
"loss": 6.0773,
"step": 2850
},
{
"epoch": 0.31,
"learning_rate": 1.8447227260103942e-05,
"loss": 6.0758,
"step": 2860
},
{
"epoch": 0.31,
"learning_rate": 1.843479667783e-05,
"loss": 6.0645,
"step": 2870
},
{
"epoch": 0.31,
"learning_rate": 1.8422320760779602e-05,
"loss": 6.0914,
"step": 2880
},
{
"epoch": 0.31,
"learning_rate": 1.8409799576007465e-05,
"loss": 6.0863,
"step": 2890
},
{
"epoch": 0.31,
"learning_rate": 1.8397233190811597e-05,
"loss": 6.0805,
"step": 2900
},
{
"epoch": 0.31,
"learning_rate": 1.8384621672732975e-05,
"loss": 6.0824,
"step": 2910
},
{
"epoch": 0.31,
"learning_rate": 1.837196508955512e-05,
"loss": 6.1023,
"step": 2920
},
{
"epoch": 0.31,
"learning_rate": 1.8359263509303792e-05,
"loss": 6.0867,
"step": 2930
},
{
"epoch": 0.32,
"learning_rate": 1.834651700024659e-05,
"loss": 6.107,
"step": 2940
},
{
"epoch": 0.32,
"learning_rate": 1.8333725630892584e-05,
"loss": 6.0941,
"step": 2950
},
{
"epoch": 0.32,
"learning_rate": 1.8320889469991965e-05,
"loss": 6.1039,
"step": 2960
},
{
"epoch": 0.32,
"learning_rate": 1.8308008586535666e-05,
"loss": 6.0805,
"step": 2970
},
{
"epoch": 0.32,
"learning_rate": 1.8295083049754994e-05,
"loss": 6.1336,
"step": 2980
},
{
"epoch": 0.32,
"learning_rate": 1.828211292912125e-05,
"loss": 6.0762,
"step": 2990
},
{
"epoch": 0.32,
"learning_rate": 1.826909829434536e-05,
"loss": 6.0906,
"step": 3000
},
{
"epoch": 0.32,
"learning_rate": 1.82586545833408e-05,
"loss": 6.0547,
"step": 3010
},
{
"epoch": 0.32,
"learning_rate": 1.8245559999544053e-05,
"loss": 6.1059,
"step": 3020
},
{
"epoch": 0.33,
"learning_rate": 1.8232421098067383e-05,
"loss": 6.0973,
"step": 3030
},
{
"epoch": 0.33,
"learning_rate": 1.8219237949528875e-05,
"loss": 6.0801,
"step": 3040
},
{
"epoch": 0.33,
"learning_rate": 1.8206010624784426e-05,
"loss": 6.082,
"step": 3050
},
{
"epoch": 0.33,
"learning_rate": 1.8192739194927366e-05,
"loss": 6.066,
"step": 3060
},
{
"epoch": 0.33,
"learning_rate": 1.8179423731288088e-05,
"loss": 6.0605,
"step": 3070
},
{
"epoch": 0.33,
"learning_rate": 1.816606430543364e-05,
"loss": 6.1137,
"step": 3080
},
{
"epoch": 0.33,
"learning_rate": 1.8152660989167373e-05,
"loss": 6.0871,
"step": 3090
},
{
"epoch": 0.33,
"learning_rate": 1.813921385452852e-05,
"loss": 6.0836,
"step": 3100
},
{
"epoch": 0.33,
"learning_rate": 1.8125722973791836e-05,
"loss": 6.0656,
"step": 3110
},
{
"epoch": 0.33,
"learning_rate": 1.81121884194672e-05,
"loss": 6.093,
"step": 3120
},
{
"epoch": 0.34,
"learning_rate": 1.8098610264299213e-05,
"loss": 6.0535,
"step": 3130
},
{
"epoch": 0.34,
"learning_rate": 1.8084988581266837e-05,
"loss": 6.0766,
"step": 3140
},
{
"epoch": 0.34,
"learning_rate": 1.8071323443582973e-05,
"loss": 6.0719,
"step": 3150
},
{
"epoch": 0.34,
"learning_rate": 1.805761492469408e-05,
"loss": 6.0809,
"step": 3160
},
{
"epoch": 0.34,
"learning_rate": 1.804386309827978e-05,
"loss": 6.0996,
"step": 3170
},
{
"epoch": 0.34,
"learning_rate": 1.803006803825247e-05,
"loss": 6.082,
"step": 3180
},
{
"epoch": 0.34,
"learning_rate": 1.80162298187569e-05,
"loss": 6.0656,
"step": 3190
},
{
"epoch": 0.34,
"learning_rate": 1.8002348514169802e-05,
"loss": 6.0848,
"step": 3200
},
{
"epoch": 0.34,
"learning_rate": 1.7988424199099476e-05,
"loss": 6.0855,
"step": 3210
},
{
"epoch": 0.35,
"learning_rate": 1.797445694838539e-05,
"loss": 6.0918,
"step": 3220
},
{
"epoch": 0.35,
"learning_rate": 1.7960446837097784e-05,
"loss": 6.0789,
"step": 3230
},
{
"epoch": 0.35,
"learning_rate": 1.7946393940537262e-05,
"loss": 6.0563,
"step": 3240
},
{
"epoch": 0.35,
"learning_rate": 1.793229833423438e-05,
"loss": 6.0473,
"step": 3250
},
{
"epoch": 0.35,
"learning_rate": 1.7918160093949254e-05,
"loss": 6.0777,
"step": 3260
},
{
"epoch": 0.35,
"learning_rate": 1.790397929567114e-05,
"loss": 6.077,
"step": 3270
},
{
"epoch": 0.35,
"learning_rate": 1.7889756015618047e-05,
"loss": 6.0852,
"step": 3280
},
{
"epoch": 0.35,
"learning_rate": 1.787549033023629e-05,
"loss": 6.0738,
"step": 3290
},
{
"epoch": 0.35,
"learning_rate": 1.786118231620012e-05,
"loss": 6.0914,
"step": 3300
},
{
"epoch": 0.36,
"learning_rate": 1.784683205041129e-05,
"loss": 6.0785,
"step": 3310
},
{
"epoch": 0.36,
"learning_rate": 1.783243960999863e-05,
"loss": 6.1156,
"step": 3320
},
{
"epoch": 0.36,
"learning_rate": 1.7818005072317665e-05,
"loss": 6.057,
"step": 3330
},
{
"epoch": 0.36,
"learning_rate": 1.7803528514950173e-05,
"loss": 6.0777,
"step": 3340
},
{
"epoch": 0.36,
"learning_rate": 1.778901001570378e-05,
"loss": 6.0508,
"step": 3350
},
{
"epoch": 0.36,
"learning_rate": 1.7774449652611538e-05,
"loss": 6.1152,
"step": 3360
},
{
"epoch": 0.36,
"learning_rate": 1.77598475039315e-05,
"loss": 6.0715,
"step": 3370
},
{
"epoch": 0.36,
"learning_rate": 1.7745203648146314e-05,
"loss": 6.0727,
"step": 3380
},
{
"epoch": 0.36,
"learning_rate": 1.7730518163962788e-05,
"loss": 6.066,
"step": 3390
},
{
"epoch": 0.37,
"learning_rate": 1.771579113031147e-05,
"loss": 6.107,
"step": 3400
},
{
"epoch": 0.37,
"learning_rate": 1.770102262634623e-05,
"loss": 6.1012,
"step": 3410
},
{
"epoch": 0.37,
"learning_rate": 1.768621273144383e-05,
"loss": 6.1059,
"step": 3420
},
{
"epoch": 0.37,
"learning_rate": 1.767136152520349e-05,
"loss": 6.0902,
"step": 3430
},
{
"epoch": 0.37,
"learning_rate": 1.765646908744647e-05,
"loss": 6.0848,
"step": 3440
},
{
"epoch": 0.37,
"learning_rate": 1.7641535498215645e-05,
"loss": 6.0773,
"step": 3450
},
{
"epoch": 0.37,
"learning_rate": 1.7626560837775062e-05,
"loss": 6.073,
"step": 3460
},
{
"epoch": 0.37,
"learning_rate": 1.7611545186609516e-05,
"loss": 6.0805,
"step": 3470
},
{
"epoch": 0.37,
"learning_rate": 1.7596488625424118e-05,
"loss": 6.0848,
"step": 3480
},
{
"epoch": 0.37,
"learning_rate": 1.7581391235143854e-05,
"loss": 6.0793,
"step": 3490
},
{
"epoch": 0.38,
"learning_rate": 1.7566253096913162e-05,
"loss": 6.0805,
"step": 3500
},
{
"epoch": 0.38,
"learning_rate": 1.755107429209549e-05,
"loss": 6.0691,
"step": 3510
},
{
"epoch": 0.38,
"learning_rate": 1.753585490227285e-05,
"loss": 6.0836,
"step": 3520
},
{
"epoch": 0.38,
"learning_rate": 1.7520595009245394e-05,
"loss": 6.0691,
"step": 3530
},
{
"epoch": 0.38,
"learning_rate": 1.7505294695030964e-05,
"loss": 6.0746,
"step": 3540
},
{
"epoch": 0.38,
"learning_rate": 1.748995404186466e-05,
"loss": 6.0852,
"step": 3550
},
{
"epoch": 0.38,
"learning_rate": 1.7474573132198387e-05,
"loss": 6.082,
"step": 3560
},
{
"epoch": 0.38,
"learning_rate": 1.7459152048700423e-05,
"loss": 6.0758,
"step": 3570
},
{
"epoch": 0.38,
"learning_rate": 1.744369087425497e-05,
"loss": 6.0777,
"step": 3580
},
{
"epoch": 0.39,
"learning_rate": 1.7428189691961703e-05,
"loss": 6.077,
"step": 3590
},
{
"epoch": 0.39,
"learning_rate": 1.741264858513533e-05,
"loss": 6.0918,
"step": 3600
},
{
"epoch": 0.39,
"learning_rate": 1.7397067637305153e-05,
"loss": 6.066,
"step": 3610
},
{
"epoch": 0.39,
"learning_rate": 1.7381446932214587e-05,
"loss": 6.0945,
"step": 3620
},
{
"epoch": 0.39,
"learning_rate": 1.7365786553820757e-05,
"loss": 6.075,
"step": 3630
},
{
"epoch": 0.39,
"learning_rate": 1.7350086586293997e-05,
"loss": 6.0746,
"step": 3640
},
{
"epoch": 0.39,
"learning_rate": 1.733434711401744e-05,
"loss": 6.0836,
"step": 3650
},
{
"epoch": 0.39,
"learning_rate": 1.7318568221586543e-05,
"loss": 6.0609,
"step": 3660
},
{
"epoch": 0.39,
"learning_rate": 1.7302749993808626e-05,
"loss": 6.0836,
"step": 3670
},
{
"epoch": 0.4,
"learning_rate": 1.728689251570244e-05,
"loss": 6.0742,
"step": 3680
},
{
"epoch": 0.4,
"learning_rate": 1.7270995872497686e-05,
"loss": 6.1008,
"step": 3690
},
{
"epoch": 0.4,
"learning_rate": 1.725506014963457e-05,
"loss": 6.0758,
"step": 3700
},
{
"epoch": 0.4,
"learning_rate": 1.723908543276334e-05,
"loss": 6.0637,
"step": 3710
},
{
"epoch": 0.4,
"learning_rate": 1.722307180774383e-05,
"loss": 6.0566,
"step": 3720
},
{
"epoch": 0.4,
"learning_rate": 1.720701936064499e-05,
"loss": 6.0754,
"step": 3730
},
{
"epoch": 0.4,
"learning_rate": 1.719092817774443e-05,
"loss": 6.0473,
"step": 3740
},
{
"epoch": 0.4,
"learning_rate": 1.7174798345527953e-05,
"loss": 6.0602,
"step": 3750
},
{
"epoch": 0.4,
"learning_rate": 1.71586299506891e-05,
"loss": 6.0664,
"step": 3760
},
{
"epoch": 0.4,
"learning_rate": 1.7142423080128666e-05,
"loss": 6.0672,
"step": 3770
},
{
"epoch": 0.41,
"learning_rate": 1.7126177820954242e-05,
"loss": 6.0629,
"step": 3780
},
{
"epoch": 0.41,
"learning_rate": 1.710989426047976e-05,
"loss": 6.0539,
"step": 3790
},
{
"epoch": 0.41,
"learning_rate": 1.709357248622499e-05,
"loss": 6.0848,
"step": 3800
},
{
"epoch": 0.41,
"learning_rate": 1.7077212585915118e-05,
"loss": 6.032,
"step": 3810
},
{
"epoch": 0.41,
"learning_rate": 1.7060814647480228e-05,
"loss": 6.0855,
"step": 3820
},
{
"epoch": 0.41,
"learning_rate": 1.7044378759054846e-05,
"loss": 6.0738,
"step": 3830
},
{
"epoch": 0.41,
"learning_rate": 1.702790500897749e-05,
"loss": 6.0582,
"step": 3840
},
{
"epoch": 0.41,
"learning_rate": 1.701139348579015e-05,
"loss": 6.0563,
"step": 3850
},
{
"epoch": 0.41,
"learning_rate": 1.6994844278237857e-05,
"loss": 6.1,
"step": 3860
},
{
"epoch": 0.42,
"learning_rate": 1.6978257475268173e-05,
"loss": 6.0598,
"step": 3870
},
{
"epoch": 0.42,
"learning_rate": 1.6961633166030723e-05,
"loss": 6.0801,
"step": 3880
},
{
"epoch": 0.42,
"learning_rate": 1.6944971439876727e-05,
"loss": 6.0754,
"step": 3890
},
{
"epoch": 0.42,
"learning_rate": 1.692827238635851e-05,
"loss": 6.0941,
"step": 3900
},
{
"epoch": 0.42,
"learning_rate": 1.691153609522901e-05,
"loss": 6.077,
"step": 3910
},
{
"epoch": 0.42,
"learning_rate": 1.6894762656441328e-05,
"loss": 6.0566,
"step": 3920
},
{
"epoch": 0.42,
"learning_rate": 1.6877952160148203e-05,
"loss": 6.0734,
"step": 3930
},
{
"epoch": 0.42,
"learning_rate": 1.686110469670156e-05,
"loss": 6.0445,
"step": 3940
},
{
"epoch": 0.42,
"learning_rate": 1.6844220356652013e-05,
"loss": 6.0613,
"step": 3950
},
{
"epoch": 0.43,
"learning_rate": 1.6827299230748368e-05,
"loss": 6.0809,
"step": 3960
},
{
"epoch": 0.43,
"learning_rate": 1.681034140993716e-05,
"loss": 6.059,
"step": 3970
},
{
"epoch": 0.43,
"learning_rate": 1.6793346985362128e-05,
"loss": 6.0805,
"step": 3980
},
{
"epoch": 0.43,
"learning_rate": 1.677631604836377e-05,
"loss": 6.0687,
"step": 3990
},
{
"epoch": 0.43,
"learning_rate": 1.6759248690478814e-05,
"loss": 6.0637,
"step": 4000
},
{
"epoch": 0.43,
"learning_rate": 1.6745568642770166e-05,
"loss": 6.0637,
"step": 4010
},
{
"epoch": 0.43,
"learning_rate": 1.6728435958586427e-05,
"loss": 6.0512,
"step": 4020
},
{
"epoch": 0.43,
"learning_rate": 1.671126711085877e-05,
"loss": 6.0793,
"step": 4030
},
{
"epoch": 0.43,
"learning_rate": 1.6694062191865163e-05,
"loss": 6.0711,
"step": 4040
},
{
"epoch": 0.43,
"learning_rate": 1.6676821294077435e-05,
"loss": 6.0551,
"step": 4050
},
{
"epoch": 0.44,
"learning_rate": 1.6659544510160808e-05,
"loss": 6.0746,
"step": 4060
},
{
"epoch": 0.44,
"learning_rate": 1.664223193297337e-05,
"loss": 6.0773,
"step": 4070
},
{
"epoch": 0.44,
"learning_rate": 1.6624883655565602e-05,
"loss": 6.0914,
"step": 4080
},
{
"epoch": 0.44,
"learning_rate": 1.6607499771179853e-05,
"loss": 6.0336,
"step": 4090
},
{
"epoch": 0.44,
"learning_rate": 1.6590080373249846e-05,
"loss": 6.0766,
"step": 4100
},
{
"epoch": 0.44,
"learning_rate": 1.6572625555400194e-05,
"loss": 6.068,
"step": 4110
},
{
"epoch": 0.44,
"learning_rate": 1.655513541144587e-05,
"loss": 6.0473,
"step": 4120
},
{
"epoch": 0.44,
"learning_rate": 1.6537610035391726e-05,
"loss": 6.0852,
"step": 4130
},
{
"epoch": 0.44,
"learning_rate": 1.6520049521431966e-05,
"loss": 6.0773,
"step": 4140
},
{
"epoch": 0.45,
"learning_rate": 1.6502453963949662e-05,
"loss": 6.0762,
"step": 4150
},
{
"epoch": 0.45,
"learning_rate": 1.6484823457516225e-05,
"loss": 6.0727,
"step": 4160
},
{
"epoch": 0.45,
"learning_rate": 1.6467158096890915e-05,
"loss": 6.0875,
"step": 4170
},
{
"epoch": 0.45,
"learning_rate": 1.6449457977020315e-05,
"loss": 6.0645,
"step": 4180
},
{
"epoch": 0.45,
"learning_rate": 1.6431723193037847e-05,
"loss": 6.0766,
"step": 4190
},
{
"epoch": 0.45,
"learning_rate": 1.641395384026322e-05,
"loss": 6.0719,
"step": 4200
},
{
"epoch": 0.45,
"learning_rate": 1.6396150014201965e-05,
"loss": 6.0652,
"step": 4210
},
{
"epoch": 0.45,
"learning_rate": 1.6378311810544877e-05,
"loss": 6.0836,
"step": 4220
},
{
"epoch": 0.45,
"learning_rate": 1.6360439325167536e-05,
"loss": 6.0828,
"step": 4230
},
{
"epoch": 0.46,
"learning_rate": 1.6342532654129764e-05,
"loss": 6.0891,
"step": 4240
},
{
"epoch": 0.46,
"learning_rate": 1.632459189367514e-05,
"loss": 6.0687,
"step": 4250
},
{
"epoch": 0.46,
"learning_rate": 1.6306617140230442e-05,
"loss": 6.0883,
"step": 4260
},
{
"epoch": 0.46,
"learning_rate": 1.6288608490405172e-05,
"loss": 6.0848,
"step": 4270
},
{
"epoch": 0.46,
"learning_rate": 1.6270566040991004e-05,
"loss": 6.0578,
"step": 4280
},
{
"epoch": 0.46,
"learning_rate": 1.6252489888961275e-05,
"loss": 6.075,
"step": 4290
},
{
"epoch": 0.46,
"learning_rate": 1.623438013147047e-05,
"loss": 6.073,
"step": 4300
},
{
"epoch": 0.46,
"learning_rate": 1.6216236865853695e-05,
"loss": 6.0512,
"step": 4310
},
{
"epoch": 0.46,
"learning_rate": 1.6198060189626147e-05,
"loss": 6.0789,
"step": 4320
},
{
"epoch": 0.46,
"learning_rate": 1.6179850200482606e-05,
"loss": 6.0766,
"step": 4330
},
{
"epoch": 0.47,
"learning_rate": 1.6161606996296888e-05,
"loss": 6.0711,
"step": 4340
},
{
"epoch": 0.47,
"learning_rate": 1.614333067512134e-05,
"loss": 6.0738,
"step": 4350
},
{
"epoch": 0.47,
"learning_rate": 1.6125021335186295e-05,
"loss": 6.0469,
"step": 4360
},
{
"epoch": 0.47,
"learning_rate": 1.6106679074899565e-05,
"loss": 6.0555,
"step": 4370
},
{
"epoch": 0.47,
"learning_rate": 1.608830399284589e-05,
"loss": 6.0801,
"step": 4380
},
{
"epoch": 0.47,
"learning_rate": 1.6069896187786428e-05,
"loss": 6.0465,
"step": 4390
},
{
"epoch": 0.47,
"learning_rate": 1.6051455758658202e-05,
"loss": 6.0773,
"step": 4400
},
{
"epoch": 0.47,
"learning_rate": 1.603298280457359e-05,
"loss": 6.075,
"step": 4410
},
{
"epoch": 0.47,
"learning_rate": 1.601447742481978e-05,
"loss": 6.0625,
"step": 4420
},
{
"epoch": 0.48,
"learning_rate": 1.5995939718858238e-05,
"loss": 6.0566,
"step": 4430
},
{
"epoch": 0.48,
"learning_rate": 1.5977369786324177e-05,
"loss": 6.0723,
"step": 4440
},
{
"epoch": 0.48,
"learning_rate": 1.595876772702602e-05,
"loss": 6.0676,
"step": 4450
},
{
"epoch": 0.48,
"learning_rate": 1.5940133640944858e-05,
"loss": 6.0609,
"step": 4460
},
{
"epoch": 0.48,
"learning_rate": 1.592146762823392e-05,
"loss": 6.0832,
"step": 4470
},
{
"epoch": 0.48,
"learning_rate": 1.5902769789218037e-05,
"loss": 6.0895,
"step": 4480
},
{
"epoch": 0.48,
"learning_rate": 1.588404022439309e-05,
"loss": 6.0391,
"step": 4490
},
{
"epoch": 0.48,
"learning_rate": 1.5865279034425484e-05,
"loss": 6.0934,
"step": 4500
},
{
"epoch": 0.48,
"learning_rate": 1.5846486320151593e-05,
"loss": 6.0816,
"step": 4510
},
{
"epoch": 0.49,
"learning_rate": 1.5827662182577234e-05,
"loss": 6.0523,
"step": 4520
},
{
"epoch": 0.49,
"learning_rate": 1.580880672287711e-05,
"loss": 6.0605,
"step": 4530
},
{
"epoch": 0.49,
"learning_rate": 1.578992004239428e-05,
"loss": 6.0941,
"step": 4540
},
{
"epoch": 0.49,
"learning_rate": 1.5771002242639595e-05,
"loss": 6.0656,
"step": 4550
},
{
"epoch": 0.49,
"learning_rate": 1.5752053425291173e-05,
"loss": 6.0797,
"step": 4560
},
{
"epoch": 0.49,
"learning_rate": 1.5733073692193833e-05,
"loss": 6.0359,
"step": 4570
},
{
"epoch": 0.49,
"learning_rate": 1.5714063145358577e-05,
"loss": 6.0625,
"step": 4580
},
{
"epoch": 0.49,
"learning_rate": 1.5695021886961998e-05,
"loss": 6.0734,
"step": 4590
},
{
"epoch": 0.49,
"learning_rate": 1.5675950019345775e-05,
"loss": 6.0371,
"step": 4600
},
{
"epoch": 0.49,
"learning_rate": 1.56568476450161e-05,
"loss": 6.075,
"step": 4610
},
{
"epoch": 0.5,
"learning_rate": 1.563771486664311e-05,
"loss": 6.0828,
"step": 4620
},
{
"epoch": 0.5,
"learning_rate": 1.561855178706039e-05,
"loss": 6.0777,
"step": 4630
},
{
"epoch": 0.5,
"learning_rate": 1.5599358509264363e-05,
"loss": 6.082,
"step": 4640
},
{
"epoch": 0.5,
"learning_rate": 1.5580135136413757e-05,
"loss": 6.0879,
"step": 4650
},
{
"epoch": 0.5,
"learning_rate": 1.556088177182907e-05,
"loss": 6.052,
"step": 4660
},
{
"epoch": 0.5,
"learning_rate": 1.5541598518991983e-05,
"loss": 6.0824,
"step": 4670
},
{
"epoch": 0.5,
"learning_rate": 1.552228548154482e-05,
"loss": 6.0477,
"step": 4680
},
{
"epoch": 0.5,
"learning_rate": 1.550294276329e-05,
"loss": 6.0664,
"step": 4690
},
{
"epoch": 0.5,
"learning_rate": 1.5483570468189455e-05,
"loss": 6.0559,
"step": 4700
},
{
"epoch": 0.51,
"learning_rate": 1.5464168700364093e-05,
"loss": 6.0566,
"step": 4710
},
{
"epoch": 0.51,
"learning_rate": 1.544473756409323e-05,
"loss": 6.059,
"step": 4720
},
{
"epoch": 0.51,
"learning_rate": 1.5425277163814017e-05,
"loss": 6.0934,
"step": 4730
},
{
"epoch": 0.51,
"learning_rate": 1.5405787604120915e-05,
"loss": 6.0863,
"step": 4740
},
{
"epoch": 0.51,
"learning_rate": 1.5386268989765085e-05,
"loss": 6.0492,
"step": 4750
},
{
"epoch": 0.51,
"learning_rate": 1.5366721425653867e-05,
"loss": 6.0723,
"step": 4760
},
{
"epoch": 0.51,
"learning_rate": 1.5347145016850183e-05,
"loss": 6.0586,
"step": 4770
},
{
"epoch": 0.51,
"learning_rate": 1.5327539868571998e-05,
"loss": 6.0461,
"step": 4780
},
{
"epoch": 0.51,
"learning_rate": 1.5307906086191744e-05,
"loss": 6.0434,
"step": 4790
},
{
"epoch": 0.52,
"learning_rate": 1.528824377523575e-05,
"loss": 6.1023,
"step": 4800
},
{
"epoch": 0.52,
"learning_rate": 1.5268553041383675e-05,
"loss": 6.0734,
"step": 4810
},
{
"epoch": 0.52,
"learning_rate": 1.524883399046795e-05,
"loss": 6.0918,
"step": 4820
},
{
"epoch": 0.52,
"learning_rate": 1.5229086728473203e-05,
"loss": 6.0586,
"step": 4830
},
{
"epoch": 0.52,
"learning_rate": 1.5209311361535682e-05,
"loss": 6.0738,
"step": 4840
},
{
"epoch": 0.52,
"learning_rate": 1.5189507995942698e-05,
"loss": 6.0602,
"step": 4850
},
{
"epoch": 0.52,
"learning_rate": 1.5169676738132046e-05,
"loss": 6.0789,
"step": 4860
},
{
"epoch": 0.52,
"learning_rate": 1.514981769469143e-05,
"loss": 6.0687,
"step": 4870
},
{
"epoch": 0.52,
"learning_rate": 1.5129930972357902e-05,
"loss": 6.0465,
"step": 4880
},
{
"epoch": 0.53,
"learning_rate": 1.5110016678017277e-05,
"loss": 6.0559,
"step": 4890
},
{
"epoch": 0.53,
"learning_rate": 1.5090074918703553e-05,
"loss": 6.0629,
"step": 4900
},
{
"epoch": 0.53,
"learning_rate": 1.5070105801598363e-05,
"loss": 6.059,
"step": 4910
},
{
"epoch": 0.53,
"learning_rate": 1.5050109434030366e-05,
"loss": 6.0633,
"step": 4920
},
{
"epoch": 0.53,
"learning_rate": 1.503008592347469e-05,
"loss": 6.0691,
"step": 4930
},
{
"epoch": 0.53,
"learning_rate": 1.5010035377552354e-05,
"loss": 6.0527,
"step": 4940
},
{
"epoch": 0.53,
"learning_rate": 1.4989957904029675e-05,
"loss": 6.0695,
"step": 4950
},
{
"epoch": 0.53,
"learning_rate": 1.4969853610817707e-05,
"loss": 6.0676,
"step": 4960
},
{
"epoch": 0.53,
"learning_rate": 1.4949722605971647e-05,
"loss": 6.0664,
"step": 4970
},
{
"epoch": 0.53,
"learning_rate": 1.4929564997690268e-05,
"loss": 6.0586,
"step": 4980
},
{
"epoch": 0.54,
"learning_rate": 1.4909380894315316e-05,
"loss": 6.0492,
"step": 4990
},
{
"epoch": 0.54,
"learning_rate": 1.4889170404330953e-05,
"loss": 6.0539,
"step": 5000
},
{
"epoch": 0.54,
"learning_rate": 1.4872983086976594e-05,
"loss": 6.0746,
"step": 5010
},
{
"epoch": 0.54,
"learning_rate": 1.485272537492769e-05,
"loss": 6.0453,
"step": 5020
},
{
"epoch": 0.54,
"learning_rate": 1.4832441580777642e-05,
"loss": 6.0875,
"step": 5030
},
{
"epoch": 0.54,
"learning_rate": 1.4812131813546416e-05,
"loss": 6.075,
"step": 5040
},
{
"epoch": 0.54,
"learning_rate": 1.4791796182393578e-05,
"loss": 6.077,
"step": 5050
},
{
"epoch": 0.54,
"learning_rate": 1.4771434796617716e-05,
"loss": 6.084,
"step": 5060
},
{
"epoch": 0.54,
"learning_rate": 1.4751047765655824e-05,
"loss": 6.0563,
"step": 5070
},
{
"epoch": 0.55,
"learning_rate": 1.4730635199082746e-05,
"loss": 6.1125,
"step": 5080
},
{
"epoch": 0.55,
"learning_rate": 1.4710197206610567e-05,
"loss": 6.0598,
"step": 5090
},
{
"epoch": 0.55,
"learning_rate": 1.4689733898088032e-05,
"loss": 6.0789,
"step": 5100
},
{
"epoch": 0.55,
"learning_rate": 1.4669245383499952e-05,
"loss": 6.0664,
"step": 5110
},
{
"epoch": 0.55,
"learning_rate": 1.4648731772966613e-05,
"loss": 6.0781,
"step": 5120
},
{
"epoch": 0.55,
"learning_rate": 1.4628193176743185e-05,
"loss": 6.073,
"step": 5130
},
{
"epoch": 0.55,
"learning_rate": 1.460762970521913e-05,
"loss": 6.0598,
"step": 5140
},
{
"epoch": 0.55,
"learning_rate": 1.4587041468917609e-05,
"loss": 6.0699,
"step": 5150
},
{
"epoch": 0.55,
"learning_rate": 1.4566428578494888e-05,
"loss": 6.068,
"step": 5160
},
{
"epoch": 0.56,
"learning_rate": 1.4545791144739737e-05,
"loss": 6.0422,
"step": 5170
},
{
"epoch": 0.56,
"learning_rate": 1.452512927857285e-05,
"loss": 6.0852,
"step": 5180
},
{
"epoch": 0.56,
"learning_rate": 1.4504443091046225e-05,
"loss": 6.0863,
"step": 5190
},
{
"epoch": 0.56,
"learning_rate": 1.448373269334259e-05,
"loss": 6.0699,
"step": 5200
},
{
"epoch": 0.56,
"learning_rate": 1.4462998196774796e-05,
"loss": 6.0621,
"step": 5210
},
{
"epoch": 0.56,
"learning_rate": 1.4442239712785215e-05,
"loss": 6.0805,
"step": 5220
},
{
"epoch": 0.56,
"learning_rate": 1.4421457352945148e-05,
"loss": 6.0809,
"step": 5230
},
{
"epoch": 0.56,
"learning_rate": 1.440065122895422e-05,
"loss": 6.0785,
"step": 5240
},
{
"epoch": 0.56,
"learning_rate": 1.4379821452639789e-05,
"loss": 6.0484,
"step": 5250
},
{
"epoch": 0.56,
"learning_rate": 1.4358968135956323e-05,
"loss": 6.0797,
"step": 5260
},
{
"epoch": 0.57,
"learning_rate": 1.4338091390984831e-05,
"loss": 6.0863,
"step": 5270
},
{
"epoch": 0.57,
"learning_rate": 1.431719132993223e-05,
"loss": 6.0324,
"step": 5280
},
{
"epoch": 0.57,
"learning_rate": 1.4296268065130759e-05,
"loss": 6.0422,
"step": 5290
},
{
"epoch": 0.57,
"learning_rate": 1.4275321709037371e-05,
"loss": 6.0672,
"step": 5300
},
{
"epoch": 0.57,
"learning_rate": 1.425435237423313e-05,
"loss": 6.0785,
"step": 5310
},
{
"epoch": 0.57,
"learning_rate": 1.4233360173422602e-05,
"loss": 6.0828,
"step": 5320
},
{
"epoch": 0.57,
"learning_rate": 1.421234521943325e-05,
"loss": 6.0781,
"step": 5330
},
{
"epoch": 0.57,
"learning_rate": 1.4191307625214834e-05,
"loss": 6.0445,
"step": 5340
},
{
"epoch": 0.57,
"learning_rate": 1.41702475038388e-05,
"loss": 6.0738,
"step": 5350
},
{
"epoch": 0.58,
"learning_rate": 1.4149164968497661e-05,
"loss": 6.0773,
"step": 5360
},
{
"epoch": 0.58,
"learning_rate": 1.4128060132504413e-05,
"loss": 6.0598,
"step": 5370
},
{
"epoch": 0.58,
"learning_rate": 1.4106933109291903e-05,
"loss": 6.0621,
"step": 5380
},
{
"epoch": 0.58,
"learning_rate": 1.4085784012412232e-05,
"loss": 6.0539,
"step": 5390
},
{
"epoch": 0.58,
"learning_rate": 1.4064612955536138e-05,
"loss": 6.0656,
"step": 5400
},
{
"epoch": 0.58,
"learning_rate": 1.4043420052452393e-05,
"loss": 6.0668,
"step": 5410
},
{
"epoch": 0.58,
"learning_rate": 1.4022205417067178e-05,
"loss": 6.098,
"step": 5420
},
{
"epoch": 0.58,
"learning_rate": 1.4000969163403491e-05,
"loss": 6.0449,
"step": 5430
},
{
"epoch": 0.58,
"learning_rate": 1.3979711405600512e-05,
"loss": 6.0687,
"step": 5440
},
{
"epoch": 0.59,
"learning_rate": 1.3958432257913005e-05,
"loss": 6.0707,
"step": 5450
},
{
"epoch": 0.59,
"learning_rate": 1.3937131834710702e-05,
"loss": 6.084,
"step": 5460
},
{
"epoch": 0.59,
"learning_rate": 1.3915810250477679e-05,
"loss": 6.066,
"step": 5470
},
{
"epoch": 0.59,
"learning_rate": 1.3894467619811746e-05,
"loss": 6.0598,
"step": 5480
},
{
"epoch": 0.59,
"learning_rate": 1.387310405742384e-05,
"loss": 6.0687,
"step": 5490
},
{
"epoch": 0.59,
"learning_rate": 1.3851719678137395e-05,
"loss": 6.0605,
"step": 5500
},
{
"epoch": 0.59,
"learning_rate": 1.3830314596887728e-05,
"loss": 6.0625,
"step": 5510
},
{
"epoch": 0.59,
"learning_rate": 1.380888892872143e-05,
"loss": 6.0684,
"step": 5520
},
{
"epoch": 0.59,
"learning_rate": 1.3787442788795733e-05,
"loss": 6.0762,
"step": 5530
},
{
"epoch": 0.59,
"learning_rate": 1.3765976292377907e-05,
"loss": 6.0555,
"step": 5540
},
{
"epoch": 0.6,
"learning_rate": 1.3744489554844633e-05,
"loss": 6.0848,
"step": 5550
},
{
"epoch": 0.6,
"learning_rate": 1.3722982691681372e-05,
"loss": 6.0367,
"step": 5560
},
{
"epoch": 0.6,
"learning_rate": 1.3701455818481767e-05,
"loss": 6.0672,
"step": 5570
},
{
"epoch": 0.6,
"learning_rate": 1.3679909050947003e-05,
"loss": 6.0445,
"step": 5580
},
{
"epoch": 0.6,
"learning_rate": 1.3658342504885192e-05,
"loss": 6.0809,
"step": 5590
},
{
"epoch": 0.6,
"learning_rate": 1.3636756296210751e-05,
"loss": 6.0789,
"step": 5600
},
{
"epoch": 0.6,
"learning_rate": 1.3615150540943785e-05,
"loss": 6.0805,
"step": 5610
},
{
"epoch": 0.6,
"learning_rate": 1.3593525355209444e-05,
"loss": 6.0609,
"step": 5620
},
{
"epoch": 0.6,
"learning_rate": 1.3571880855237324e-05,
"loss": 6.0566,
"step": 5630
},
{
"epoch": 0.61,
"learning_rate": 1.3550217157360816e-05,
"loss": 6.059,
"step": 5640
},
{
"epoch": 0.61,
"learning_rate": 1.3528534378016509e-05,
"loss": 6.0766,
"step": 5650
},
{
"epoch": 0.61,
"learning_rate": 1.3506832633743543e-05,
"loss": 6.0742,
"step": 5660
},
{
"epoch": 0.61,
"learning_rate": 1.3485112041182988e-05,
"loss": 6.0438,
"step": 5670
},
{
"epoch": 0.61,
"learning_rate": 1.3463372717077221e-05,
"loss": 6.0406,
"step": 5680
},
{
"epoch": 0.61,
"learning_rate": 1.3441614778269297e-05,
"loss": 6.0574,
"step": 5690
},
{
"epoch": 0.61,
"learning_rate": 1.3419838341702314e-05,
"loss": 6.0656,
"step": 5700
},
{
"epoch": 0.61,
"learning_rate": 1.33980435244188e-05,
"loss": 6.0535,
"step": 5710
},
{
"epoch": 0.61,
"learning_rate": 1.3376230443560066e-05,
"loss": 6.0488,
"step": 5720
},
{
"epoch": 0.62,
"learning_rate": 1.335439921636559e-05,
"loss": 6.0727,
"step": 5730
},
{
"epoch": 0.62,
"learning_rate": 1.333254996017238e-05,
"loss": 6.1,
"step": 5740
},
{
"epoch": 0.62,
"learning_rate": 1.3310682792414344e-05,
"loss": 6.0625,
"step": 5750
},
{
"epoch": 0.62,
"learning_rate": 1.3288797830621661e-05,
"loss": 6.0465,
"step": 5760
},
{
"epoch": 0.62,
"learning_rate": 1.3266895192420149e-05,
"loss": 6.0668,
"step": 5770
},
{
"epoch": 0.62,
"learning_rate": 1.324497499553063e-05,
"loss": 6.0781,
"step": 5780
},
{
"epoch": 0.62,
"learning_rate": 1.32230373577683e-05,
"loss": 6.0762,
"step": 5790
},
{
"epoch": 0.62,
"learning_rate": 1.3201082397042097e-05,
"loss": 6.0711,
"step": 5800
},
{
"epoch": 0.62,
"learning_rate": 1.3179110231354062e-05,
"loss": 6.0719,
"step": 5810
},
{
"epoch": 0.62,
"learning_rate": 1.315712097879871e-05,
"loss": 6.0715,
"step": 5820
},
{
"epoch": 0.63,
"learning_rate": 1.313511475756239e-05,
"loss": 6.0609,
"step": 5830
},
{
"epoch": 0.63,
"learning_rate": 1.3113091685922663e-05,
"loss": 6.0629,
"step": 5840
},
{
"epoch": 0.63,
"learning_rate": 1.3091051882247646e-05,
"loss": 6.0691,
"step": 5850
},
{
"epoch": 0.63,
"learning_rate": 1.3068995464995387e-05,
"loss": 6.0664,
"step": 5860
},
{
"epoch": 0.63,
"learning_rate": 1.3046922552713232e-05,
"loss": 6.0926,
"step": 5870
},
{
"epoch": 0.63,
"learning_rate": 1.3024833264037185e-05,
"loss": 6.0512,
"step": 5880
},
{
"epoch": 0.63,
"learning_rate": 1.300272771769126e-05,
"loss": 6.0441,
"step": 5890
},
{
"epoch": 0.63,
"learning_rate": 1.2980606032486862e-05,
"loss": 6.0664,
"step": 5900
},
{
"epoch": 0.63,
"learning_rate": 1.295846832732213e-05,
"loss": 6.0781,
"step": 5910
},
{
"epoch": 0.64,
"learning_rate": 1.2936314721181309e-05,
"loss": 6.0445,
"step": 5920
},
{
"epoch": 0.64,
"learning_rate": 1.291414533313411e-05,
"loss": 6.0512,
"step": 5930
},
{
"epoch": 0.64,
"learning_rate": 1.2891960282335063e-05,
"loss": 6.0586,
"step": 5940
},
{
"epoch": 0.64,
"learning_rate": 1.2869759688022882e-05,
"loss": 6.0816,
"step": 5950
},
{
"epoch": 0.64,
"learning_rate": 1.2847543669519828e-05,
"loss": 6.0602,
"step": 5960
},
{
"epoch": 0.64,
"learning_rate": 1.2825312346231058e-05,
"loss": 6.0816,
"step": 5970
},
{
"epoch": 0.64,
"learning_rate": 1.2803065837643987e-05,
"loss": 6.0789,
"step": 5980
},
{
"epoch": 0.64,
"learning_rate": 1.2780804263327653e-05,
"loss": 6.0754,
"step": 5990
},
{
"epoch": 0.64,
"learning_rate": 1.2758527742932061e-05,
"loss": 6.0805,
"step": 6000
},
{
"epoch": 0.65,
"learning_rate": 1.2736236396187554e-05,
"loss": 6.0617,
"step": 6010
},
{
"epoch": 0.65,
"learning_rate": 1.2718392724330404e-05,
"loss": 6.0637,
"step": 6020
},
{
"epoch": 0.65,
"learning_rate": 1.2696074992132255e-05,
"loss": 6.0383,
"step": 6030
},
{
"epoch": 0.65,
"learning_rate": 1.2673742769252024e-05,
"loss": 6.0625,
"step": 6040
},
{
"epoch": 0.65,
"learning_rate": 1.2651396175719437e-05,
"loss": 6.0523,
"step": 6050
},
{
"epoch": 0.65,
"learning_rate": 1.2629035331641457e-05,
"loss": 6.0496,
"step": 6060
},
{
"epoch": 0.65,
"learning_rate": 1.260666035720164e-05,
"loss": 6.0875,
"step": 6070
},
{
"epoch": 0.65,
"learning_rate": 1.2584271372659495e-05,
"loss": 6.0566,
"step": 6080
},
{
"epoch": 0.65,
"learning_rate": 1.256186849834982e-05,
"loss": 6.0273,
"step": 6090
},
{
"epoch": 0.65,
"learning_rate": 1.2539451854682078e-05,
"loss": 6.0637,
"step": 6100
},
{
"epoch": 0.66,
"learning_rate": 1.2517021562139732e-05,
"loss": 6.0664,
"step": 6110
},
{
"epoch": 0.66,
"learning_rate": 1.249457774127961e-05,
"loss": 6.0543,
"step": 6120
},
{
"epoch": 0.66,
"learning_rate": 1.2472120512731239e-05,
"loss": 6.0859,
"step": 6130
},
{
"epoch": 0.66,
"learning_rate": 1.2449649997196223e-05,
"loss": 6.0438,
"step": 6140
},
{
"epoch": 0.66,
"learning_rate": 1.2427166315447572e-05,
"loss": 6.0711,
"step": 6150
},
{
"epoch": 0.66,
"learning_rate": 1.240466958832906e-05,
"loss": 6.0512,
"step": 6160
},
{
"epoch": 0.66,
"learning_rate": 1.2382159936754583e-05,
"loss": 6.0273,
"step": 6170
},
{
"epoch": 0.66,
"learning_rate": 1.2359637481707499e-05,
"loss": 6.0586,
"step": 6180
},
{
"epoch": 0.66,
"learning_rate": 1.233710234423998e-05,
"loss": 6.0613,
"step": 6190
},
{
"epoch": 0.67,
"learning_rate": 1.231455464547236e-05,
"loss": 6.082,
"step": 6200
},
{
"epoch": 0.67,
"learning_rate": 1.2291994506592493e-05,
"loss": 6.0703,
"step": 6210
},
{
"epoch": 0.67,
"learning_rate": 1.2269422048855093e-05,
"loss": 6.1051,
"step": 6220
},
{
"epoch": 0.67,
"learning_rate": 1.2246837393581081e-05,
"loss": 6.066,
"step": 6230
},
{
"epoch": 0.67,
"learning_rate": 1.2224240662156943e-05,
"loss": 6.057,
"step": 6240
},
{
"epoch": 0.67,
"learning_rate": 1.2201631976034064e-05,
"loss": 6.0687,
"step": 6250
},
{
"epoch": 0.67,
"learning_rate": 1.217901145672809e-05,
"loss": 6.0809,
"step": 6260
},
{
"epoch": 0.67,
"learning_rate": 1.2156379225818257e-05,
"loss": 6.0367,
"step": 6270
},
{
"epoch": 0.67,
"learning_rate": 1.213373540494676e-05,
"loss": 6.0555,
"step": 6280
},
{
"epoch": 0.68,
"learning_rate": 1.2111080115818076e-05,
"loss": 6.052,
"step": 6290
},
{
"epoch": 0.68,
"learning_rate": 1.2088413480198326e-05,
"loss": 6.0613,
"step": 6300
},
{
"epoch": 0.68,
"learning_rate": 1.2065735619914618e-05,
"loss": 6.059,
"step": 6310
},
{
"epoch": 0.68,
"learning_rate": 1.2043046656854385e-05,
"loss": 6.0816,
"step": 6320
},
{
"epoch": 0.68,
"learning_rate": 1.2020346712964732e-05,
"loss": 6.0734,
"step": 6330
},
{
"epoch": 0.68,
"learning_rate": 1.1997635910251793e-05,
"loss": 6.0582,
"step": 6340
},
{
"epoch": 0.68,
"learning_rate": 1.1974914370780053e-05,
"loss": 6.0648,
"step": 6350
},
{
"epoch": 0.68,
"learning_rate": 1.1952182216671715e-05,
"loss": 6.0746,
"step": 6360
},
{
"epoch": 0.68,
"learning_rate": 1.1929439570106028e-05,
"loss": 6.0434,
"step": 6370
},
{
"epoch": 0.68,
"learning_rate": 1.1906686553318632e-05,
"loss": 6.084,
"step": 6380
},
{
"epoch": 0.69,
"learning_rate": 1.188392328860091e-05,
"loss": 6.0395,
"step": 6390
},
{
"epoch": 0.69,
"learning_rate": 1.186114989829932e-05,
"loss": 6.0074,
"step": 6400
},
{
"epoch": 0.69,
"learning_rate": 1.1838366504814749e-05,
"loss": 6.0477,
"step": 6410
},
{
"epoch": 0.69,
"learning_rate": 1.181557323060184e-05,
"loss": 6.0625,
"step": 6420
},
{
"epoch": 0.69,
"learning_rate": 1.1792770198168348e-05,
"loss": 6.0648,
"step": 6430
},
{
"epoch": 0.69,
"learning_rate": 1.1769957530074474e-05,
"loss": 6.0895,
"step": 6440
},
{
"epoch": 0.69,
"learning_rate": 1.1747135348932208e-05,
"loss": 6.0687,
"step": 6450
},
{
"epoch": 0.69,
"learning_rate": 1.1724303777404671e-05,
"loss": 6.066,
"step": 6460
},
{
"epoch": 0.69,
"learning_rate": 1.1701462938205455e-05,
"loss": 6.0695,
"step": 6470
},
{
"epoch": 0.7,
"learning_rate": 1.167861295409796e-05,
"loss": 6.059,
"step": 6480
},
{
"epoch": 0.7,
"learning_rate": 1.1655753947894743e-05,
"loss": 6.0645,
"step": 6490
},
{
"epoch": 0.7,
"learning_rate": 1.1632886042456847e-05,
"loss": 6.0977,
"step": 6500
},
{
"epoch": 0.7,
"learning_rate": 1.1610009360693151e-05,
"loss": 6.0563,
"step": 6510
},
{
"epoch": 0.7,
"learning_rate": 1.1587124025559697e-05,
"loss": 6.0621,
"step": 6520
},
{
"epoch": 0.7,
"learning_rate": 1.1564230160059044e-05,
"loss": 6.0812,
"step": 6530
},
{
"epoch": 0.7,
"learning_rate": 1.1541327887239597e-05,
"loss": 6.0484,
"step": 6540
},
{
"epoch": 0.7,
"learning_rate": 1.1518417330194949e-05,
"loss": 6.041,
"step": 6550
},
{
"epoch": 0.7,
"learning_rate": 1.1495498612063212e-05,
"loss": 6.0566,
"step": 6560
},
{
"epoch": 0.71,
"learning_rate": 1.147257185602637e-05,
"loss": 6.0738,
"step": 6570
},
{
"epoch": 0.71,
"learning_rate": 1.144963718530961e-05,
"loss": 6.0492,
"step": 6580
},
{
"epoch": 0.71,
"learning_rate": 1.1426694723180647e-05,
"loss": 6.0906,
"step": 6590
},
{
"epoch": 0.71,
"learning_rate": 1.1403744592949082e-05,
"loss": 6.0648,
"step": 6600
},
{
"epoch": 0.71,
"learning_rate": 1.1380786917965727e-05,
"loss": 6.0492,
"step": 6610
},
{
"epoch": 0.71,
"learning_rate": 1.1357821821621948e-05,
"loss": 6.0648,
"step": 6620
},
{
"epoch": 0.71,
"learning_rate": 1.1334849427348992e-05,
"loss": 6.0504,
"step": 6630
},
{
"epoch": 0.71,
"learning_rate": 1.1311869858617342e-05,
"loss": 6.057,
"step": 6640
},
{
"epoch": 0.71,
"learning_rate": 1.1288883238936026e-05,
"loss": 6.0496,
"step": 6650
},
{
"epoch": 0.72,
"learning_rate": 1.1265889691851981e-05,
"loss": 6.0852,
"step": 6660
},
{
"epoch": 0.72,
"learning_rate": 1.1242889340949376e-05,
"loss": 6.0457,
"step": 6670
},
{
"epoch": 0.72,
"learning_rate": 1.1219882309848945e-05,
"loss": 6.0379,
"step": 6680
},
{
"epoch": 0.72,
"learning_rate": 1.1196868722207323e-05,
"loss": 6.0691,
"step": 6690
},
{
"epoch": 0.72,
"learning_rate": 1.1173848701716396e-05,
"loss": 6.0727,
"step": 6700
},
{
"epoch": 0.72,
"learning_rate": 1.1150822372102612e-05,
"loss": 6.0672,
"step": 6710
},
{
"epoch": 0.72,
"learning_rate": 1.1127789857126341e-05,
"loss": 6.0758,
"step": 6720
},
{
"epoch": 0.72,
"learning_rate": 1.1104751280581187e-05,
"loss": 6.0453,
"step": 6730
},
{
"epoch": 0.72,
"learning_rate": 1.1081706766293339e-05,
"loss": 6.0609,
"step": 6740
},
{
"epoch": 0.72,
"learning_rate": 1.1058656438120898e-05,
"loss": 6.0738,
"step": 6750
},
{
"epoch": 0.73,
"learning_rate": 1.1035600419953216e-05,
"loss": 6.084,
"step": 6760
},
{
"epoch": 0.73,
"learning_rate": 1.1012538835710223e-05,
"loss": 6.0277,
"step": 6770
},
{
"epoch": 0.73,
"learning_rate": 1.0989471809341764e-05,
"loss": 6.0465,
"step": 6780
},
{
"epoch": 0.73,
"learning_rate": 1.0966399464826944e-05,
"loss": 6.0648,
"step": 6790
},
{
"epoch": 0.73,
"learning_rate": 1.0943321926173441e-05,
"loss": 6.0539,
"step": 6800
},
{
"epoch": 0.73,
"learning_rate": 1.0920239317416851e-05,
"loss": 6.0652,
"step": 6810
},
{
"epoch": 0.73,
"learning_rate": 1.0897151762620028e-05,
"loss": 6.0965,
"step": 6820
},
{
"epoch": 0.73,
"learning_rate": 1.0874059385872403e-05,
"loss": 6.0551,
"step": 6830
},
{
"epoch": 0.73,
"learning_rate": 1.0850962311289322e-05,
"loss": 6.0621,
"step": 6840
},
{
"epoch": 0.74,
"learning_rate": 1.082786066301139e-05,
"loss": 6.0477,
"step": 6850
},
{
"epoch": 0.74,
"learning_rate": 1.0804754565203784e-05,
"loss": 6.0488,
"step": 6860
},
{
"epoch": 0.74,
"learning_rate": 1.0781644142055603e-05,
"loss": 6.0551,
"step": 6870
},
{
"epoch": 0.74,
"learning_rate": 1.075852951777919e-05,
"loss": 6.0863,
"step": 6880
},
{
"epoch": 0.74,
"learning_rate": 1.0735410816609468e-05,
"loss": 6.0699,
"step": 6890
},
{
"epoch": 0.74,
"learning_rate": 1.0712288162803278e-05,
"loss": 6.0406,
"step": 6900
},
{
"epoch": 0.74,
"learning_rate": 1.0689161680638692e-05,
"loss": 6.0809,
"step": 6910
},
{
"epoch": 0.74,
"learning_rate": 1.0666031494414377e-05,
"loss": 6.0766,
"step": 6920
},
{
"epoch": 0.74,
"learning_rate": 1.0642897728448893e-05,
"loss": 6.0379,
"step": 6930
},
{
"epoch": 0.75,
"learning_rate": 1.0619760507080045e-05,
"loss": 6.0586,
"step": 6940
},
{
"epoch": 0.75,
"learning_rate": 1.059661995466421e-05,
"loss": 6.0617,
"step": 6950
},
{
"epoch": 0.75,
"learning_rate": 1.0573476195575673e-05,
"loss": 6.1004,
"step": 6960
},
{
"epoch": 0.75,
"learning_rate": 1.0550329354205948e-05,
"loss": 6.0496,
"step": 6970
},
{
"epoch": 0.75,
"learning_rate": 1.0527179554963116e-05,
"loss": 6.057,
"step": 6980
},
{
"epoch": 0.75,
"learning_rate": 1.0504026922271156e-05,
"loss": 6.0441,
"step": 6990
},
{
"epoch": 0.75,
"learning_rate": 1.0480871580569281e-05,
"loss": 6.0758,
"step": 7000
},
{
"epoch": 0.75,
"learning_rate": 1.0457713654311255e-05,
"loss": 6.0684,
"step": 7010
},
{
"epoch": 0.75,
"learning_rate": 1.0439185536066226e-05,
"loss": 6.066,
"step": 7020
},
{
"epoch": 0.75,
"learning_rate": 1.0416023271274547e-05,
"loss": 6.0633,
"step": 7030
},
{
"epoch": 0.76,
"learning_rate": 1.0392858770469041e-05,
"loss": 6.082,
"step": 7040
},
{
"epoch": 0.76,
"learning_rate": 1.0369692158152705e-05,
"loss": 6.0398,
"step": 7050
},
{
"epoch": 0.76,
"learning_rate": 1.0346523558839883e-05,
"loss": 6.0672,
"step": 7060
},
{
"epoch": 0.76,
"learning_rate": 1.0323353097055601e-05,
"loss": 6.0828,
"step": 7070
},
{
"epoch": 0.76,
"learning_rate": 1.0300180897334897e-05,
"loss": 6.05,
"step": 7080
},
{
"epoch": 0.76,
"learning_rate": 1.0277007084222145e-05,
"loss": 6.0773,
"step": 7090
},
{
"epoch": 0.76,
"learning_rate": 1.0253831782270395e-05,
"loss": 6.0387,
"step": 7100
},
{
"epoch": 0.76,
"learning_rate": 1.0230655116040695e-05,
"loss": 6.0566,
"step": 7110
},
{
"epoch": 0.76,
"learning_rate": 1.020747721010143e-05,
"loss": 6.0617,
"step": 7120
},
{
"epoch": 0.77,
"learning_rate": 1.0184298189027648e-05,
"loss": 6.0375,
"step": 7130
},
{
"epoch": 0.77,
"learning_rate": 1.0161118177400384e-05,
"loss": 6.0359,
"step": 7140
},
{
"epoch": 0.77,
"learning_rate": 1.0137937299806005e-05,
"loss": 6.0812,
"step": 7150
},
{
"epoch": 0.77,
"learning_rate": 1.0114755680835524e-05,
"loss": 6.0723,
"step": 7160
},
{
"epoch": 0.77,
"learning_rate": 1.0091573445083951e-05,
"loss": 6.0695,
"step": 7170
},
{
"epoch": 0.77,
"learning_rate": 1.0068390717149597e-05,
"loss": 6.0531,
"step": 7180
},
{
"epoch": 0.77,
"learning_rate": 1.004520762163343e-05,
"loss": 6.0613,
"step": 7190
},
{
"epoch": 0.77,
"learning_rate": 1.0022024283138385e-05,
"loss": 6.0469,
"step": 7200
},
{
"epoch": 0.77,
"learning_rate": 9.998840826268708e-06,
"loss": 6.0887,
"step": 7210
},
{
"epoch": 0.78,
"learning_rate": 9.975657375629279e-06,
"loss": 6.0621,
"step": 7220
},
{
"epoch": 0.78,
"learning_rate": 9.952474055824947e-06,
"loss": 6.084,
"step": 7230
},
{
"epoch": 0.78,
"learning_rate": 9.929290991459855e-06,
"loss": 6.0441,
"step": 7240
},
{
"epoch": 0.78,
"learning_rate": 9.906108307136776e-06,
"loss": 6.0609,
"step": 7250
},
{
"epoch": 0.78,
"learning_rate": 9.882926127456437e-06,
"loss": 6.0621,
"step": 7260
},
{
"epoch": 0.78,
"learning_rate": 9.859744577016852e-06,
"loss": 6.0531,
"step": 7270
},
{
"epoch": 0.78,
"learning_rate": 9.83656378041266e-06,
"loss": 6.0738,
"step": 7280
},
{
"epoch": 0.78,
"learning_rate": 9.813383862234441e-06,
"loss": 6.0812,
"step": 7290
},
{
"epoch": 0.78,
"learning_rate": 9.790204947068054e-06,
"loss": 6.0414,
"step": 7300
},
{
"epoch": 0.78,
"learning_rate": 9.767027159493975e-06,
"loss": 6.0625,
"step": 7310
},
{
"epoch": 0.79,
"learning_rate": 9.743850624086608e-06,
"loss": 6.057,
"step": 7320
},
{
"epoch": 0.79,
"learning_rate": 9.720675465413639e-06,
"loss": 6.0988,
"step": 7330
},
{
"epoch": 0.79,
"learning_rate": 9.697501808035341e-06,
"loss": 6.0418,
"step": 7340
},
{
"epoch": 0.79,
"learning_rate": 9.674329776503932e-06,
"loss": 6.0586,
"step": 7350
},
{
"epoch": 0.79,
"learning_rate": 9.65115949536288e-06,
"loss": 6.0746,
"step": 7360
},
{
"epoch": 0.79,
"learning_rate": 9.627991089146249e-06,
"loss": 6.0832,
"step": 7370
},
{
"epoch": 0.79,
"learning_rate": 9.604824682378032e-06,
"loss": 6.0473,
"step": 7380
},
{
"epoch": 0.79,
"learning_rate": 9.581660399571466e-06,
"loss": 6.0551,
"step": 7390
},
{
"epoch": 0.79,
"learning_rate": 9.558498365228379e-06,
"loss": 6.0707,
"step": 7400
},
{
"epoch": 0.8,
"learning_rate": 9.53533870383851e-06,
"loss": 6.0773,
"step": 7410
},
{
"epoch": 0.8,
"learning_rate": 9.512181539878843e-06,
"loss": 6.0504,
"step": 7420
},
{
"epoch": 0.8,
"learning_rate": 9.489026997812946e-06,
"loss": 6.0781,
"step": 7430
},
{
"epoch": 0.8,
"learning_rate": 9.465875202090288e-06,
"loss": 6.041,
"step": 7440
},
{
"epoch": 0.8,
"learning_rate": 9.442726277145578e-06,
"loss": 6.0574,
"step": 7450
},
{
"epoch": 0.8,
"learning_rate": 9.419580347398102e-06,
"loss": 6.0664,
"step": 7460
},
{
"epoch": 0.8,
"learning_rate": 9.396437537251038e-06,
"loss": 6.0613,
"step": 7470
},
{
"epoch": 0.8,
"learning_rate": 9.373297971090803e-06,
"loss": 6.0637,
"step": 7480
},
{
"epoch": 0.8,
"learning_rate": 9.350161773286377e-06,
"loss": 6.0715,
"step": 7490
},
{
"epoch": 0.81,
"learning_rate": 9.327029068188636e-06,
"loss": 6.0676,
"step": 7500
},
{
"epoch": 0.81,
"learning_rate": 9.303899980129682e-06,
"loss": 6.0578,
"step": 7510
},
{
"epoch": 0.81,
"learning_rate": 9.280774633422183e-06,
"loss": 6.0598,
"step": 7520
},
{
"epoch": 0.81,
"learning_rate": 9.257653152358687e-06,
"loss": 6.0488,
"step": 7530
},
{
"epoch": 0.81,
"learning_rate": 9.234535661210975e-06,
"loss": 6.0543,
"step": 7540
},
{
"epoch": 0.81,
"learning_rate": 9.21142228422938e-06,
"loss": 6.0477,
"step": 7550
},
{
"epoch": 0.81,
"learning_rate": 9.18831314564212e-06,
"loss": 6.0438,
"step": 7560
},
{
"epoch": 0.81,
"learning_rate": 9.165208369654636e-06,
"loss": 6.0848,
"step": 7570
},
{
"epoch": 0.81,
"learning_rate": 9.14210808044892e-06,
"loss": 6.0656,
"step": 7580
},
{
"epoch": 0.81,
"learning_rate": 9.119012402182851e-06,
"loss": 6.0582,
"step": 7590
},
{
"epoch": 0.82,
"learning_rate": 9.09592145898952e-06,
"loss": 6.0762,
"step": 7600
},
{
"epoch": 0.82,
"learning_rate": 9.072835374976573e-06,
"loss": 6.0777,
"step": 7610
},
{
"epoch": 0.82,
"learning_rate": 9.049754274225536e-06,
"loss": 6.0641,
"step": 7620
},
{
"epoch": 0.82,
"learning_rate": 9.026678280791157e-06,
"loss": 6.0488,
"step": 7630
},
{
"epoch": 0.82,
"learning_rate": 9.003607518700726e-06,
"loss": 6.0465,
"step": 7640
},
{
"epoch": 0.82,
"learning_rate": 8.98054211195342e-06,
"loss": 6.0781,
"step": 7650
},
{
"epoch": 0.82,
"learning_rate": 8.957482184519635e-06,
"loss": 6.0633,
"step": 7660
},
{
"epoch": 0.82,
"learning_rate": 8.93442786034031e-06,
"loss": 6.0531,
"step": 7670
},
{
"epoch": 0.82,
"learning_rate": 8.911379263326275e-06,
"loss": 6.0781,
"step": 7680
},
{
"epoch": 0.83,
"learning_rate": 8.888336517357574e-06,
"loss": 6.0863,
"step": 7690
},
{
"epoch": 0.83,
"learning_rate": 8.865299746282805e-06,
"loss": 6.0824,
"step": 7700
},
{
"epoch": 0.83,
"learning_rate": 8.84226907391845e-06,
"loss": 6.0797,
"step": 7710
},
{
"epoch": 0.83,
"learning_rate": 8.819244624048216e-06,
"loss": 6.0871,
"step": 7720
},
{
"epoch": 0.83,
"learning_rate": 8.796226520422364e-06,
"loss": 6.0461,
"step": 7730
},
{
"epoch": 0.83,
"learning_rate": 8.773214886757045e-06,
"loss": 6.0598,
"step": 7740
},
{
"epoch": 0.83,
"learning_rate": 8.750209846733634e-06,
"loss": 6.0531,
"step": 7750
},
{
"epoch": 0.83,
"learning_rate": 8.72721152399807e-06,
"loss": 6.0422,
"step": 7760
},
{
"epoch": 0.83,
"learning_rate": 8.704220042160188e-06,
"loss": 6.0687,
"step": 7770
},
{
"epoch": 0.84,
"learning_rate": 8.681235524793052e-06,
"loss": 6.0547,
"step": 7780
},
{
"epoch": 0.84,
"learning_rate": 8.658258095432302e-06,
"loss": 6.0434,
"step": 7790
},
{
"epoch": 0.84,
"learning_rate": 8.635287877575471e-06,
"loss": 6.0555,
"step": 7800
},
{
"epoch": 0.84,
"learning_rate": 8.61232499468134e-06,
"loss": 6.0824,
"step": 7810
},
{
"epoch": 0.84,
"learning_rate": 8.58936957016926e-06,
"loss": 6.057,
"step": 7820
},
{
"epoch": 0.84,
"learning_rate": 8.566421727418504e-06,
"loss": 6.0461,
"step": 7830
},
{
"epoch": 0.84,
"learning_rate": 8.543481589767586e-06,
"loss": 6.0723,
"step": 7840
},
{
"epoch": 0.84,
"learning_rate": 8.520549280513612e-06,
"loss": 6.0836,
"step": 7850
},
{
"epoch": 0.84,
"learning_rate": 8.497624922911613e-06,
"loss": 6.0816,
"step": 7860
},
{
"epoch": 0.84,
"learning_rate": 8.474708640173878e-06,
"loss": 6.0609,
"step": 7870
},
{
"epoch": 0.85,
"learning_rate": 8.4518005554693e-06,
"loss": 6.0523,
"step": 7880
},
{
"epoch": 0.85,
"learning_rate": 8.428900791922707e-06,
"loss": 6.0352,
"step": 7890
},
{
"epoch": 0.85,
"learning_rate": 8.406009472614205e-06,
"loss": 6.0711,
"step": 7900
},
{
"epoch": 0.85,
"learning_rate": 8.383126720578513e-06,
"loss": 6.0758,
"step": 7910
},
{
"epoch": 0.85,
"learning_rate": 8.360252658804304e-06,
"loss": 6.0695,
"step": 7920
},
{
"epoch": 0.85,
"learning_rate": 8.337387410233544e-06,
"loss": 6.05,
"step": 7930
},
{
"epoch": 0.85,
"learning_rate": 8.314531097760827e-06,
"loss": 6.0324,
"step": 7940
},
{
"epoch": 0.85,
"learning_rate": 8.291683844232721e-06,
"loss": 6.0859,
"step": 7950
},
{
"epoch": 0.85,
"learning_rate": 8.268845772447106e-06,
"loss": 6.0648,
"step": 7960
},
{
"epoch": 0.86,
"learning_rate": 8.246017005152508e-06,
"loss": 6.0598,
"step": 7970
},
{
"epoch": 0.86,
"learning_rate": 8.223197665047447e-06,
"loss": 6.0504,
"step": 7980
},
{
"epoch": 0.86,
"learning_rate": 8.200387874779771e-06,
"loss": 6.0684,
"step": 7990
},
{
"epoch": 0.86,
"learning_rate": 8.177587756946008e-06,
"loss": 6.0723,
"step": 8000
},
{
"epoch": 0.86,
"learning_rate": 8.15479743409069e-06,
"loss": 6.0941,
"step": 8010
},
{
"epoch": 0.86,
"learning_rate": 8.136572310507251e-06,
"loss": 6.073,
"step": 8020
},
{
"epoch": 0.86,
"learning_rate": 8.113799927256902e-06,
"loss": 6.0602,
"step": 8030
},
{
"epoch": 0.86,
"learning_rate": 8.091037681827527e-06,
"loss": 6.0617,
"step": 8040
},
{
"epoch": 0.86,
"learning_rate": 8.068285696560103e-06,
"loss": 6.0793,
"step": 8050
},
{
"epoch": 0.87,
"learning_rate": 8.045544093740472e-06,
"loss": 6.0555,
"step": 8060
},
{
"epoch": 0.87,
"learning_rate": 8.022812995598664e-06,
"loss": 6.0637,
"step": 8070
},
{
"epoch": 0.87,
"learning_rate": 8.000092524308252e-06,
"loss": 6.0867,
"step": 8080
},
{
"epoch": 0.87,
"learning_rate": 7.977382801985697e-06,
"loss": 6.0465,
"step": 8090
},
{
"epoch": 0.87,
"learning_rate": 7.95468395068968e-06,
"loss": 6.0605,
"step": 8100
},
{
"epoch": 0.87,
"learning_rate": 7.931996092420458e-06,
"loss": 6.0438,
"step": 8110
},
{
"epoch": 0.87,
"learning_rate": 7.9093193491192e-06,
"loss": 6.0602,
"step": 8120
},
{
"epoch": 0.87,
"learning_rate": 7.88665384266734e-06,
"loss": 6.0512,
"step": 8130
},
{
"epoch": 0.87,
"learning_rate": 7.863999694885911e-06,
"loss": 6.0555,
"step": 8140
},
{
"epoch": 0.88,
"learning_rate": 7.8413570275349e-06,
"loss": 6.0625,
"step": 8150
},
{
"epoch": 0.88,
"learning_rate": 7.81872596231259e-06,
"loss": 6.0555,
"step": 8160
},
{
"epoch": 0.88,
"learning_rate": 7.796106620854902e-06,
"loss": 6.0285,
"step": 8170
},
{
"epoch": 0.88,
"learning_rate": 7.77349912473475e-06,
"loss": 6.0684,
"step": 8180
},
{
"epoch": 0.88,
"learning_rate": 7.750903595461376e-06,
"loss": 6.0668,
"step": 8190
},
{
"epoch": 0.88,
"learning_rate": 7.728320154479712e-06,
"loss": 6.0996,
"step": 8200
},
{
"epoch": 0.88,
"learning_rate": 7.705748923169711e-06,
"loss": 6.084,
"step": 8210
},
{
"epoch": 0.88,
"learning_rate": 7.683190022845704e-06,
"loss": 6.0563,
"step": 8220
},
{
"epoch": 0.88,
"learning_rate": 7.660643574755751e-06,
"loss": 6.0223,
"step": 8230
},
{
"epoch": 0.88,
"learning_rate": 7.63810970008098e-06,
"loss": 6.0863,
"step": 8240
},
{
"epoch": 0.89,
"learning_rate": 7.615588519934938e-06,
"loss": 6.0617,
"step": 8250
},
{
"epoch": 0.89,
"learning_rate": 7.593080155362949e-06,
"loss": 6.0738,
"step": 8260
},
{
"epoch": 0.89,
"learning_rate": 7.570584727341451e-06,
"loss": 6.1051,
"step": 8270
},
{
"epoch": 0.89,
"learning_rate": 7.548102356777356e-06,
"loss": 6.0566,
"step": 8280
},
{
"epoch": 0.89,
"learning_rate": 7.52563316450739e-06,
"loss": 6.0719,
"step": 8290
},
{
"epoch": 0.89,
"learning_rate": 7.5031772712974575e-06,
"loss": 6.0516,
"step": 8300
},
{
"epoch": 0.89,
"learning_rate": 7.4807347978419754e-06,
"loss": 6.068,
"step": 8310
},
{
"epoch": 0.89,
"learning_rate": 7.458305864763238e-06,
"loss": 6.0652,
"step": 8320
},
{
"epoch": 0.89,
"learning_rate": 7.435890592610764e-06,
"loss": 6.0605,
"step": 8330
},
{
"epoch": 0.9,
"learning_rate": 7.413489101860642e-06,
"loss": 6.0855,
"step": 8340
},
{
"epoch": 0.9,
"learning_rate": 7.3911015129149e-06,
"loss": 6.0824,
"step": 8350
},
{
"epoch": 0.9,
"learning_rate": 7.368727946100837e-06,
"loss": 6.0805,
"step": 8360
},
{
"epoch": 0.9,
"learning_rate": 7.346368521670396e-06,
"loss": 6.0691,
"step": 8370
},
{
"epoch": 0.9,
"learning_rate": 7.3240233597995e-06,
"loss": 6.0656,
"step": 8380
},
{
"epoch": 0.9,
"learning_rate": 7.3016925805874196e-06,
"loss": 6.0633,
"step": 8390
},
{
"epoch": 0.9,
"learning_rate": 7.279376304056121e-06,
"loss": 6.0461,
"step": 8400
},
{
"epoch": 0.9,
"learning_rate": 7.257074650149622e-06,
"loss": 6.0367,
"step": 8410
},
{
"epoch": 0.9,
"learning_rate": 7.234787738733351e-06,
"loss": 6.0711,
"step": 8420
},
{
"epoch": 0.91,
"learning_rate": 7.2125156895934936e-06,
"loss": 6.0535,
"step": 8430
},
{
"epoch": 0.91,
"learning_rate": 7.190258622436359e-06,
"loss": 6.0566,
"step": 8440
},
{
"epoch": 0.91,
"learning_rate": 7.1680166568877304e-06,
"loss": 6.0527,
"step": 8450
},
{
"epoch": 0.91,
"learning_rate": 7.145789912492227e-06,
"loss": 6.0687,
"step": 8460
},
{
"epoch": 0.91,
"learning_rate": 7.123578508712652e-06,
"loss": 6.0715,
"step": 8470
},
{
"epoch": 0.91,
"learning_rate": 7.101382564929365e-06,
"loss": 6.0637,
"step": 8480
},
{
"epoch": 0.91,
"learning_rate": 7.079202200439625e-06,
"loss": 6.0809,
"step": 8490
},
{
"epoch": 0.91,
"learning_rate": 7.057037534456959e-06,
"loss": 6.0855,
"step": 8500
},
{
"epoch": 0.91,
"learning_rate": 7.034888686110523e-06,
"loss": 6.0664,
"step": 8510
},
{
"epoch": 0.91,
"learning_rate": 7.012755774444451e-06,
"loss": 6.0859,
"step": 8520
},
{
"epoch": 0.92,
"learning_rate": 6.990638918417224e-06,
"loss": 6.0438,
"step": 8530
},
{
"epoch": 0.92,
"learning_rate": 6.96853823690103e-06,
"loss": 6.0734,
"step": 8540
},
{
"epoch": 0.92,
"learning_rate": 6.946453848681121e-06,
"loss": 6.0523,
"step": 8550
},
{
"epoch": 0.92,
"learning_rate": 6.9243858724551774e-06,
"loss": 6.0426,
"step": 8560
},
{
"epoch": 0.92,
"learning_rate": 6.902334426832671e-06,
"loss": 6.0676,
"step": 8570
},
{
"epoch": 0.92,
"learning_rate": 6.880299630334224e-06,
"loss": 6.0551,
"step": 8580
},
{
"epoch": 0.92,
"learning_rate": 6.858281601390975e-06,
"loss": 6.0504,
"step": 8590
},
{
"epoch": 0.92,
"learning_rate": 6.83628045834394e-06,
"loss": 6.0348,
"step": 8600
},
{
"epoch": 0.92,
"learning_rate": 6.814296319443383e-06,
"loss": 6.0727,
"step": 8610
},
{
"epoch": 0.93,
"learning_rate": 6.792329302848169e-06,
"loss": 6.0426,
"step": 8620
},
{
"epoch": 0.93,
"learning_rate": 6.770379526625136e-06,
"loss": 6.0582,
"step": 8630
},
{
"epoch": 0.93,
"learning_rate": 6.7484471087484636e-06,
"loss": 6.0605,
"step": 8640
},
{
"epoch": 0.93,
"learning_rate": 6.726532167099034e-06,
"loss": 6.0586,
"step": 8650
},
{
"epoch": 0.93,
"learning_rate": 6.704634819463793e-06,
"loss": 6.0531,
"step": 8660
},
{
"epoch": 0.93,
"learning_rate": 6.682755183535135e-06,
"loss": 6.0531,
"step": 8670
},
{
"epoch": 0.93,
"learning_rate": 6.660893376910244e-06,
"loss": 6.0574,
"step": 8680
},
{
"epoch": 0.93,
"learning_rate": 6.639049517090491e-06,
"loss": 6.0445,
"step": 8690
},
{
"epoch": 0.93,
"learning_rate": 6.6172237214807775e-06,
"loss": 6.0668,
"step": 8700
},
{
"epoch": 0.94,
"learning_rate": 6.595416107388919e-06,
"loss": 6.0625,
"step": 8710
},
{
"epoch": 0.94,
"learning_rate": 6.573626792025009e-06,
"loss": 6.0789,
"step": 8720
},
{
"epoch": 0.94,
"learning_rate": 6.551855892500792e-06,
"loss": 6.0617,
"step": 8730
},
{
"epoch": 0.94,
"learning_rate": 6.530103525829024e-06,
"loss": 6.0758,
"step": 8740
},
{
"epoch": 0.94,
"learning_rate": 6.508369808922869e-06,
"loss": 6.0699,
"step": 8750
},
{
"epoch": 0.94,
"learning_rate": 6.4866548585952384e-06,
"loss": 6.0676,
"step": 8760
},
{
"epoch": 0.94,
"learning_rate": 6.464958791558182e-06,
"loss": 6.0633,
"step": 8770
},
{
"epoch": 0.94,
"learning_rate": 6.443281724422261e-06,
"loss": 6.0492,
"step": 8780
},
{
"epoch": 0.94,
"learning_rate": 6.421623773695913e-06,
"loss": 6.0652,
"step": 8790
},
{
"epoch": 0.94,
"learning_rate": 6.39998505578483e-06,
"loss": 6.0867,
"step": 8800
},
{
"epoch": 0.95,
"learning_rate": 6.3783656869913335e-06,
"loss": 6.0602,
"step": 8810
},
{
"epoch": 0.95,
"learning_rate": 6.356765783513752e-06,
"loss": 6.0492,
"step": 8820
},
{
"epoch": 0.95,
"learning_rate": 6.335185461445787e-06,
"loss": 6.0496,
"step": 8830
},
{
"epoch": 0.95,
"learning_rate": 6.313624836775902e-06,
"loss": 6.066,
"step": 8840
},
{
"epoch": 0.95,
"learning_rate": 6.292084025386685e-06,
"loss": 6.0574,
"step": 8850
},
{
"epoch": 0.95,
"learning_rate": 6.27056314305424e-06,
"loss": 6.0613,
"step": 8860
},
{
"epoch": 0.95,
"learning_rate": 6.249062305447553e-06,
"loss": 6.066,
"step": 8870
},
{
"epoch": 0.95,
"learning_rate": 6.227581628127877e-06,
"loss": 6.0906,
"step": 8880
},
{
"epoch": 0.95,
"learning_rate": 6.206121226548105e-06,
"loss": 6.082,
"step": 8890
},
{
"epoch": 0.96,
"learning_rate": 6.184681216052159e-06,
"loss": 6.091,
"step": 8900
},
{
"epoch": 0.96,
"learning_rate": 6.163261711874365e-06,
"loss": 6.0473,
"step": 8910
},
{
"epoch": 0.96,
"learning_rate": 6.141862829138823e-06,
"loss": 6.0504,
"step": 8920
},
{
"epoch": 0.96,
"learning_rate": 6.120484682858809e-06,
"loss": 6.0543,
"step": 8930
},
{
"epoch": 0.96,
"learning_rate": 6.099127387936141e-06,
"loss": 6.0477,
"step": 8940
},
{
"epoch": 0.96,
"learning_rate": 6.077791059160568e-06,
"loss": 6.0777,
"step": 8950
},
{
"epoch": 0.96,
"learning_rate": 6.056475811209153e-06,
"loss": 6.0684,
"step": 8960
},
{
"epoch": 0.96,
"learning_rate": 6.035181758645652e-06,
"loss": 6.0426,
"step": 8970
},
{
"epoch": 0.96,
"learning_rate": 6.013909015919901e-06,
"loss": 6.0434,
"step": 8980
},
{
"epoch": 0.97,
"learning_rate": 5.992657697367208e-06,
"loss": 6.0734,
"step": 8990
},
{
"epoch": 0.97,
"learning_rate": 5.971427917207722e-06,
"loss": 6.0742,
"step": 9000
},
{
"epoch": 0.97,
"learning_rate": 5.950219789545838e-06,
"loss": 6.073,
"step": 9010
},
{
"epoch": 0.97,
"learning_rate": 5.933268953818473e-06,
"loss": 6.0773,
"step": 9020
},
{
"epoch": 0.97,
"learning_rate": 5.912100087823469e-06,
"loss": 6.0406,
"step": 9030
},
{
"epoch": 0.97,
"learning_rate": 5.890953193197284e-06,
"loss": 6.0602,
"step": 9040
},
{
"epoch": 0.97,
"learning_rate": 5.869828383598818e-06,
"loss": 6.0809,
"step": 9050
},
{
"epoch": 0.97,
"learning_rate": 5.8487257725682756e-06,
"loss": 6.0312,
"step": 9060
},
{
"epoch": 0.97,
"learning_rate": 5.827645473526549e-06,
"loss": 6.0629,
"step": 9070
},
{
"epoch": 0.97,
"learning_rate": 5.806587599774602e-06,
"loss": 6.0398,
"step": 9080
},
{
"epoch": 0.98,
"learning_rate": 5.785552264492884e-06,
"loss": 6.0652,
"step": 9090
},
{
"epoch": 0.98,
"learning_rate": 5.764539580740687e-06,
"loss": 6.0402,
"step": 9100
},
{
"epoch": 0.98,
"learning_rate": 5.7435496614555764e-06,
"loss": 6.0566,
"step": 9110
},
{
"epoch": 0.98,
"learning_rate": 5.722582619452746e-06,
"loss": 6.0563,
"step": 9120
},
{
"epoch": 0.98,
"learning_rate": 5.701638567424447e-06,
"loss": 6.0449,
"step": 9130
},
{
"epoch": 0.98,
"learning_rate": 5.680717617939351e-06,
"loss": 6.0469,
"step": 9140
},
{
"epoch": 0.98,
"learning_rate": 5.659819883441975e-06,
"loss": 6.059,
"step": 9150
},
{
"epoch": 0.98,
"learning_rate": 5.638945476252044e-06,
"loss": 6.0656,
"step": 9160
},
{
"epoch": 0.98,
"learning_rate": 5.618094508563923e-06,
"loss": 6.0656,
"step": 9170
},
{
"epoch": 0.99,
"learning_rate": 5.597267092445979e-06,
"loss": 6.0695,
"step": 9180
},
{
"epoch": 0.99,
"learning_rate": 5.576463339840013e-06,
"loss": 6.034,
"step": 9190
},
{
"epoch": 0.99,
"learning_rate": 5.555683362560622e-06,
"loss": 6.0699,
"step": 9200
},
{
"epoch": 0.99,
"learning_rate": 5.534927272294637e-06,
"loss": 6.0695,
"step": 9210
},
{
"epoch": 0.99,
"learning_rate": 5.5141951806004815e-06,
"loss": 6.043,
"step": 9220
},
{
"epoch": 0.99,
"learning_rate": 5.493487198907615e-06,
"loss": 6.0656,
"step": 9230
},
{
"epoch": 0.99,
"learning_rate": 5.47280343851589e-06,
"loss": 6.0617,
"step": 9240
},
{
"epoch": 0.99,
"learning_rate": 5.452144010594998e-06,
"loss": 6.0727,
"step": 9250
},
{
"epoch": 0.99,
"learning_rate": 5.431509026183831e-06,
"loss": 6.0746,
"step": 9260
},
{
"epoch": 1.0,
"learning_rate": 5.41089859618992e-06,
"loss": 6.0449,
"step": 9270
},
{
"epoch": 1.0,
"learning_rate": 5.390312831388805e-06,
"loss": 6.0375,
"step": 9280
},
{
"epoch": 1.0,
"learning_rate": 5.369751842423474e-06,
"loss": 6.0453,
"step": 9290
},
{
"epoch": 1.0,
"learning_rate": 5.349215739803735e-06,
"loss": 6.0547,
"step": 9300
},
{
"epoch": 1.0,
"learning_rate": 5.328704633905662e-06,
"loss": 6.0473,
"step": 9310
},
{
"epoch": 1.0,
"learning_rate": 5.3082186349709495e-06,
"loss": 6.066,
"step": 9320
},
{
"epoch": 1.0,
"learning_rate": 5.287757853106377e-06,
"loss": 6.0684,
"step": 9330
},
{
"epoch": 1.0,
"learning_rate": 5.267322398283164e-06,
"loss": 6.073,
"step": 9340
},
{
"epoch": 1.0,
"learning_rate": 5.246912380336422e-06,
"loss": 6.0559,
"step": 9350
},
{
"epoch": 1.0,
"learning_rate": 5.226527908964534e-06,
"loss": 6.0539,
"step": 9360
},
{
"epoch": 1.01,
"learning_rate": 5.206169093728588e-06,
"loss": 6.0598,
"step": 9370
},
{
"epoch": 1.01,
"learning_rate": 5.185836044051767e-06,
"loss": 6.0469,
"step": 9380
},
{
"epoch": 1.01,
"learning_rate": 5.165528869218776e-06,
"loss": 6.0734,
"step": 9390
},
{
"epoch": 1.01,
"learning_rate": 5.145247678375251e-06,
"loss": 6.1031,
"step": 9400
},
{
"epoch": 1.01,
"learning_rate": 5.1249925805271686e-06,
"loss": 6.1199,
"step": 9410
},
{
"epoch": 1.01,
"learning_rate": 5.1047636845402594e-06,
"loss": 6.0594,
"step": 9420
},
{
"epoch": 1.01,
"learning_rate": 5.084561099139438e-06,
"loss": 6.0523,
"step": 9430
},
{
"epoch": 1.01,
"learning_rate": 5.064384932908186e-06,
"loss": 6.0441,
"step": 9440
},
{
"epoch": 1.01,
"learning_rate": 5.044235294288014e-06,
"loss": 6.0598,
"step": 9450
},
{
"epoch": 1.02,
"learning_rate": 5.024112291577832e-06,
"loss": 6.0555,
"step": 9460
},
{
"epoch": 1.02,
"learning_rate": 5.004016032933403e-06,
"loss": 6.048,
"step": 9470
},
{
"epoch": 1.02,
"learning_rate": 4.983946626366739e-06,
"loss": 6.0832,
"step": 9480
},
{
"epoch": 1.02,
"learning_rate": 4.963904179745538e-06,
"loss": 6.0852,
"step": 9490
},
{
"epoch": 1.02,
"learning_rate": 4.943888800792586e-06,
"loss": 6.0695,
"step": 9500
},
{
"epoch": 1.02,
"learning_rate": 4.923900597085196e-06,
"loss": 6.0523,
"step": 9510
},
{
"epoch": 1.02,
"learning_rate": 4.903939676054614e-06,
"loss": 6.0629,
"step": 9520
},
{
"epoch": 1.02,
"learning_rate": 4.884006144985457e-06,
"loss": 6.0723,
"step": 9530
},
{
"epoch": 1.02,
"learning_rate": 4.8641001110151185e-06,
"loss": 6.0551,
"step": 9540
},
{
"epoch": 1.03,
"learning_rate": 4.844221681133213e-06,
"loss": 6.0723,
"step": 9550
},
{
"epoch": 1.03,
"learning_rate": 4.82437096218098e-06,
"loss": 6.084,
"step": 9560
},
{
"epoch": 1.03,
"learning_rate": 4.804548060850731e-06,
"loss": 6.0527,
"step": 9570
},
{
"epoch": 1.03,
"learning_rate": 4.784753083685253e-06,
"loss": 6.0852,
"step": 9580
},
{
"epoch": 1.03,
"learning_rate": 4.764986137077261e-06,
"loss": 6.0668,
"step": 9590
},
{
"epoch": 1.03,
"learning_rate": 4.745247327268799e-06,
"loss": 6.0766,
"step": 9600
},
{
"epoch": 1.03,
"learning_rate": 4.725536760350701e-06,
"loss": 6.0684,
"step": 9610
},
{
"epoch": 1.03,
"learning_rate": 4.705854542261983e-06,
"loss": 6.059,
"step": 9620
},
{
"epoch": 1.03,
"learning_rate": 4.686200778789313e-06,
"loss": 6.0395,
"step": 9630
},
{
"epoch": 1.04,
"learning_rate": 4.666575575566405e-06,
"loss": 6.0379,
"step": 9640
},
{
"epoch": 1.04,
"learning_rate": 4.646979038073486e-06,
"loss": 6.0262,
"step": 9650
},
{
"epoch": 1.04,
"learning_rate": 4.627411271636697e-06,
"loss": 6.0336,
"step": 9660
},
{
"epoch": 1.04,
"learning_rate": 4.607872381427557e-06,
"loss": 6.0793,
"step": 9670
},
{
"epoch": 1.04,
"learning_rate": 4.588362472462368e-06,
"loss": 6.0629,
"step": 9680
},
{
"epoch": 1.04,
"learning_rate": 4.568881649601682e-06,
"loss": 6.0723,
"step": 9690
},
{
"epoch": 1.04,
"learning_rate": 4.549430017549703e-06,
"loss": 6.0797,
"step": 9700
},
{
"epoch": 1.04,
"learning_rate": 4.530007680853756e-06,
"loss": 6.0531,
"step": 9710
},
{
"epoch": 1.04,
"learning_rate": 4.51061474390371e-06,
"loss": 6.0703,
"step": 9720
},
{
"epoch": 1.04,
"learning_rate": 4.491251310931407e-06,
"loss": 6.0699,
"step": 9730
},
{
"epoch": 1.05,
"learning_rate": 4.47191748601013e-06,
"loss": 6.0664,
"step": 9740
},
{
"epoch": 1.05,
"learning_rate": 4.45261337305401e-06,
"loss": 6.0828,
"step": 9750
},
{
"epoch": 1.05,
"learning_rate": 4.433339075817498e-06,
"loss": 6.0355,
"step": 9760
},
{
"epoch": 1.05,
"learning_rate": 4.414094697894779e-06,
"loss": 6.0551,
"step": 9770
},
{
"epoch": 1.05,
"learning_rate": 4.394880342719248e-06,
"loss": 6.0555,
"step": 9780
},
{
"epoch": 1.05,
"learning_rate": 4.375696113562915e-06,
"loss": 6.0559,
"step": 9790
},
{
"epoch": 1.05,
"learning_rate": 4.356542113535892e-06,
"loss": 6.0434,
"step": 9800
},
{
"epoch": 1.05,
"learning_rate": 4.337418445585797e-06,
"loss": 6.0785,
"step": 9810
},
{
"epoch": 1.05,
"learning_rate": 4.3183252124972365e-06,
"loss": 6.0602,
"step": 9820
},
{
"epoch": 1.06,
"learning_rate": 4.299262516891235e-06,
"loss": 6.0578,
"step": 9830
},
{
"epoch": 1.06,
"learning_rate": 4.280230461224676e-06,
"loss": 6.0754,
"step": 9840
},
{
"epoch": 1.06,
"learning_rate": 4.261229147789777e-06,
"loss": 6.0383,
"step": 9850
},
{
"epoch": 1.06,
"learning_rate": 4.242258678713509e-06,
"loss": 6.0645,
"step": 9860
},
{
"epoch": 1.06,
"learning_rate": 4.223319155957078e-06,
"loss": 6.0598,
"step": 9870
},
{
"epoch": 1.06,
"learning_rate": 4.204410681315344e-06,
"loss": 6.0629,
"step": 9880
},
{
"epoch": 1.06,
"learning_rate": 4.185533356416311e-06,
"loss": 6.0445,
"step": 9890
},
{
"epoch": 1.06,
"learning_rate": 4.166687282720545e-06,
"loss": 6.0281,
"step": 9900
},
{
"epoch": 1.06,
"learning_rate": 4.147872561520658e-06,
"loss": 6.0914,
"step": 9910
},
{
"epoch": 1.07,
"learning_rate": 4.12908929394074e-06,
"loss": 6.0781,
"step": 9920
},
{
"epoch": 1.07,
"learning_rate": 4.110337580935836e-06,
"loss": 6.0656,
"step": 9930
},
{
"epoch": 1.07,
"learning_rate": 4.091617523291381e-06,
"loss": 6.0648,
"step": 9940
},
{
"epoch": 1.07,
"learning_rate": 4.072929221622689e-06,
"loss": 6.0781,
"step": 9950
},
{
"epoch": 1.07,
"learning_rate": 4.054272776374374e-06,
"loss": 6.0648,
"step": 9960
},
{
"epoch": 1.07,
"learning_rate": 4.035648287819848e-06,
"loss": 6.0438,
"step": 9970
},
{
"epoch": 1.07,
"learning_rate": 4.017055856060748e-06,
"loss": 6.0199,
"step": 9980
},
{
"epoch": 1.07,
"learning_rate": 3.9984955810264315e-06,
"loss": 6.0652,
"step": 9990
},
{
"epoch": 1.07,
"learning_rate": 3.979967562473406e-06,
"loss": 6.0758,
"step": 10000
},
{
"epoch": 1.07,
"learning_rate": 3.961471899984822e-06,
"loss": 6.0762,
"step": 10010
},
{
"epoch": 1.08,
"learning_rate": 3.946698733169213e-06,
"loss": 6.0438,
"step": 10020
},
{
"epoch": 1.08,
"learning_rate": 3.928261561990697e-06,
"loss": 6.0422,
"step": 10030
},
{
"epoch": 1.08,
"learning_rate": 3.909857024782542e-06,
"loss": 6.0523,
"step": 10040
},
{
"epoch": 1.08,
"learning_rate": 3.891485220464221e-06,
"loss": 6.0848,
"step": 10050
},
{
"epoch": 1.08,
"learning_rate": 3.873146247779259e-06,
"loss": 6.077,
"step": 10060
},
{
"epoch": 1.08,
"learning_rate": 3.854840205294741e-06,
"loss": 6.073,
"step": 10070
},
{
"epoch": 1.08,
"learning_rate": 3.83656719140074e-06,
"loss": 6.0477,
"step": 10080
},
{
"epoch": 1.08,
"learning_rate": 3.818327304309827e-06,
"loss": 6.0641,
"step": 10090
},
{
"epoch": 1.08,
"learning_rate": 3.8001206420565093e-06,
"loss": 6.0656,
"step": 10100
},
{
"epoch": 1.09,
"learning_rate": 3.781947302496737e-06,
"loss": 6.0543,
"step": 10110
},
{
"epoch": 1.09,
"learning_rate": 3.763807383307341e-06,
"loss": 6.0824,
"step": 10120
},
{
"epoch": 1.09,
"learning_rate": 3.7457009819855428e-06,
"loss": 6.016,
"step": 10130
},
{
"epoch": 1.09,
"learning_rate": 3.7276281958484018e-06,
"loss": 6.0512,
"step": 10140
},
{
"epoch": 1.09,
"learning_rate": 3.7095891220323166e-06,
"loss": 6.0512,
"step": 10150
},
{
"epoch": 1.09,
"learning_rate": 3.691583857492478e-06,
"loss": 6.0605,
"step": 10160
},
{
"epoch": 1.09,
"learning_rate": 3.673612499002374e-06,
"loss": 6.0547,
"step": 10170
},
{
"epoch": 1.09,
"learning_rate": 3.6556751431532445e-06,
"loss": 6.0281,
"step": 10180
},
{
"epoch": 1.09,
"learning_rate": 3.637771886353587e-06,
"loss": 6.0656,
"step": 10190
},
{
"epoch": 1.1,
"learning_rate": 3.6199028248286116e-06,
"loss": 6.0617,
"step": 10200
},
{
"epoch": 1.1,
"learning_rate": 3.602068054619754e-06,
"loss": 6.048,
"step": 10210
},
{
"epoch": 1.1,
"learning_rate": 3.5842676715841252e-06,
"loss": 6.0641,
"step": 10220
},
{
"epoch": 1.1,
"learning_rate": 3.566501771394032e-06,
"loss": 6.0449,
"step": 10230
},
{
"epoch": 1.1,
"learning_rate": 3.5487704495364294e-06,
"loss": 6.0367,
"step": 10240
},
{
"epoch": 1.1,
"learning_rate": 3.531073801312438e-06,
"loss": 6.0535,
"step": 10250
},
{
"epoch": 1.1,
"learning_rate": 3.5134119218368034e-06,
"loss": 6.0859,
"step": 10260
},
{
"epoch": 1.1,
"learning_rate": 3.495784906037406e-06,
"loss": 6.0684,
"step": 10270
},
{
"epoch": 1.1,
"learning_rate": 3.4781928486547458e-06,
"loss": 6.0687,
"step": 10280
},
{
"epoch": 1.1,
"learning_rate": 3.4606358442414213e-06,
"loss": 6.0574,
"step": 10290
},
{
"epoch": 1.11,
"learning_rate": 3.4431139871616403e-06,
"loss": 6.0543,
"step": 10300
},
{
"epoch": 1.11,
"learning_rate": 3.425627371590695e-06,
"loss": 6.0414,
"step": 10310
},
{
"epoch": 1.11,
"learning_rate": 3.408176091514469e-06,
"loss": 6.0793,
"step": 10320
},
{
"epoch": 1.11,
"learning_rate": 3.39076024072893e-06,
"loss": 6.0566,
"step": 10330
},
{
"epoch": 1.11,
"learning_rate": 3.3733799128396106e-06,
"loss": 6.0461,
"step": 10340
},
{
"epoch": 1.11,
"learning_rate": 3.356035201261133e-06,
"loss": 6.05,
"step": 10350
},
{
"epoch": 1.11,
"learning_rate": 3.3387261992166776e-06,
"loss": 6.0539,
"step": 10360
},
{
"epoch": 1.11,
"learning_rate": 3.3214529997375067e-06,
"loss": 6.048,
"step": 10370
},
{
"epoch": 1.11,
"learning_rate": 3.3042156956624415e-06,
"loss": 6.0641,
"step": 10380
},
{
"epoch": 1.12,
"learning_rate": 3.28701437963739e-06,
"loss": 6.0387,
"step": 10390
},
{
"epoch": 1.12,
"learning_rate": 3.2698491441148183e-06,
"loss": 6.052,
"step": 10400
},
{
"epoch": 1.12,
"learning_rate": 3.2527200813532823e-06,
"loss": 6.0738,
"step": 10410
},
{
"epoch": 1.12,
"learning_rate": 3.2356272834169087e-06,
"loss": 6.0867,
"step": 10420
},
{
"epoch": 1.12,
"learning_rate": 3.2185708421749207e-06,
"loss": 6.0539,
"step": 10430
},
{
"epoch": 1.12,
"learning_rate": 3.2015508493011226e-06,
"loss": 6.0617,
"step": 10440
},
{
"epoch": 1.12,
"learning_rate": 3.1845673962734314e-06,
"loss": 6.0508,
"step": 10450
},
{
"epoch": 1.12,
"learning_rate": 3.167620574373359e-06,
"loss": 6.0344,
"step": 10460
},
{
"epoch": 1.12,
"learning_rate": 3.1507104746855464e-06,
"loss": 6.048,
"step": 10470
},
{
"epoch": 1.13,
"learning_rate": 3.1338371880972506e-06,
"loss": 6.0645,
"step": 10480
},
{
"epoch": 1.13,
"learning_rate": 3.1170008052978827e-06,
"loss": 6.0695,
"step": 10490
},
{
"epoch": 1.13,
"learning_rate": 3.100201416778491e-06,
"loss": 6.0512,
"step": 10500
},
{
"epoch": 1.13,
"learning_rate": 3.0834391128313014e-06,
"loss": 6.0305,
"step": 10510
},
{
"epoch": 1.13,
"learning_rate": 3.06671398354921e-06,
"loss": 6.0707,
"step": 10520
},
{
"epoch": 1.13,
"learning_rate": 3.050026118825319e-06,
"loss": 6.0586,
"step": 10530
},
{
"epoch": 1.13,
"learning_rate": 3.0333756083524335e-06,
"loss": 6.0375,
"step": 10540
},
{
"epoch": 1.13,
"learning_rate": 3.016762541622599e-06,
"loss": 6.0598,
"step": 10550
},
{
"epoch": 1.13,
"learning_rate": 3.0001870079266016e-06,
"loss": 6.048,
"step": 10560
},
{
"epoch": 1.13,
"learning_rate": 2.9836490963535083e-06,
"loss": 6.0422,
"step": 10570
},
{
"epoch": 1.14,
"learning_rate": 2.9671488957901652e-06,
"loss": 6.0711,
"step": 10580
},
{
"epoch": 1.14,
"learning_rate": 2.9506864949207425e-06,
"loss": 6.0664,
"step": 10590
},
{
"epoch": 1.14,
"learning_rate": 2.9342619822262374e-06,
"loss": 6.0543,
"step": 10600
},
{
"epoch": 1.14,
"learning_rate": 2.9178754459840196e-06,
"loss": 6.0805,
"step": 10610
},
{
"epoch": 1.14,
"learning_rate": 2.9015269742673324e-06,
"loss": 6.0648,
"step": 10620
},
{
"epoch": 1.14,
"learning_rate": 2.8852166549448458e-06,
"loss": 6.077,
"step": 10630
},
{
"epoch": 1.14,
"learning_rate": 2.8689445756801583e-06,
"loss": 6.0559,
"step": 10640
},
{
"epoch": 1.14,
"learning_rate": 2.8527108239313506e-06,
"loss": 6.052,
"step": 10650
},
{
"epoch": 1.14,
"learning_rate": 2.8365154869504895e-06,
"loss": 6.0586,
"step": 10660
},
{
"epoch": 1.15,
"learning_rate": 2.820358651783186e-06,
"loss": 6.0676,
"step": 10670
},
{
"epoch": 1.15,
"learning_rate": 2.804240405268108e-06,
"loss": 6.0453,
"step": 10680
},
{
"epoch": 1.15,
"learning_rate": 2.7881608340365176e-06,
"loss": 6.0551,
"step": 10690
},
{
"epoch": 1.15,
"learning_rate": 2.7721200245118128e-06,
"loss": 6.0504,
"step": 10700
},
{
"epoch": 1.15,
"learning_rate": 2.7561180629090513e-06,
"loss": 6.0453,
"step": 10710
},
{
"epoch": 1.15,
"learning_rate": 2.740155035234503e-06,
"loss": 6.0605,
"step": 10720
},
{
"epoch": 1.15,
"learning_rate": 2.7242310272851656e-06,
"loss": 6.0258,
"step": 10730
},
{
"epoch": 1.15,
"learning_rate": 2.7083461246483313e-06,
"loss": 6.0469,
"step": 10740
},
{
"epoch": 1.15,
"learning_rate": 2.692500412701096e-06,
"loss": 6.0734,
"step": 10750
},
{
"epoch": 1.16,
"learning_rate": 2.6766939766099297e-06,
"loss": 6.0605,
"step": 10760
},
{
"epoch": 1.16,
"learning_rate": 2.660926901330194e-06,
"loss": 6.0586,
"step": 10770
},
{
"epoch": 1.16,
"learning_rate": 2.6451992716057096e-06,
"loss": 6.05,
"step": 10780
},
{
"epoch": 1.16,
"learning_rate": 2.629511171968271e-06,
"loss": 6.0668,
"step": 10790
},
{
"epoch": 1.16,
"learning_rate": 2.6138626867372274e-06,
"loss": 6.0887,
"step": 10800
},
{
"epoch": 1.16,
"learning_rate": 2.5982539000189945e-06,
"loss": 6.0582,
"step": 10810
},
{
"epoch": 1.16,
"learning_rate": 2.582684895706632e-06,
"loss": 6.0406,
"step": 10820
},
{
"epoch": 1.16,
"learning_rate": 2.5671557574793703e-06,
"loss": 6.057,
"step": 10830
},
{
"epoch": 1.16,
"learning_rate": 2.5516665688021804e-06,
"loss": 6.0586,
"step": 10840
},
{
"epoch": 1.16,
"learning_rate": 2.5362174129253014e-06,
"loss": 6.0723,
"step": 10850
},
{
"epoch": 1.17,
"learning_rate": 2.520808372883823e-06,
"loss": 6.0563,
"step": 10860
},
{
"epoch": 1.17,
"learning_rate": 2.5054395314972068e-06,
"loss": 6.066,
"step": 10870
},
{
"epoch": 1.17,
"learning_rate": 2.4901109713688686e-06,
"loss": 6.0977,
"step": 10880
},
{
"epoch": 1.17,
"learning_rate": 2.4748227748857235e-06,
"loss": 6.0535,
"step": 10890
},
{
"epoch": 1.17,
"learning_rate": 2.459575024217733e-06,
"loss": 6.0781,
"step": 10900
},
{
"epoch": 1.17,
"learning_rate": 2.4443678013174843e-06,
"loss": 6.0625,
"step": 10910
},
{
"epoch": 1.17,
"learning_rate": 2.4292011879197284e-06,
"loss": 6.0766,
"step": 10920
},
{
"epoch": 1.17,
"learning_rate": 2.4140752655409625e-06,
"loss": 6.0574,
"step": 10930
},
{
"epoch": 1.17,
"learning_rate": 2.3989901154789684e-06,
"loss": 6.0797,
"step": 10940
},
{
"epoch": 1.18,
"learning_rate": 2.3839458188124e-06,
"loss": 6.0488,
"step": 10950
},
{
"epoch": 1.18,
"learning_rate": 2.3689424564003206e-06,
"loss": 6.0523,
"step": 10960
},
{
"epoch": 1.18,
"learning_rate": 2.353980108881799e-06,
"loss": 6.0395,
"step": 10970
},
{
"epoch": 1.18,
"learning_rate": 2.3390588566754457e-06,
"loss": 6.0938,
"step": 10980
},
{
"epoch": 1.18,
"learning_rate": 2.324178779979005e-06,
"loss": 6.0621,
"step": 10990
},
{
"epoch": 1.18,
"learning_rate": 2.309339958768906e-06,
"loss": 6.0187,
"step": 11000
},
{
"epoch": 1.18,
"learning_rate": 2.2945424727998487e-06,
"loss": 6.0496,
"step": 11010
},
{
"epoch": 1.18,
"learning_rate": 2.279786401604359e-06,
"loss": 6.0656,
"step": 11020
},
{
"epoch": 1.18,
"learning_rate": 2.2680114165887057e-06,
"loss": 6.0383,
"step": 11030
},
{
"epoch": 1.19,
"learning_rate": 2.2533300916968257e-06,
"loss": 6.0406,
"step": 11040
},
{
"epoch": 1.19,
"learning_rate": 2.238690403084045e-06,
"loss": 6.0652,
"step": 11050
},
{
"epoch": 1.19,
"learning_rate": 2.2240924294347697e-06,
"loss": 6.0559,
"step": 11060
},
{
"epoch": 1.19,
"learning_rate": 2.2095362492092085e-06,
"loss": 6.043,
"step": 11070
},
{
"epoch": 1.19,
"learning_rate": 2.195021940642934e-06,
"loss": 6.0832,
"step": 11080
},
{
"epoch": 1.19,
"learning_rate": 2.1805495817464773e-06,
"loss": 6.052,
"step": 11090
},
{
"epoch": 1.19,
"learning_rate": 2.1661192503048913e-06,
"loss": 6.0715,
"step": 11100
},
{
"epoch": 1.19,
"learning_rate": 2.151731023877356e-06,
"loss": 6.057,
"step": 11110
},
{
"epoch": 1.19,
"learning_rate": 2.1373849797967326e-06,
"loss": 6.0594,
"step": 11120
},
{
"epoch": 1.19,
"learning_rate": 2.123081195169179e-06,
"loss": 6.0684,
"step": 11130
},
{
"epoch": 1.2,
"learning_rate": 2.108819746873706e-06,
"loss": 6.0496,
"step": 11140
},
{
"epoch": 1.2,
"learning_rate": 2.0946007115617895e-06,
"loss": 6.0328,
"step": 11150
},
{
"epoch": 1.2,
"learning_rate": 2.0804241656569366e-06,
"loss": 6.0742,
"step": 11160
},
{
"epoch": 1.2,
"learning_rate": 2.0662901853542973e-06,
"loss": 6.0895,
"step": 11170
},
{
"epoch": 1.2,
"learning_rate": 2.052198846620228e-06,
"loss": 6.0766,
"step": 11180
},
{
"epoch": 1.2,
"learning_rate": 2.0381502251919127e-06,
"loss": 6.0469,
"step": 11190
},
{
"epoch": 1.2,
"learning_rate": 2.0241443965769293e-06,
"loss": 6.0355,
"step": 11200
},
{
"epoch": 1.2,
"learning_rate": 2.010181436052866e-06,
"loss": 6.0438,
"step": 11210
},
{
"epoch": 1.2,
"learning_rate": 1.996261418666896e-06,
"loss": 6.0629,
"step": 11220
},
{
"epoch": 1.21,
"learning_rate": 1.9823844192353936e-06,
"loss": 6.0422,
"step": 11230
},
{
"epoch": 1.21,
"learning_rate": 1.9685505123435224e-06,
"loss": 6.0852,
"step": 11240
},
{
"epoch": 1.21,
"learning_rate": 1.9547597723448264e-06,
"loss": 6.0723,
"step": 11250
},
{
"epoch": 1.21,
"learning_rate": 1.9410122733608505e-06,
"loss": 6.0531,
"step": 11260
},
{
"epoch": 1.21,
"learning_rate": 1.9273080892807205e-06,
"loss": 6.0398,
"step": 11270
},
{
"epoch": 1.21,
"learning_rate": 1.9136472937607666e-06,
"loss": 6.0648,
"step": 11280
},
{
"epoch": 1.21,
"learning_rate": 1.9000299602241047e-06,
"loss": 6.0777,
"step": 11290
},
{
"epoch": 1.21,
"learning_rate": 1.8864561618602672e-06,
"loss": 6.0738,
"step": 11300
},
{
"epoch": 1.21,
"learning_rate": 1.872925971624785e-06,
"loss": 6.0734,
"step": 11310
},
{
"epoch": 1.22,
"learning_rate": 1.859439462238818e-06,
"loss": 6.0305,
"step": 11320
},
{
"epoch": 1.22,
"learning_rate": 1.8459967061887406e-06,
"loss": 6.0574,
"step": 11330
},
{
"epoch": 1.22,
"learning_rate": 1.8325977757257784e-06,
"loss": 6.0664,
"step": 11340
},
{
"epoch": 1.22,
"learning_rate": 1.8192427428655945e-06,
"loss": 6.0687,
"step": 11350
},
{
"epoch": 1.22,
"learning_rate": 1.8059316793879233e-06,
"loss": 6.0414,
"step": 11360
},
{
"epoch": 1.22,
"learning_rate": 1.792664656836166e-06,
"loss": 6.0719,
"step": 11370
},
{
"epoch": 1.22,
"learning_rate": 1.7794417465170233e-06,
"loss": 6.0297,
"step": 11380
},
{
"epoch": 1.22,
"learning_rate": 1.7662630195001051e-06,
"loss": 6.0582,
"step": 11390
},
{
"epoch": 1.22,
"learning_rate": 1.7531285466175408e-06,
"loss": 6.0719,
"step": 11400
},
{
"epoch": 1.23,
"learning_rate": 1.7400383984636127e-06,
"loss": 6.052,
"step": 11410
},
{
"epoch": 1.23,
"learning_rate": 1.7269926453943619e-06,
"loss": 6.0406,
"step": 11420
},
{
"epoch": 1.23,
"learning_rate": 1.7139913575272282e-06,
"loss": 6.091,
"step": 11430
},
{
"epoch": 1.23,
"learning_rate": 1.7010346047406522e-06,
"loss": 6.0695,
"step": 11440
},
{
"epoch": 1.23,
"learning_rate": 1.6881224566737187e-06,
"loss": 6.0621,
"step": 11450
},
{
"epoch": 1.23,
"learning_rate": 1.6752549827257669e-06,
"loss": 6.048,
"step": 11460
},
{
"epoch": 1.23,
"learning_rate": 1.6624322520560321e-06,
"loss": 6.0758,
"step": 11470
},
{
"epoch": 1.23,
"learning_rate": 1.6496543335832583e-06,
"loss": 6.0355,
"step": 11480
},
{
"epoch": 1.23,
"learning_rate": 1.6369212959853441e-06,
"loss": 6.0785,
"step": 11490
},
{
"epoch": 1.23,
"learning_rate": 1.6242332076989586e-06,
"loss": 6.0641,
"step": 11500
},
{
"epoch": 1.24,
"learning_rate": 1.6115901369191855e-06,
"loss": 6.0555,
"step": 11510
},
{
"epoch": 1.24,
"learning_rate": 1.598992151599147e-06,
"loss": 6.0336,
"step": 11520
},
{
"epoch": 1.24,
"learning_rate": 1.5864393194496474e-06,
"loss": 6.0848,
"step": 11530
},
{
"epoch": 1.24,
"learning_rate": 1.5739317079387994e-06,
"loss": 6.0563,
"step": 11540
},
{
"epoch": 1.24,
"learning_rate": 1.561469384291674e-06,
"loss": 6.0695,
"step": 11550
},
{
"epoch": 1.24,
"learning_rate": 1.5490524154899234e-06,
"loss": 6.0297,
"step": 11560
},
{
"epoch": 1.24,
"learning_rate": 1.5366808682714396e-06,
"loss": 6.0676,
"step": 11570
},
{
"epoch": 1.24,
"learning_rate": 1.5243548091299753e-06,
"loss": 6.0496,
"step": 11580
},
{
"epoch": 1.24,
"learning_rate": 1.5120743043148066e-06,
"loss": 6.0711,
"step": 11590
},
{
"epoch": 1.25,
"learning_rate": 1.4998394198303589e-06,
"loss": 6.1078,
"step": 11600
},
{
"epoch": 1.25,
"learning_rate": 1.4876502214358678e-06,
"loss": 6.0664,
"step": 11610
},
{
"epoch": 1.25,
"learning_rate": 1.4755067746450113e-06,
"loss": 6.0578,
"step": 11620
},
{
"epoch": 1.25,
"learning_rate": 1.4634091447255705e-06,
"loss": 6.0715,
"step": 11630
},
{
"epoch": 1.25,
"learning_rate": 1.4513573966990735e-06,
"loss": 6.0664,
"step": 11640
},
{
"epoch": 1.25,
"learning_rate": 1.439351595340437e-06,
"loss": 6.0563,
"step": 11650
},
{
"epoch": 1.25,
"learning_rate": 1.4273918051776392e-06,
"loss": 6.0371,
"step": 11660
},
{
"epoch": 1.25,
"learning_rate": 1.415478090491348e-06,
"loss": 6.0574,
"step": 11670
},
{
"epoch": 1.25,
"learning_rate": 1.4036105153145996e-06,
"loss": 6.084,
"step": 11680
},
{
"epoch": 1.26,
"learning_rate": 1.3917891434324305e-06,
"loss": 6.0582,
"step": 11690
},
{
"epoch": 1.26,
"learning_rate": 1.3800140383815585e-06,
"loss": 6.0516,
"step": 11700
},
{
"epoch": 1.26,
"learning_rate": 1.368285263450021e-06,
"loss": 6.034,
"step": 11710
},
{
"epoch": 1.26,
"learning_rate": 1.3566028816768494e-06,
"loss": 6.0633,
"step": 11720
},
{
"epoch": 1.26,
"learning_rate": 1.3449669558517187e-06,
"loss": 6.0625,
"step": 11730
},
{
"epoch": 1.26,
"learning_rate": 1.3333775485146217e-06,
"loss": 6.034,
"step": 11740
},
{
"epoch": 1.26,
"learning_rate": 1.3218347219555195e-06,
"loss": 6.0598,
"step": 11750
},
{
"epoch": 1.26,
"learning_rate": 1.3103385382140222e-06,
"loss": 6.0699,
"step": 11760
},
{
"epoch": 1.26,
"learning_rate": 1.2988890590790394e-06,
"loss": 6.0492,
"step": 11770
},
{
"epoch": 1.26,
"learning_rate": 1.2874863460884635e-06,
"loss": 6.048,
"step": 11780
},
{
"epoch": 1.27,
"learning_rate": 1.2761304605288216e-06,
"loss": 6.0598,
"step": 11790
},
{
"epoch": 1.27,
"learning_rate": 1.2648214634349688e-06,
"loss": 6.073,
"step": 11800
},
{
"epoch": 1.27,
"learning_rate": 1.2535594155897346e-06,
"loss": 6.0625,
"step": 11810
},
{
"epoch": 1.27,
"learning_rate": 1.2423443775236177e-06,
"loss": 6.0449,
"step": 11820
},
{
"epoch": 1.27,
"learning_rate": 1.231176409514445e-06,
"loss": 6.0402,
"step": 11830
},
{
"epoch": 1.27,
"learning_rate": 1.2200555715870631e-06,
"loss": 6.0574,
"step": 11840
},
{
"epoch": 1.27,
"learning_rate": 1.2089819235129964e-06,
"loss": 6.084,
"step": 11850
},
{
"epoch": 1.27,
"learning_rate": 1.197955524810146e-06,
"loss": 6.0508,
"step": 11860
},
{
"epoch": 1.27,
"learning_rate": 1.186976434742454e-06,
"loss": 6.0547,
"step": 11870
},
{
"epoch": 1.28,
"learning_rate": 1.176044712319595e-06,
"loss": 6.0789,
"step": 11880
},
{
"epoch": 1.28,
"learning_rate": 1.1651604162966511e-06,
"loss": 6.0516,
"step": 11890
},
{
"epoch": 1.28,
"learning_rate": 1.154323605173806e-06,
"loss": 6.0617,
"step": 11900
},
{
"epoch": 1.28,
"learning_rate": 1.1435343371960183e-06,
"loss": 6.0625,
"step": 11910
},
{
"epoch": 1.28,
"learning_rate": 1.1327926703527203e-06,
"loss": 6.0047,
"step": 11920
},
{
"epoch": 1.28,
"learning_rate": 1.1220986623774966e-06,
"loss": 6.0684,
"step": 11930
},
{
"epoch": 1.28,
"learning_rate": 1.1114523707477809e-06,
"loss": 6.0711,
"step": 11940
},
{
"epoch": 1.28,
"learning_rate": 1.1008538526845468e-06,
"loss": 6.0625,
"step": 11950
},
{
"epoch": 1.28,
"learning_rate": 1.0903031651519902e-06,
"loss": 6.0687,
"step": 11960
},
{
"epoch": 1.29,
"learning_rate": 1.0798003648572387e-06,
"loss": 6.0473,
"step": 11970
},
{
"epoch": 1.29,
"learning_rate": 1.0693455082500303e-06,
"loss": 6.0625,
"step": 11980
},
{
"epoch": 1.29,
"learning_rate": 1.0589386515224286e-06,
"loss": 6.052,
"step": 11990
},
{
"epoch": 1.29,
"learning_rate": 1.048579850608502e-06,
"loss": 6.0332,
"step": 12000
},
{
"epoch": 1.29,
"learning_rate": 1.0382691611840367e-06,
"loss": 6.059,
"step": 12010
},
{
"epoch": 1.29,
"learning_rate": 1.0280066386662303e-06,
"loss": 6.0367,
"step": 12020
},
{
"epoch": 1.29,
"learning_rate": 1.0198313378998736e-06,
"loss": 6.0578,
"step": 12030
},
{
"epoch": 1.29,
"learning_rate": 1.009655654638846e-06,
"loss": 6.0637,
"step": 12040
},
{
"epoch": 1.29,
"learning_rate": 9.995282920744242e-07,
"loss": 6.0613,
"step": 12050
},
{
"epoch": 1.29,
"learning_rate": 9.894493046384711e-07,
"loss": 6.0344,
"step": 12060
},
{
"epoch": 1.3,
"learning_rate": 9.794187465028527e-07,
"loss": 6.0652,
"step": 12070
},
{
"epoch": 1.3,
"learning_rate": 9.694366715791327e-07,
"loss": 6.0383,
"step": 12080
},
{
"epoch": 1.3,
"learning_rate": 9.595031335182968e-07,
"loss": 6.0664,
"step": 12090
},
{
"epoch": 1.3,
"learning_rate": 9.496181857104536e-07,
"loss": 6.0648,
"step": 12100
},
{
"epoch": 1.3,
"learning_rate": 9.397818812845571e-07,
"loss": 6.0461,
"step": 12110
},
{
"epoch": 1.3,
"learning_rate": 9.299942731081091e-07,
"loss": 6.0785,
"step": 12120
},
{
"epoch": 1.3,
"learning_rate": 9.202554137868913e-07,
"loss": 6.0707,
"step": 12130
},
{
"epoch": 1.3,
"learning_rate": 9.105653556646621e-07,
"loss": 6.0617,
"step": 12140
},
{
"epoch": 1.3,
"learning_rate": 9.009241508228972e-07,
"loss": 6.0738,
"step": 12150
},
{
"epoch": 1.31,
"learning_rate": 8.913318510804914e-07,
"loss": 6.0594,
"step": 12160
},
{
"epoch": 1.31,
"learning_rate": 8.81788507993494e-07,
"loss": 6.0766,
"step": 12170
},
{
"epoch": 1.31,
"learning_rate": 8.722941728548173e-07,
"loss": 6.0461,
"step": 12180
},
{
"epoch": 1.31,
"learning_rate": 8.628488966939785e-07,
"loss": 6.0582,
"step": 12190
},
{
"epoch": 1.31,
"learning_rate": 8.534527302768058e-07,
"loss": 6.0766,
"step": 12200
},
{
"epoch": 1.31,
"learning_rate": 8.441057241051842e-07,
"loss": 6.0531,
"step": 12210
},
{
"epoch": 1.31,
"learning_rate": 8.348079284167743e-07,
"loss": 6.0723,
"step": 12220
},
{
"epoch": 1.31,
"learning_rate": 8.255593931847372e-07,
"loss": 6.0355,
"step": 12230
},
{
"epoch": 1.31,
"learning_rate": 8.163601681174793e-07,
"loss": 6.0605,
"step": 12240
},
{
"epoch": 1.32,
"learning_rate": 8.072103026583722e-07,
"loss": 6.0363,
"step": 12250
},
{
"epoch": 1.32,
"learning_rate": 7.981098459855008e-07,
"loss": 6.0711,
"step": 12260
},
{
"epoch": 1.32,
"learning_rate": 7.890588470113802e-07,
"loss": 6.0754,
"step": 12270
},
{
"epoch": 1.32,
"learning_rate": 7.800573543827139e-07,
"loss": 6.0641,
"step": 12280
},
{
"epoch": 1.32,
"learning_rate": 7.711054164801147e-07,
"loss": 6.0539,
"step": 12290
},
{
"epoch": 1.32,
"learning_rate": 7.622030814178582e-07,
"loss": 6.0367,
"step": 12300
},
{
"epoch": 1.32,
"learning_rate": 7.533503970436096e-07,
"loss": 6.0805,
"step": 12310
},
{
"epoch": 1.32,
"learning_rate": 7.445474109381856e-07,
"loss": 6.0582,
"step": 12320
},
{
"epoch": 1.32,
"learning_rate": 7.357941704152771e-07,
"loss": 6.0387,
"step": 12330
},
{
"epoch": 1.32,
"learning_rate": 7.270907225212154e-07,
"loss": 6.0566,
"step": 12340
},
{
"epoch": 1.33,
"learning_rate": 7.184371140347024e-07,
"loss": 6.0488,
"step": 12350
},
{
"epoch": 1.33,
"learning_rate": 7.098333914665723e-07,
"loss": 6.0426,
"step": 12360
},
{
"epoch": 1.33,
"learning_rate": 7.012796010595302e-07,
"loss": 6.0699,
"step": 12370
},
{
"epoch": 1.33,
"learning_rate": 6.927757887879139e-07,
"loss": 6.0508,
"step": 12380
},
{
"epoch": 1.33,
"learning_rate": 6.84322000357438e-07,
"loss": 6.0426,
"step": 12390
},
{
"epoch": 1.33,
"learning_rate": 6.75918281204958e-07,
"loss": 6.0449,
"step": 12400
},
{
"epoch": 1.33,
"learning_rate": 6.675646764982147e-07,
"loss": 6.0672,
"step": 12410
},
{
"epoch": 1.33,
"learning_rate": 6.592612311355994e-07,
"loss": 6.0793,
"step": 12420
},
{
"epoch": 1.33,
"learning_rate": 6.510079897459109e-07,
"loss": 6.0563,
"step": 12430
},
{
"epoch": 1.34,
"learning_rate": 6.428049966881156e-07,
"loss": 6.0484,
"step": 12440
},
{
"epoch": 1.34,
"learning_rate": 6.346522960511048e-07,
"loss": 6.0473,
"step": 12450
},
{
"epoch": 1.34,
"learning_rate": 6.265499316534651e-07,
"loss": 6.0539,
"step": 12460
},
{
"epoch": 1.34,
"learning_rate": 6.184979470432407e-07,
"loss": 6.0727,
"step": 12470
},
{
"epoch": 1.34,
"learning_rate": 6.104963854976931e-07,
"loss": 6.091,
"step": 12480
},
{
"epoch": 1.34,
"learning_rate": 6.025452900230777e-07,
"loss": 6.0512,
"step": 12490
},
{
"epoch": 1.34,
"learning_rate": 5.946447033544034e-07,
"loss": 6.0816,
"step": 12500
},
{
"epoch": 1.34,
"learning_rate": 5.867946679552138e-07,
"loss": 6.0738,
"step": 12510
},
{
"epoch": 1.34,
"learning_rate": 5.789952260173481e-07,
"loss": 6.0605,
"step": 12520
},
{
"epoch": 1.35,
"learning_rate": 5.712464194607214e-07,
"loss": 6.0344,
"step": 12530
},
{
"epoch": 1.35,
"learning_rate": 5.635482899330968e-07,
"loss": 6.048,
"step": 12540
},
{
"epoch": 1.35,
"learning_rate": 5.559008788098619e-07,
"loss": 6.0762,
"step": 12550
},
{
"epoch": 1.35,
"learning_rate": 5.483042271938055e-07,
"loss": 6.0738,
"step": 12560
},
{
"epoch": 1.35,
"learning_rate": 5.407583759149005e-07,
"loss": 6.0574,
"step": 12570
},
{
"epoch": 1.35,
"learning_rate": 5.332633655300767e-07,
"loss": 6.0637,
"step": 12580
},
{
"epoch": 1.35,
"learning_rate": 5.258192363230141e-07,
"loss": 6.0281,
"step": 12590
},
{
"epoch": 1.35,
"learning_rate": 5.184260283039133e-07,
"loss": 6.0754,
"step": 12600
},
{
"epoch": 1.35,
"learning_rate": 5.110837812092906e-07,
"loss": 6.0586,
"step": 12610
},
{
"epoch": 1.35,
"learning_rate": 5.037925345017635e-07,
"loss": 6.0395,
"step": 12620
},
{
"epoch": 1.36,
"learning_rate": 4.965523273698292e-07,
"loss": 6.0422,
"step": 12630
},
{
"epoch": 1.36,
"learning_rate": 4.893631987276682e-07,
"loss": 6.0191,
"step": 12640
},
{
"epoch": 1.36,
"learning_rate": 4.822251872149219e-07,
"loss": 6.0445,
"step": 12650
},
{
"epoch": 1.36,
"learning_rate": 4.7513833119649633e-07,
"loss": 6.075,
"step": 12660
},
{
"epoch": 1.36,
"learning_rate": 4.6810266876234247e-07,
"loss": 6.0559,
"step": 12670
},
{
"epoch": 1.36,
"learning_rate": 4.611182377272705e-07,
"loss": 6.034,
"step": 12680
},
{
"epoch": 1.36,
"learning_rate": 4.541850756307231e-07,
"loss": 6.0461,
"step": 12690
},
{
"epoch": 1.36,
"learning_rate": 4.4730321973659787e-07,
"loss": 6.0617,
"step": 12700
},
{
"epoch": 1.36,
"learning_rate": 4.4047270703302613e-07,
"loss": 6.0371,
"step": 12710
},
{
"epoch": 1.37,
"learning_rate": 4.3369357423219016e-07,
"loss": 6.0852,
"step": 12720
},
{
"epoch": 1.37,
"learning_rate": 4.26965857770113e-07,
"loss": 6.0535,
"step": 12730
},
{
"epoch": 1.37,
"learning_rate": 4.2028959380647327e-07,
"loss": 6.032,
"step": 12740
},
{
"epoch": 1.37,
"learning_rate": 4.1366481822440186e-07,
"loss": 6.0559,
"step": 12750
},
{
"epoch": 1.37,
"learning_rate": 4.070915666302999e-07,
"loss": 6.0488,
"step": 12760
},
{
"epoch": 1.37,
"learning_rate": 4.0056987435363346e-07,
"loss": 6.0687,
"step": 12770
},
{
"epoch": 1.37,
"learning_rate": 3.9409977644675577e-07,
"loss": 6.0582,
"step": 12780
},
{
"epoch": 1.37,
"learning_rate": 3.87681307684713e-07,
"loss": 6.0512,
"step": 12790
},
{
"epoch": 1.37,
"learning_rate": 3.8131450256505773e-07,
"loss": 6.0551,
"step": 12800
},
{
"epoch": 1.38,
"learning_rate": 3.749993953076647e-07,
"loss": 6.0609,
"step": 12810
},
{
"epoch": 1.38,
"learning_rate": 3.6873601985454863e-07,
"loss": 6.0809,
"step": 12820
},
{
"epoch": 1.38,
"learning_rate": 3.625244098696734e-07,
"loss": 6.0723,
"step": 12830
},
{
"epoch": 1.38,
"learning_rate": 3.563645987387865e-07,
"loss": 6.0578,
"step": 12840
},
{
"epoch": 1.38,
"learning_rate": 3.502566195692214e-07,
"loss": 6.0758,
"step": 12850
},
{
"epoch": 1.38,
"learning_rate": 3.442005051897357e-07,
"loss": 6.0406,
"step": 12860
},
{
"epoch": 1.38,
"learning_rate": 3.38196288150322e-07,
"loss": 6.0484,
"step": 12870
},
{
"epoch": 1.38,
"learning_rate": 3.322440007220429e-07,
"loss": 6.084,
"step": 12880
},
{
"epoch": 1.38,
"learning_rate": 3.263436748968507e-07,
"loss": 6.0461,
"step": 12890
},
{
"epoch": 1.39,
"learning_rate": 3.204953423874202e-07,
"loss": 6.0746,
"step": 12900
},
{
"epoch": 1.39,
"learning_rate": 3.146990346269729e-07,
"loss": 6.0555,
"step": 12910
},
{
"epoch": 1.39,
"learning_rate": 3.089547827691142e-07,
"loss": 6.0582,
"step": 12920
},
{
"epoch": 1.39,
"learning_rate": 3.0326261768766073e-07,
"loss": 6.0473,
"step": 12930
},
{
"epoch": 1.39,
"learning_rate": 2.97622569976479e-07,
"loss": 6.0652,
"step": 12940
},
{
"epoch": 1.39,
"learning_rate": 2.920346699493137e-07,
"loss": 6.0508,
"step": 12950
},
{
"epoch": 1.39,
"learning_rate": 2.864989476396385e-07,
"loss": 6.066,
"step": 12960
},
{
"epoch": 1.39,
"learning_rate": 2.810154328004755e-07,
"loss": 6.0574,
"step": 12970
},
{
"epoch": 1.39,
"learning_rate": 2.7558415490425375e-07,
"loss": 6.0547,
"step": 12980
},
{
"epoch": 1.39,
"learning_rate": 2.702051431426367e-07,
"loss": 6.0547,
"step": 12990
},
{
"epoch": 1.4,
"learning_rate": 2.6487842642637704e-07,
"loss": 6.0609,
"step": 13000
},
{
"epoch": 1.4,
"learning_rate": 2.5960403338515016e-07,
"loss": 6.0945,
"step": 13010
},
{
"epoch": 1.4,
"learning_rate": 2.5438199236740955e-07,
"loss": 6.0516,
"step": 13020
},
{
"epoch": 1.4,
"learning_rate": 2.502420718806597e-07,
"loss": 6.0559,
"step": 13030
},
{
"epoch": 1.4,
"learning_rate": 2.45114335045038e-07,
"loss": 6.0613,
"step": 13040
},
{
"epoch": 1.4,
"learning_rate": 2.4003902811115644e-07,
"loss": 6.057,
"step": 13050
},
{
"epoch": 1.4,
"learning_rate": 2.3501617835743007e-07,
"loss": 6.0691,
"step": 13060
},
{
"epoch": 1.4,
"learning_rate": 2.3004581278033734e-07,
"loss": 6.0527,
"step": 13070
},
{
"epoch": 1.4,
"learning_rate": 2.2512795809426003e-07,
"loss": 6.0363,
"step": 13080
},
{
"epoch": 1.41,
"learning_rate": 2.2026264073135462e-07,
"loss": 6.0641,
"step": 13090
},
{
"epoch": 1.41,
"learning_rate": 2.15449886841399e-07,
"loss": 6.0711,
"step": 13100
},
{
"epoch": 1.41,
"learning_rate": 2.106897222916593e-07,
"loss": 6.0664,
"step": 13110
},
{
"epoch": 1.41,
"learning_rate": 2.0598217266674658e-07,
"loss": 6.0656,
"step": 13120
},
{
"epoch": 1.41,
"learning_rate": 2.013272632684815e-07,
"loss": 6.0711,
"step": 13130
},
{
"epoch": 1.41,
"learning_rate": 1.9672501911575658e-07,
"loss": 6.0543,
"step": 13140
},
{
"epoch": 1.41,
"learning_rate": 1.921754649444041e-07,
"loss": 6.0691,
"step": 13150
},
{
"epoch": 1.41,
"learning_rate": 1.876786252070606e-07,
"loss": 6.0379,
"step": 13160
},
{
"epoch": 1.41,
"learning_rate": 1.832345240730371e-07,
"loss": 6.0695,
"step": 13170
},
{
"epoch": 1.42,
"learning_rate": 1.7884318542818912e-07,
"loss": 6.0922,
"step": 13180
},
{
"epoch": 1.42,
"learning_rate": 1.7450463287478792e-07,
"loss": 6.0777,
"step": 13190
},
{
"epoch": 1.42,
"learning_rate": 1.7021888973139166e-07,
"loss": 6.0594,
"step": 13200
},
{
"epoch": 1.42,
"learning_rate": 1.659859790327245e-07,
"loss": 6.0664,
"step": 13210
},
{
"epoch": 1.42,
"learning_rate": 1.6180592352955105e-07,
"loss": 6.0535,
"step": 13220
},
{
"epoch": 1.42,
"learning_rate": 1.5767874568854868e-07,
"loss": 6.068,
"step": 13230
},
{
"epoch": 1.42,
"learning_rate": 1.5360446769219663e-07,
"loss": 6.066,
"step": 13240
},
{
"epoch": 1.42,
"learning_rate": 1.4958311143864922e-07,
"loss": 6.0391,
"step": 13250
},
{
"epoch": 1.42,
"learning_rate": 1.456146985416207e-07,
"loss": 6.05,
"step": 13260
},
{
"epoch": 1.42,
"learning_rate": 1.4169925033026832e-07,
"loss": 6.0711,
"step": 13270
},
{
"epoch": 1.43,
"learning_rate": 1.3783678784908162e-07,
"loss": 6.0781,
"step": 13280
},
{
"epoch": 1.43,
"learning_rate": 1.3402733185776006e-07,
"loss": 6.0809,
"step": 13290
},
{
"epoch": 1.43,
"learning_rate": 1.3027090283111442e-07,
"loss": 6.0605,
"step": 13300
},
{
"epoch": 1.43,
"learning_rate": 1.2656752095894342e-07,
"loss": 6.0758,
"step": 13310
},
{
"epoch": 1.43,
"learning_rate": 1.2291720614593493e-07,
"loss": 6.059,
"step": 13320
},
{
"epoch": 1.43,
"learning_rate": 1.1931997801155614e-07,
"loss": 6.0414,
"step": 13330
},
{
"epoch": 1.43,
"learning_rate": 1.1577585588994466e-07,
"loss": 6.0504,
"step": 13340
},
{
"epoch": 1.43,
"learning_rate": 1.1228485882980977e-07,
"loss": 6.0492,
"step": 13350
},
{
"epoch": 1.43,
"learning_rate": 1.0884700559432693e-07,
"loss": 6.0301,
"step": 13360
},
{
"epoch": 1.44,
"learning_rate": 1.0546231466103785e-07,
"loss": 6.048,
"step": 13370
},
{
"epoch": 1.44,
"learning_rate": 1.0213080422175281e-07,
"loss": 6.0687,
"step": 13380
},
{
"epoch": 1.44,
"learning_rate": 9.88524921824463e-08,
"loss": 6.0645,
"step": 13390
},
{
"epoch": 1.44,
"learning_rate": 9.562739616317152e-08,
"loss": 6.0953,
"step": 13400
},
{
"epoch": 1.44,
"learning_rate": 9.2455533497956e-08,
"loss": 6.0484,
"step": 13410
},
{
"epoch": 1.44,
"learning_rate": 8.933692123471282e-08,
"loss": 6.041,
"step": 13420
},
{
"epoch": 1.44,
"learning_rate": 8.627157613514958e-08,
"loss": 6.0555,
"step": 13430
},
{
"epoch": 1.44,
"learning_rate": 8.32595146746773e-08,
"loss": 6.0844,
"step": 13440
},
{
"epoch": 1.44,
"learning_rate": 8.030075304231944e-08,
"loss": 6.0934,
"step": 13450
},
{
"epoch": 1.45,
"learning_rate": 7.73953071406297e-08,
"loss": 6.0422,
"step": 13460
},
{
"epoch": 1.45,
"learning_rate": 7.454319258560105e-08,
"loss": 6.0812,
"step": 13470
},
{
"epoch": 1.45,
"learning_rate": 7.174442470658794e-08,
"loss": 6.0656,
"step": 13480
},
{
"epoch": 1.45,
"learning_rate": 6.89990185462186e-08,
"loss": 6.0656,
"step": 13490
},
{
"epoch": 1.45,
"learning_rate": 6.630698886031738e-08,
"loss": 6.0613,
"step": 13500
},
{
"epoch": 1.45,
"learning_rate": 6.366835011782368e-08,
"loss": 6.0633,
"step": 13510
},
{
"epoch": 1.45,
"learning_rate": 6.10831165007153e-08,
"loss": 6.0691,
"step": 13520
},
{
"epoch": 1.45,
"learning_rate": 5.855130190393188e-08,
"loss": 6.0508,
"step": 13530
},
{
"epoch": 1.45,
"learning_rate": 5.6072919935298286e-08,
"loss": 6.0883,
"step": 13540
},
{
"epoch": 1.45,
"learning_rate": 5.3647983915456894e-08,
"loss": 6.032,
"step": 13550
},
{
"epoch": 1.46,
"learning_rate": 5.127650687778873e-08,
"loss": 6.0875,
"step": 13560
},
{
"epoch": 1.46,
"learning_rate": 4.895850156834914e-08,
"loss": 6.0285,
"step": 13570
},
{
"epoch": 1.46,
"learning_rate": 4.669398044579776e-08,
"loss": 6.0605,
"step": 13580
},
{
"epoch": 1.46,
"learning_rate": 4.448295568132866e-08,
"loss": 6.0648,
"step": 13590
},
{
"epoch": 1.46,
"learning_rate": 4.2325439158609204e-08,
"loss": 6.0863,
"step": 13600
},
{
"epoch": 1.46,
"learning_rate": 4.0221442473713514e-08,
"loss": 6.0613,
"step": 13610
},
{
"epoch": 1.46,
"learning_rate": 3.8170976935062445e-08,
"loss": 6.0828,
"step": 13620
},
{
"epoch": 1.46,
"learning_rate": 3.617405356335701e-08,
"loss": 6.0883,
"step": 13630
},
{
"epoch": 1.46,
"learning_rate": 3.4230683091529545e-08,
"loss": 6.0465,
"step": 13640
},
{
"epoch": 1.47,
"learning_rate": 3.2340875964674833e-08,
"loss": 6.0465,
"step": 13650
},
{
"epoch": 1.47,
"learning_rate": 3.050464234000017e-08,
"loss": 6.0324,
"step": 13660
},
{
"epoch": 1.47,
"learning_rate": 2.8721992086772065e-08,
"loss": 6.0449,
"step": 13670
},
{
"epoch": 1.47,
"learning_rate": 2.6992934786257418e-08,
"loss": 6.0422,
"step": 13680
},
{
"epoch": 1.47,
"learning_rate": 2.5317479731677973e-08,
"loss": 6.0754,
"step": 13690
},
{
"epoch": 1.47,
"learning_rate": 2.3695635928155935e-08,
"loss": 6.084,
"step": 13700
},
{
"epoch": 1.47,
"learning_rate": 2.2127412092668444e-08,
"loss": 6.0457,
"step": 13710
},
{
"epoch": 1.47,
"learning_rate": 2.0612816653998723e-08,
"loss": 6.0687,
"step": 13720
},
{
"epoch": 1.47,
"learning_rate": 1.9151857752691684e-08,
"loss": 6.0527,
"step": 13730
},
{
"epoch": 1.48,
"learning_rate": 1.7744543241012823e-08,
"loss": 6.059,
"step": 13740
},
{
"epoch": 1.48,
"learning_rate": 1.639088068289829e-08,
"loss": 6.0598,
"step": 13750
},
{
"epoch": 1.48,
"learning_rate": 1.509087735392489e-08,
"loss": 6.0668,
"step": 13760
},
{
"epoch": 1.48,
"learning_rate": 1.3844540241261251e-08,
"loss": 6.0816,
"step": 13770
},
{
"epoch": 1.48,
"learning_rate": 1.2651876043637822e-08,
"loss": 6.0895,
"step": 13780
},
{
"epoch": 1.48,
"learning_rate": 1.1512891171303608e-08,
"loss": 6.0895,
"step": 13790
},
{
"epoch": 1.48,
"learning_rate": 1.0427591745999499e-08,
"loss": 6.0285,
"step": 13800
},
{
"epoch": 1.48,
"learning_rate": 9.395983600918313e-09,
"loss": 6.0953,
"step": 13810
},
{
"epoch": 1.48,
"learning_rate": 8.418072280679257e-09,
"loss": 6.0426,
"step": 13820
},
{
"epoch": 1.48,
"learning_rate": 7.493863041292405e-09,
"loss": 6.0602,
"step": 13830
},
{
"epoch": 1.49,
"learning_rate": 6.623360850136484e-09,
"loss": 6.0742,
"step": 13840
},
{
"epoch": 1.49,
"learning_rate": 5.8065703859278014e-09,
"loss": 6.0805,
"step": 13850
},
{
"epoch": 1.49,
"learning_rate": 5.0434960386969154e-09,
"loss": 6.0574,
"step": 13860
},
{
"epoch": 1.49,
"learning_rate": 4.334141909764223e-09,
"loss": 6.0777,
"step": 13870
},
{
"epoch": 1.49,
"learning_rate": 3.6785118117188544e-09,
"loss": 6.05,
"step": 13880
},
{
"epoch": 1.49,
"learning_rate": 3.0766092683953697e-09,
"loss": 6.0445,
"step": 13890
},
{
"epoch": 1.49,
"learning_rate": 2.5284375148615368e-09,
"loss": 6.0648,
"step": 13900
},
{
"epoch": 1.49,
"learning_rate": 2.033999497391692e-09,
"loss": 6.0672,
"step": 13910
},
{
"epoch": 1.49,
"learning_rate": 1.5932978734600757e-09,
"loss": 6.0727,
"step": 13920
},
{
"epoch": 1.5,
"learning_rate": 1.2063350117175188e-09,
"loss": 6.0816,
"step": 13930
},
{
"epoch": 1.5,
"learning_rate": 8.731129919892223e-10,
"loss": 6.066,
"step": 13940
},
{
"epoch": 1.5,
"learning_rate": 5.936336052514424e-10,
"loss": 6.0367,
"step": 13950
},
{
"epoch": 1.5,
"learning_rate": 3.6789835363260087e-10,
"loss": 6.0438,
"step": 13960
},
{
"epoch": 1.5,
"learning_rate": 1.9590845039885175e-10,
"loss": 6.0586,
"step": 13970
},
{
"epoch": 1.5,
"step": 13971,
"total_flos": 4.912468616465613e+16,
"train_loss": 6.204258486328824,
"train_runtime": 12473.8331,
"train_samples_per_second": 17.92,
"train_steps_per_second": 1.12
}
],
"max_steps": 13971,
"num_train_epochs": 2,
"total_flos": 4.912468616465613e+16,
"trial_name": null,
"trial_params": null
}