vcoder_llava-v1.5-7b / trainer_state.json
praeclarumjj3's picture
Upload folder using huggingface_hub
c7b3fb3
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 2206,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.4925373134328359e-05,
"loss": 7.8398,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 2.9850746268656717e-05,
"loss": 5.6055,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 4.477611940298507e-05,
"loss": 4.9727,
"step": 3
},
{
"epoch": 0.0,
"learning_rate": 5.9701492537313435e-05,
"loss": 2.8965,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 7.462686567164179e-05,
"loss": 2.3398,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 8.955223880597014e-05,
"loss": 2.2109,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 0.0001044776119402985,
"loss": 2.0615,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 0.00011940298507462687,
"loss": 1.9082,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 0.00013432835820895522,
"loss": 1.7432,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 0.00014925373134328358,
"loss": 1.666,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 0.00016417910447761195,
"loss": 1.54,
"step": 11
},
{
"epoch": 0.01,
"learning_rate": 0.00017910447761194028,
"loss": 1.5146,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 0.00019402985074626867,
"loss": 1.4375,
"step": 13
},
{
"epoch": 0.01,
"learning_rate": 0.000208955223880597,
"loss": 1.3984,
"step": 14
},
{
"epoch": 0.01,
"learning_rate": 0.00022388059701492538,
"loss": 1.3428,
"step": 15
},
{
"epoch": 0.01,
"learning_rate": 0.00023880597014925374,
"loss": 1.3223,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 0.0002537313432835821,
"loss": 1.3018,
"step": 17
},
{
"epoch": 0.02,
"learning_rate": 0.00026865671641791044,
"loss": 1.29,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 0.0002835820895522388,
"loss": 1.2109,
"step": 19
},
{
"epoch": 0.02,
"learning_rate": 0.00029850746268656717,
"loss": 1.2031,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 0.00031343283582089556,
"loss": 1.2158,
"step": 21
},
{
"epoch": 0.02,
"learning_rate": 0.0003283582089552239,
"loss": 1.1895,
"step": 22
},
{
"epoch": 0.02,
"learning_rate": 0.00034328358208955223,
"loss": 1.1582,
"step": 23
},
{
"epoch": 0.02,
"learning_rate": 0.00035820895522388057,
"loss": 1.168,
"step": 24
},
{
"epoch": 0.02,
"learning_rate": 0.00037313432835820896,
"loss": 1.1182,
"step": 25
},
{
"epoch": 0.02,
"learning_rate": 0.00038805970149253735,
"loss": 1.1089,
"step": 26
},
{
"epoch": 0.02,
"learning_rate": 0.0004029850746268657,
"loss": 1.0869,
"step": 27
},
{
"epoch": 0.03,
"learning_rate": 0.000417910447761194,
"loss": 1.0693,
"step": 28
},
{
"epoch": 0.03,
"learning_rate": 0.00043283582089552236,
"loss": 1.0508,
"step": 29
},
{
"epoch": 0.03,
"learning_rate": 0.00044776119402985075,
"loss": 1.0679,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 0.00046268656716417914,
"loss": 1.0693,
"step": 31
},
{
"epoch": 0.03,
"learning_rate": 0.0004776119402985075,
"loss": 1.0117,
"step": 32
},
{
"epoch": 0.03,
"learning_rate": 0.0004925373134328358,
"loss": 1.0908,
"step": 33
},
{
"epoch": 0.03,
"learning_rate": 0.0005074626865671642,
"loss": 1.1123,
"step": 34
},
{
"epoch": 0.03,
"learning_rate": 0.0005223880597014925,
"loss": 1.0454,
"step": 35
},
{
"epoch": 0.03,
"learning_rate": 0.0005373134328358209,
"loss": 1.0596,
"step": 36
},
{
"epoch": 0.03,
"learning_rate": 0.0005522388059701493,
"loss": 1.0884,
"step": 37
},
{
"epoch": 0.03,
"learning_rate": 0.0005671641791044776,
"loss": 1.0537,
"step": 38
},
{
"epoch": 0.04,
"learning_rate": 0.0005820895522388059,
"loss": 1.0498,
"step": 39
},
{
"epoch": 0.04,
"learning_rate": 0.0005970149253731343,
"loss": 1.0391,
"step": 40
},
{
"epoch": 0.04,
"learning_rate": 0.0006119402985074627,
"loss": 0.981,
"step": 41
},
{
"epoch": 0.04,
"learning_rate": 0.0006268656716417911,
"loss": 0.9932,
"step": 42
},
{
"epoch": 0.04,
"learning_rate": 0.0006417910447761194,
"loss": 1.0,
"step": 43
},
{
"epoch": 0.04,
"learning_rate": 0.0006567164179104478,
"loss": 0.9668,
"step": 44
},
{
"epoch": 0.04,
"learning_rate": 0.0006716417910447762,
"loss": 0.9482,
"step": 45
},
{
"epoch": 0.04,
"learning_rate": 0.0006865671641791045,
"loss": 0.9316,
"step": 46
},
{
"epoch": 0.04,
"learning_rate": 0.0007014925373134329,
"loss": 0.9082,
"step": 47
},
{
"epoch": 0.04,
"learning_rate": 0.0007164179104477611,
"loss": 0.9243,
"step": 48
},
{
"epoch": 0.04,
"learning_rate": 0.0007313432835820895,
"loss": 0.9043,
"step": 49
},
{
"epoch": 0.05,
"learning_rate": 0.0007462686567164179,
"loss": 0.8994,
"step": 50
},
{
"epoch": 0.05,
"learning_rate": 0.0007611940298507463,
"loss": 0.9053,
"step": 51
},
{
"epoch": 0.05,
"learning_rate": 0.0007761194029850747,
"loss": 0.8721,
"step": 52
},
{
"epoch": 0.05,
"learning_rate": 0.000791044776119403,
"loss": 0.9058,
"step": 53
},
{
"epoch": 0.05,
"learning_rate": 0.0008059701492537314,
"loss": 0.8364,
"step": 54
},
{
"epoch": 0.05,
"learning_rate": 0.0008208955223880598,
"loss": 0.8828,
"step": 55
},
{
"epoch": 0.05,
"learning_rate": 0.000835820895522388,
"loss": 0.8867,
"step": 56
},
{
"epoch": 0.05,
"learning_rate": 0.0008507462686567164,
"loss": 0.8745,
"step": 57
},
{
"epoch": 0.05,
"learning_rate": 0.0008656716417910447,
"loss": 0.8696,
"step": 58
},
{
"epoch": 0.05,
"learning_rate": 0.0008805970149253731,
"loss": 0.8735,
"step": 59
},
{
"epoch": 0.05,
"learning_rate": 0.0008955223880597015,
"loss": 0.8579,
"step": 60
},
{
"epoch": 0.06,
"learning_rate": 0.0009104477611940298,
"loss": 0.8413,
"step": 61
},
{
"epoch": 0.06,
"learning_rate": 0.0009253731343283583,
"loss": 0.8447,
"step": 62
},
{
"epoch": 0.06,
"learning_rate": 0.0009402985074626867,
"loss": 0.8188,
"step": 63
},
{
"epoch": 0.06,
"learning_rate": 0.000955223880597015,
"loss": 0.8418,
"step": 64
},
{
"epoch": 0.06,
"learning_rate": 0.0009701492537313433,
"loss": 0.8257,
"step": 65
},
{
"epoch": 0.06,
"learning_rate": 0.0009850746268656716,
"loss": 0.7837,
"step": 66
},
{
"epoch": 0.06,
"learning_rate": 0.001,
"loss": 0.8159,
"step": 67
},
{
"epoch": 0.06,
"learning_rate": 0.0009999994607152905,
"loss": 0.8252,
"step": 68
},
{
"epoch": 0.06,
"learning_rate": 0.000999997842862325,
"loss": 0.7983,
"step": 69
},
{
"epoch": 0.06,
"learning_rate": 0.0009999951464445937,
"loss": 0.8013,
"step": 70
},
{
"epoch": 0.06,
"learning_rate": 0.000999991371467913,
"loss": 0.7749,
"step": 71
},
{
"epoch": 0.07,
"learning_rate": 0.000999986517940426,
"loss": 0.7793,
"step": 72
},
{
"epoch": 0.07,
"learning_rate": 0.0009999805858726025,
"loss": 0.7671,
"step": 73
},
{
"epoch": 0.07,
"learning_rate": 0.000999973575277239,
"loss": 0.7856,
"step": 74
},
{
"epoch": 0.07,
"learning_rate": 0.000999965486169458,
"loss": 0.7607,
"step": 75
},
{
"epoch": 0.07,
"learning_rate": 0.0009999563185667088,
"loss": 0.7632,
"step": 76
},
{
"epoch": 0.07,
"learning_rate": 0.0009999460724887676,
"loss": 0.7861,
"step": 77
},
{
"epoch": 0.07,
"learning_rate": 0.0009999347479577364,
"loss": 0.748,
"step": 78
},
{
"epoch": 0.07,
"learning_rate": 0.0009999223449980435,
"loss": 0.7983,
"step": 79
},
{
"epoch": 0.07,
"learning_rate": 0.0009999088636364441,
"loss": 0.7656,
"step": 80
},
{
"epoch": 0.07,
"learning_rate": 0.0009998943039020195,
"loss": 0.75,
"step": 81
},
{
"epoch": 0.07,
"learning_rate": 0.000999878665826177,
"loss": 0.7588,
"step": 82
},
{
"epoch": 0.08,
"learning_rate": 0.0009998619494426496,
"loss": 0.7534,
"step": 83
},
{
"epoch": 0.08,
"learning_rate": 0.0009998441547874974,
"loss": 0.7373,
"step": 84
},
{
"epoch": 0.08,
"learning_rate": 0.000999825281899106,
"loss": 0.7271,
"step": 85
},
{
"epoch": 0.08,
"learning_rate": 0.0009998053308181865,
"loss": 0.7446,
"step": 86
},
{
"epoch": 0.08,
"learning_rate": 0.0009997843015877763,
"loss": 0.7637,
"step": 87
},
{
"epoch": 0.08,
"learning_rate": 0.0009997621942532382,
"loss": 0.7466,
"step": 88
},
{
"epoch": 0.08,
"learning_rate": 0.000999739008862261,
"loss": 0.7529,
"step": 89
},
{
"epoch": 0.08,
"learning_rate": 0.000999714745464859,
"loss": 0.7119,
"step": 90
},
{
"epoch": 0.08,
"learning_rate": 0.0009996894041133715,
"loss": 0.7246,
"step": 91
},
{
"epoch": 0.08,
"learning_rate": 0.000999662984862463,
"loss": 0.7427,
"step": 92
},
{
"epoch": 0.08,
"learning_rate": 0.0009996354877691237,
"loss": 0.7324,
"step": 93
},
{
"epoch": 0.09,
"learning_rate": 0.0009996069128926691,
"loss": 0.6958,
"step": 94
},
{
"epoch": 0.09,
"learning_rate": 0.0009995772602947386,
"loss": 0.7144,
"step": 95
},
{
"epoch": 0.09,
"learning_rate": 0.0009995465300392972,
"loss": 0.709,
"step": 96
},
{
"epoch": 0.09,
"learning_rate": 0.0009995147221926342,
"loss": 0.7104,
"step": 97
},
{
"epoch": 0.09,
"learning_rate": 0.0009994818368233638,
"loss": 0.7236,
"step": 98
},
{
"epoch": 0.09,
"learning_rate": 0.0009994478740024241,
"loss": 0.7295,
"step": 99
},
{
"epoch": 0.09,
"learning_rate": 0.0009994128338030777,
"loss": 0.7031,
"step": 100
},
{
"epoch": 0.09,
"learning_rate": 0.0009993767163009111,
"loss": 0.6948,
"step": 101
},
{
"epoch": 0.09,
"learning_rate": 0.0009993395215738348,
"loss": 0.6738,
"step": 102
},
{
"epoch": 0.09,
"learning_rate": 0.000999301249702083,
"loss": 0.6899,
"step": 103
},
{
"epoch": 0.09,
"learning_rate": 0.0009992619007682133,
"loss": 0.6875,
"step": 104
},
{
"epoch": 0.1,
"learning_rate": 0.0009992214748571072,
"loss": 0.6816,
"step": 105
},
{
"epoch": 0.1,
"learning_rate": 0.0009991799720559687,
"loss": 0.686,
"step": 106
},
{
"epoch": 0.1,
"learning_rate": 0.000999137392454325,
"loss": 0.6743,
"step": 107
},
{
"epoch": 0.1,
"learning_rate": 0.0009990937361440268,
"loss": 0.6792,
"step": 108
},
{
"epoch": 0.1,
"learning_rate": 0.000999049003219246,
"loss": 0.6592,
"step": 109
},
{
"epoch": 0.1,
"learning_rate": 0.000999003193776478,
"loss": 0.6787,
"step": 110
},
{
"epoch": 0.1,
"learning_rate": 0.0009989563079145407,
"loss": 0.6724,
"step": 111
},
{
"epoch": 0.1,
"learning_rate": 0.0009989083457345727,
"loss": 0.6831,
"step": 112
},
{
"epoch": 0.1,
"learning_rate": 0.0009988593073400353,
"loss": 0.6724,
"step": 113
},
{
"epoch": 0.1,
"learning_rate": 0.0009988091928367113,
"loss": 0.6626,
"step": 114
},
{
"epoch": 0.1,
"learning_rate": 0.0009987580023327045,
"loss": 0.6641,
"step": 115
},
{
"epoch": 0.11,
"learning_rate": 0.0009987057359384399,
"loss": 0.6831,
"step": 116
},
{
"epoch": 0.11,
"learning_rate": 0.0009986523937666636,
"loss": 0.7046,
"step": 117
},
{
"epoch": 0.11,
"learning_rate": 0.0009985979759324418,
"loss": 0.6611,
"step": 118
},
{
"epoch": 0.11,
"learning_rate": 0.0009985424825531613,
"loss": 0.6768,
"step": 119
},
{
"epoch": 0.11,
"learning_rate": 0.0009984859137485292,
"loss": 0.6489,
"step": 120
},
{
"epoch": 0.11,
"learning_rate": 0.000998428269640572,
"loss": 0.6733,
"step": 121
},
{
"epoch": 0.11,
"learning_rate": 0.0009983695503536364,
"loss": 0.6919,
"step": 122
},
{
"epoch": 0.11,
"learning_rate": 0.000998309756014388,
"loss": 0.6846,
"step": 123
},
{
"epoch": 0.11,
"learning_rate": 0.000998248886751811,
"loss": 0.6611,
"step": 124
},
{
"epoch": 0.11,
"learning_rate": 0.0009981869426972097,
"loss": 0.6655,
"step": 125
},
{
"epoch": 0.11,
"learning_rate": 0.0009981239239842054,
"loss": 0.644,
"step": 126
},
{
"epoch": 0.12,
"learning_rate": 0.0009980598307487384,
"loss": 0.6431,
"step": 127
},
{
"epoch": 0.12,
"learning_rate": 0.0009979946631290665,
"loss": 0.6436,
"step": 128
},
{
"epoch": 0.12,
"learning_rate": 0.0009979284212657657,
"loss": 0.6357,
"step": 129
},
{
"epoch": 0.12,
"learning_rate": 0.0009978611053017286,
"loss": 0.6431,
"step": 130
},
{
"epoch": 0.12,
"learning_rate": 0.0009977927153821651,
"loss": 0.6704,
"step": 131
},
{
"epoch": 0.12,
"learning_rate": 0.000997723251654602,
"loss": 0.6455,
"step": 132
},
{
"epoch": 0.12,
"learning_rate": 0.0009976527142688818,
"loss": 0.6333,
"step": 133
},
{
"epoch": 0.12,
"learning_rate": 0.0009975811033771636,
"loss": 0.6382,
"step": 134
},
{
"epoch": 0.12,
"learning_rate": 0.0009975084191339223,
"loss": 0.6548,
"step": 135
},
{
"epoch": 0.12,
"learning_rate": 0.0009974346616959476,
"loss": 0.6226,
"step": 136
},
{
"epoch": 0.12,
"learning_rate": 0.0009973598312223446,
"loss": 0.6255,
"step": 137
},
{
"epoch": 0.13,
"learning_rate": 0.0009972839278745331,
"loss": 0.6504,
"step": 138
},
{
"epoch": 0.13,
"learning_rate": 0.0009972069518162472,
"loss": 0.6113,
"step": 139
},
{
"epoch": 0.13,
"learning_rate": 0.0009971289032135347,
"loss": 0.6392,
"step": 140
},
{
"epoch": 0.13,
"learning_rate": 0.0009970497822347573,
"loss": 0.6284,
"step": 141
},
{
"epoch": 0.13,
"learning_rate": 0.0009969695890505903,
"loss": 0.6245,
"step": 142
},
{
"epoch": 0.13,
"learning_rate": 0.0009968883238340213,
"loss": 0.604,
"step": 143
},
{
"epoch": 0.13,
"learning_rate": 0.0009968059867603504,
"loss": 0.6221,
"step": 144
},
{
"epoch": 0.13,
"learning_rate": 0.0009967225780071907,
"loss": 0.6147,
"step": 145
},
{
"epoch": 0.13,
"learning_rate": 0.0009966380977544658,
"loss": 0.6069,
"step": 146
},
{
"epoch": 0.13,
"learning_rate": 0.0009965525461844119,
"loss": 0.646,
"step": 147
},
{
"epoch": 0.13,
"learning_rate": 0.0009964659234815753,
"loss": 0.603,
"step": 148
},
{
"epoch": 0.14,
"learning_rate": 0.000996378229832813,
"loss": 0.6289,
"step": 149
},
{
"epoch": 0.14,
"learning_rate": 0.0009962894654272927,
"loss": 0.5952,
"step": 150
},
{
"epoch": 0.14,
"learning_rate": 0.0009961996304564916,
"loss": 0.627,
"step": 151
},
{
"epoch": 0.14,
"learning_rate": 0.000996108725114196,
"loss": 0.6245,
"step": 152
},
{
"epoch": 0.14,
"learning_rate": 0.0009960167495965016,
"loss": 0.606,
"step": 153
},
{
"epoch": 0.14,
"learning_rate": 0.000995923704101812,
"loss": 0.603,
"step": 154
},
{
"epoch": 0.14,
"learning_rate": 0.0009958295888308395,
"loss": 0.6108,
"step": 155
},
{
"epoch": 0.14,
"learning_rate": 0.0009957344039866036,
"loss": 0.603,
"step": 156
},
{
"epoch": 0.14,
"learning_rate": 0.0009956381497744317,
"loss": 0.6147,
"step": 157
},
{
"epoch": 0.14,
"learning_rate": 0.0009955408264019567,
"loss": 0.625,
"step": 158
},
{
"epoch": 0.14,
"learning_rate": 0.0009954424340791195,
"loss": 0.6094,
"step": 159
},
{
"epoch": 0.15,
"learning_rate": 0.0009953429730181654,
"loss": 0.6021,
"step": 160
},
{
"epoch": 0.15,
"learning_rate": 0.000995242443433646,
"loss": 0.6128,
"step": 161
},
{
"epoch": 0.15,
"learning_rate": 0.0009951408455424174,
"loss": 0.5728,
"step": 162
},
{
"epoch": 0.15,
"learning_rate": 0.0009950381795636406,
"loss": 0.5537,
"step": 163
},
{
"epoch": 0.15,
"learning_rate": 0.0009949344457187799,
"loss": 0.5767,
"step": 164
},
{
"epoch": 0.15,
"learning_rate": 0.000994829644231604,
"loss": 0.5776,
"step": 165
},
{
"epoch": 0.15,
"learning_rate": 0.0009947237753281843,
"loss": 0.5801,
"step": 166
},
{
"epoch": 0.15,
"learning_rate": 0.0009946168392368946,
"loss": 0.6094,
"step": 167
},
{
"epoch": 0.15,
"learning_rate": 0.000994508836188411,
"loss": 0.5723,
"step": 168
},
{
"epoch": 0.15,
"learning_rate": 0.0009943997664157108,
"loss": 0.5654,
"step": 169
},
{
"epoch": 0.15,
"learning_rate": 0.0009942896301540728,
"loss": 0.5933,
"step": 170
},
{
"epoch": 0.16,
"learning_rate": 0.000994178427641076,
"loss": 0.5742,
"step": 171
},
{
"epoch": 0.16,
"learning_rate": 0.0009940661591166002,
"loss": 0.5759,
"step": 172
},
{
"epoch": 0.16,
"learning_rate": 0.0009939528248228237,
"loss": 0.5825,
"step": 173
},
{
"epoch": 0.16,
"learning_rate": 0.0009938384250042247,
"loss": 0.6196,
"step": 174
},
{
"epoch": 0.16,
"learning_rate": 0.000993722959907579,
"loss": 0.5889,
"step": 175
},
{
"epoch": 0.16,
"learning_rate": 0.0009936064297819613,
"loss": 0.5686,
"step": 176
},
{
"epoch": 0.16,
"learning_rate": 0.000993488834878743,
"loss": 0.5474,
"step": 177
},
{
"epoch": 0.16,
"learning_rate": 0.0009933701754515926,
"loss": 0.5488,
"step": 178
},
{
"epoch": 0.16,
"learning_rate": 0.0009932504517564753,
"loss": 0.5601,
"step": 179
},
{
"epoch": 0.16,
"learning_rate": 0.0009931296640516513,
"loss": 0.5366,
"step": 180
},
{
"epoch": 0.16,
"learning_rate": 0.0009930078125976766,
"loss": 0.5532,
"step": 181
},
{
"epoch": 0.17,
"learning_rate": 0.0009928848976574018,
"loss": 0.551,
"step": 182
},
{
"epoch": 0.17,
"learning_rate": 0.0009927609194959716,
"loss": 0.5781,
"step": 183
},
{
"epoch": 0.17,
"learning_rate": 0.0009926358783808238,
"loss": 0.5732,
"step": 184
},
{
"epoch": 0.17,
"learning_rate": 0.0009925097745816895,
"loss": 0.5354,
"step": 185
},
{
"epoch": 0.17,
"learning_rate": 0.0009923826083705924,
"loss": 0.5664,
"step": 186
},
{
"epoch": 0.17,
"learning_rate": 0.0009922543800218472,
"loss": 0.5486,
"step": 187
},
{
"epoch": 0.17,
"learning_rate": 0.000992125089812061,
"loss": 0.5605,
"step": 188
},
{
"epoch": 0.17,
"learning_rate": 0.0009919947380201298,
"loss": 0.5425,
"step": 189
},
{
"epoch": 0.17,
"learning_rate": 0.000991863324927241,
"loss": 0.5417,
"step": 190
},
{
"epoch": 0.17,
"learning_rate": 0.000991730850816871,
"loss": 0.5249,
"step": 191
},
{
"epoch": 0.17,
"learning_rate": 0.0009915973159747846,
"loss": 0.5503,
"step": 192
},
{
"epoch": 0.17,
"learning_rate": 0.0009914627206890352,
"loss": 0.5464,
"step": 193
},
{
"epoch": 0.18,
"learning_rate": 0.0009913270652499634,
"loss": 0.5605,
"step": 194
},
{
"epoch": 0.18,
"learning_rate": 0.000991190349950197,
"loss": 0.5425,
"step": 195
},
{
"epoch": 0.18,
"learning_rate": 0.0009910525750846493,
"loss": 0.5354,
"step": 196
},
{
"epoch": 0.18,
"learning_rate": 0.0009909137409505205,
"loss": 0.5361,
"step": 197
},
{
"epoch": 0.18,
"learning_rate": 0.0009907738478472947,
"loss": 0.5547,
"step": 198
},
{
"epoch": 0.18,
"learning_rate": 0.000990632896076741,
"loss": 0.563,
"step": 199
},
{
"epoch": 0.18,
"learning_rate": 0.0009904908859429116,
"loss": 0.5547,
"step": 200
},
{
"epoch": 0.18,
"learning_rate": 0.0009903478177521424,
"loss": 0.5464,
"step": 201
},
{
"epoch": 0.18,
"learning_rate": 0.0009902036918130513,
"loss": 0.5728,
"step": 202
},
{
"epoch": 0.18,
"learning_rate": 0.000990058508436538,
"loss": 0.563,
"step": 203
},
{
"epoch": 0.18,
"learning_rate": 0.0009899122679357828,
"loss": 0.5286,
"step": 204
},
{
"epoch": 0.19,
"learning_rate": 0.0009897649706262473,
"loss": 0.5645,
"step": 205
},
{
"epoch": 0.19,
"learning_rate": 0.000989616616825672,
"loss": 0.5327,
"step": 206
},
{
"epoch": 0.19,
"learning_rate": 0.0009894672068540764,
"loss": 0.5422,
"step": 207
},
{
"epoch": 0.19,
"learning_rate": 0.0009893167410337592,
"loss": 0.5447,
"step": 208
},
{
"epoch": 0.19,
"learning_rate": 0.0009891652196892954,
"loss": 0.5479,
"step": 209
},
{
"epoch": 0.19,
"learning_rate": 0.0009890126431475379,
"loss": 0.5129,
"step": 210
},
{
"epoch": 0.19,
"learning_rate": 0.0009888590117376154,
"loss": 0.5393,
"step": 211
},
{
"epoch": 0.19,
"learning_rate": 0.0009887043257909322,
"loss": 0.509,
"step": 212
},
{
"epoch": 0.19,
"learning_rate": 0.0009885485856411676,
"loss": 0.5381,
"step": 213
},
{
"epoch": 0.19,
"learning_rate": 0.0009883917916242743,
"loss": 0.4993,
"step": 214
},
{
"epoch": 0.19,
"learning_rate": 0.000988233944078479,
"loss": 0.4988,
"step": 215
},
{
"epoch": 0.2,
"learning_rate": 0.0009880750433442806,
"loss": 0.5173,
"step": 216
},
{
"epoch": 0.2,
"learning_rate": 0.0009879150897644504,
"loss": 0.5312,
"step": 217
},
{
"epoch": 0.2,
"learning_rate": 0.0009877540836840302,
"loss": 0.5159,
"step": 218
},
{
"epoch": 0.2,
"learning_rate": 0.0009875920254503326,
"loss": 0.5208,
"step": 219
},
{
"epoch": 0.2,
"learning_rate": 0.0009874289154129395,
"loss": 0.5081,
"step": 220
},
{
"epoch": 0.2,
"learning_rate": 0.0009872647539237022,
"loss": 0.5115,
"step": 221
},
{
"epoch": 0.2,
"learning_rate": 0.0009870995413367397,
"loss": 0.5024,
"step": 222
},
{
"epoch": 0.2,
"learning_rate": 0.000986933278008438,
"loss": 0.5083,
"step": 223
},
{
"epoch": 0.2,
"learning_rate": 0.000986765964297451,
"loss": 0.5168,
"step": 224
},
{
"epoch": 0.2,
"learning_rate": 0.0009865976005646972,
"loss": 0.501,
"step": 225
},
{
"epoch": 0.2,
"learning_rate": 0.0009864281871733607,
"loss": 0.5173,
"step": 226
},
{
"epoch": 0.21,
"learning_rate": 0.0009862577244888896,
"loss": 0.5115,
"step": 227
},
{
"epoch": 0.21,
"learning_rate": 0.0009860862128789954,
"loss": 0.5122,
"step": 228
},
{
"epoch": 0.21,
"learning_rate": 0.0009859136527136526,
"loss": 0.5005,
"step": 229
},
{
"epoch": 0.21,
"learning_rate": 0.0009857400443650975,
"loss": 0.5183,
"step": 230
},
{
"epoch": 0.21,
"learning_rate": 0.0009855653882078275,
"loss": 0.4907,
"step": 231
},
{
"epoch": 0.21,
"learning_rate": 0.0009853896846185998,
"loss": 0.5007,
"step": 232
},
{
"epoch": 0.21,
"learning_rate": 0.000985212933976432,
"loss": 0.5066,
"step": 233
},
{
"epoch": 0.21,
"learning_rate": 0.0009850351366625992,
"loss": 0.5173,
"step": 234
},
{
"epoch": 0.21,
"learning_rate": 0.0009848562930606353,
"loss": 0.5093,
"step": 235
},
{
"epoch": 0.21,
"learning_rate": 0.0009846764035563306,
"loss": 0.49,
"step": 236
},
{
"epoch": 0.21,
"learning_rate": 0.0009844954685377316,
"loss": 0.5122,
"step": 237
},
{
"epoch": 0.22,
"learning_rate": 0.0009843134883951406,
"loss": 0.4688,
"step": 238
},
{
"epoch": 0.22,
"learning_rate": 0.000984130463521114,
"loss": 0.4956,
"step": 239
},
{
"epoch": 0.22,
"learning_rate": 0.0009839463943104613,
"loss": 0.4878,
"step": 240
},
{
"epoch": 0.22,
"learning_rate": 0.000983761281160246,
"loss": 0.4829,
"step": 241
},
{
"epoch": 0.22,
"learning_rate": 0.0009835751244697828,
"loss": 0.5215,
"step": 242
},
{
"epoch": 0.22,
"learning_rate": 0.0009833879246406371,
"loss": 0.4678,
"step": 243
},
{
"epoch": 0.22,
"learning_rate": 0.0009831996820766254,
"loss": 0.4856,
"step": 244
},
{
"epoch": 0.22,
"learning_rate": 0.0009830103971838126,
"loss": 0.4917,
"step": 245
},
{
"epoch": 0.22,
"learning_rate": 0.0009828200703705127,
"loss": 0.4912,
"step": 246
},
{
"epoch": 0.22,
"learning_rate": 0.0009826287020472872,
"loss": 0.5112,
"step": 247
},
{
"epoch": 0.22,
"learning_rate": 0.000982436292626944,
"loss": 0.512,
"step": 248
},
{
"epoch": 0.23,
"learning_rate": 0.000982242842524537,
"loss": 0.4966,
"step": 249
},
{
"epoch": 0.23,
"learning_rate": 0.000982048352157365,
"loss": 0.4902,
"step": 250
},
{
"epoch": 0.23,
"learning_rate": 0.0009818528219449705,
"loss": 0.4861,
"step": 251
},
{
"epoch": 0.23,
"learning_rate": 0.0009816562523091393,
"loss": 0.4954,
"step": 252
},
{
"epoch": 0.23,
"learning_rate": 0.0009814586436738997,
"loss": 0.5303,
"step": 253
},
{
"epoch": 0.23,
"learning_rate": 0.0009812599964655207,
"loss": 0.4961,
"step": 254
},
{
"epoch": 0.23,
"learning_rate": 0.000981060311112512,
"loss": 0.4841,
"step": 255
},
{
"epoch": 0.23,
"learning_rate": 0.0009808595880456226,
"loss": 0.4858,
"step": 256
},
{
"epoch": 0.23,
"learning_rate": 0.00098065782769784,
"loss": 0.5027,
"step": 257
},
{
"epoch": 0.23,
"learning_rate": 0.0009804550305043895,
"loss": 0.4829,
"step": 258
},
{
"epoch": 0.23,
"learning_rate": 0.0009802511969027325,
"loss": 0.4653,
"step": 259
},
{
"epoch": 0.24,
"learning_rate": 0.0009800463273325665,
"loss": 0.4827,
"step": 260
},
{
"epoch": 0.24,
"learning_rate": 0.0009798404222358236,
"loss": 0.4893,
"step": 261
},
{
"epoch": 0.24,
"learning_rate": 0.0009796334820566697,
"loss": 0.4888,
"step": 262
},
{
"epoch": 0.24,
"learning_rate": 0.0009794255072415035,
"loss": 0.4919,
"step": 263
},
{
"epoch": 0.24,
"learning_rate": 0.0009792164982389557,
"loss": 0.4646,
"step": 264
},
{
"epoch": 0.24,
"learning_rate": 0.0009790064554998875,
"loss": 0.4788,
"step": 265
},
{
"epoch": 0.24,
"learning_rate": 0.0009787953794773902,
"loss": 0.4612,
"step": 266
},
{
"epoch": 0.24,
"learning_rate": 0.0009785832706267843,
"loss": 0.5146,
"step": 267
},
{
"epoch": 0.24,
"learning_rate": 0.000978370129405618,
"loss": 0.4663,
"step": 268
},
{
"epoch": 0.24,
"learning_rate": 0.0009781559562736665,
"loss": 0.4614,
"step": 269
},
{
"epoch": 0.24,
"learning_rate": 0.0009779407516929308,
"loss": 0.4583,
"step": 270
},
{
"epoch": 0.25,
"learning_rate": 0.000977724516127637,
"loss": 0.467,
"step": 271
},
{
"epoch": 0.25,
"learning_rate": 0.0009775072500442358,
"loss": 0.4653,
"step": 272
},
{
"epoch": 0.25,
"learning_rate": 0.0009772889539114,
"loss": 0.488,
"step": 273
},
{
"epoch": 0.25,
"learning_rate": 0.0009770696282000244,
"loss": 0.5039,
"step": 274
},
{
"epoch": 0.25,
"learning_rate": 0.0009768492733832252,
"loss": 0.4761,
"step": 275
},
{
"epoch": 0.25,
"learning_rate": 0.0009766278899363385,
"loss": 0.4741,
"step": 276
},
{
"epoch": 0.25,
"learning_rate": 0.0009764054783369191,
"loss": 0.4573,
"step": 277
},
{
"epoch": 0.25,
"learning_rate": 0.0009761820390647393,
"loss": 0.4795,
"step": 278
},
{
"epoch": 0.25,
"learning_rate": 0.0009759575726017891,
"loss": 0.4734,
"step": 279
},
{
"epoch": 0.25,
"learning_rate": 0.0009757320794322736,
"loss": 0.4512,
"step": 280
},
{
"epoch": 0.25,
"learning_rate": 0.000975505560042613,
"loss": 0.4812,
"step": 281
},
{
"epoch": 0.26,
"learning_rate": 0.0009752780149214408,
"loss": 0.4929,
"step": 282
},
{
"epoch": 0.26,
"learning_rate": 0.0009750494445596037,
"loss": 0.4902,
"step": 283
},
{
"epoch": 0.26,
"learning_rate": 0.0009748198494501597,
"loss": 0.4675,
"step": 284
},
{
"epoch": 0.26,
"learning_rate": 0.0009745892300883772,
"loss": 0.4585,
"step": 285
},
{
"epoch": 0.26,
"learning_rate": 0.0009743575869717342,
"loss": 0.4612,
"step": 286
},
{
"epoch": 0.26,
"learning_rate": 0.000974124920599917,
"loss": 0.4348,
"step": 287
},
{
"epoch": 0.26,
"learning_rate": 0.0009738912314748194,
"loss": 0.4778,
"step": 288
},
{
"epoch": 0.26,
"learning_rate": 0.0009736565201005413,
"loss": 0.4563,
"step": 289
},
{
"epoch": 0.26,
"learning_rate": 0.0009734207869833878,
"loss": 0.4778,
"step": 290
},
{
"epoch": 0.26,
"learning_rate": 0.0009731840326318675,
"loss": 0.4421,
"step": 291
},
{
"epoch": 0.26,
"learning_rate": 0.000972946257556693,
"loss": 0.4551,
"step": 292
},
{
"epoch": 0.27,
"learning_rate": 0.0009727074622707778,
"loss": 0.457,
"step": 293
},
{
"epoch": 0.27,
"learning_rate": 0.0009724676472892365,
"loss": 0.488,
"step": 294
},
{
"epoch": 0.27,
"learning_rate": 0.0009722268131293835,
"loss": 0.4568,
"step": 295
},
{
"epoch": 0.27,
"learning_rate": 0.0009719849603107314,
"loss": 0.4448,
"step": 296
},
{
"epoch": 0.27,
"learning_rate": 0.0009717420893549903,
"loss": 0.468,
"step": 297
},
{
"epoch": 0.27,
"learning_rate": 0.0009714982007860665,
"loss": 0.467,
"step": 298
},
{
"epoch": 0.27,
"learning_rate": 0.0009712532951300615,
"loss": 0.4534,
"step": 299
},
{
"epoch": 0.27,
"learning_rate": 0.000971007372915271,
"loss": 0.4519,
"step": 300
},
{
"epoch": 0.27,
"learning_rate": 0.0009707604346721832,
"loss": 0.438,
"step": 301
},
{
"epoch": 0.27,
"learning_rate": 0.0009705124809334782,
"loss": 0.4268,
"step": 302
},
{
"epoch": 0.27,
"learning_rate": 0.0009702635122340268,
"loss": 0.4539,
"step": 303
},
{
"epoch": 0.28,
"learning_rate": 0.0009700135291108888,
"loss": 0.4792,
"step": 304
},
{
"epoch": 0.28,
"learning_rate": 0.0009697625321033127,
"loss": 0.4426,
"step": 305
},
{
"epoch": 0.28,
"learning_rate": 0.0009695105217527337,
"loss": 0.4414,
"step": 306
},
{
"epoch": 0.28,
"learning_rate": 0.0009692574986027734,
"loss": 0.439,
"step": 307
},
{
"epoch": 0.28,
"learning_rate": 0.0009690034631992375,
"loss": 0.4182,
"step": 308
},
{
"epoch": 0.28,
"learning_rate": 0.0009687484160901159,
"loss": 0.4492,
"step": 309
},
{
"epoch": 0.28,
"learning_rate": 0.0009684923578255804,
"loss": 0.4346,
"step": 310
},
{
"epoch": 0.28,
"learning_rate": 0.0009682352889579846,
"loss": 0.427,
"step": 311
},
{
"epoch": 0.28,
"learning_rate": 0.0009679772100418614,
"loss": 0.4497,
"step": 312
},
{
"epoch": 0.28,
"learning_rate": 0.000967718121633923,
"loss": 0.4224,
"step": 313
},
{
"epoch": 0.28,
"learning_rate": 0.000967458024293059,
"loss": 0.4377,
"step": 314
},
{
"epoch": 0.29,
"learning_rate": 0.0009671969185803355,
"loss": 0.4653,
"step": 315
},
{
"epoch": 0.29,
"learning_rate": 0.0009669348050589939,
"loss": 0.4375,
"step": 316
},
{
"epoch": 0.29,
"learning_rate": 0.0009666716842944492,
"loss": 0.4282,
"step": 317
},
{
"epoch": 0.29,
"learning_rate": 0.0009664075568542896,
"loss": 0.4563,
"step": 318
},
{
"epoch": 0.29,
"learning_rate": 0.0009661424233082747,
"loss": 0.4556,
"step": 319
},
{
"epoch": 0.29,
"learning_rate": 0.0009658762842283342,
"loss": 0.4285,
"step": 320
},
{
"epoch": 0.29,
"learning_rate": 0.0009656091401885671,
"loss": 0.4487,
"step": 321
},
{
"epoch": 0.29,
"learning_rate": 0.0009653409917652404,
"loss": 0.4417,
"step": 322
},
{
"epoch": 0.29,
"learning_rate": 0.0009650718395367873,
"loss": 0.4238,
"step": 323
},
{
"epoch": 0.29,
"learning_rate": 0.0009648016840838064,
"loss": 0.4565,
"step": 324
},
{
"epoch": 0.29,
"learning_rate": 0.0009645305259890606,
"loss": 0.4324,
"step": 325
},
{
"epoch": 0.3,
"learning_rate": 0.0009642583658374756,
"loss": 0.4275,
"step": 326
},
{
"epoch": 0.3,
"learning_rate": 0.0009639852042161387,
"loss": 0.4734,
"step": 327
},
{
"epoch": 0.3,
"learning_rate": 0.0009637110417142974,
"loss": 0.4392,
"step": 328
},
{
"epoch": 0.3,
"learning_rate": 0.0009634358789233581,
"loss": 0.4365,
"step": 329
},
{
"epoch": 0.3,
"learning_rate": 0.0009631597164368855,
"loss": 0.4673,
"step": 330
},
{
"epoch": 0.3,
"learning_rate": 0.0009628825548506001,
"loss": 0.4265,
"step": 331
},
{
"epoch": 0.3,
"learning_rate": 0.0009626043947623781,
"loss": 0.4307,
"step": 332
},
{
"epoch": 0.3,
"learning_rate": 0.0009623252367722494,
"loss": 0.4329,
"step": 333
},
{
"epoch": 0.3,
"learning_rate": 0.0009620450814823965,
"loss": 0.4299,
"step": 334
},
{
"epoch": 0.3,
"learning_rate": 0.0009617639294971532,
"loss": 0.425,
"step": 335
},
{
"epoch": 0.3,
"learning_rate": 0.0009614817814230036,
"loss": 0.4241,
"step": 336
},
{
"epoch": 0.31,
"learning_rate": 0.00096119863786858,
"loss": 0.4294,
"step": 337
},
{
"epoch": 0.31,
"learning_rate": 0.0009609144994446625,
"loss": 0.4194,
"step": 338
},
{
"epoch": 0.31,
"learning_rate": 0.000960629366764177,
"loss": 0.4443,
"step": 339
},
{
"epoch": 0.31,
"learning_rate": 0.0009603432404421946,
"loss": 0.4294,
"step": 340
},
{
"epoch": 0.31,
"learning_rate": 0.0009600561210959291,
"loss": 0.4421,
"step": 341
},
{
"epoch": 0.31,
"learning_rate": 0.000959768009344737,
"loss": 0.4363,
"step": 342
},
{
"epoch": 0.31,
"learning_rate": 0.0009594789058101153,
"loss": 0.4182,
"step": 343
},
{
"epoch": 0.31,
"learning_rate": 0.0009591888111157006,
"loss": 0.4314,
"step": 344
},
{
"epoch": 0.31,
"learning_rate": 0.0009588977258872672,
"loss": 0.4546,
"step": 345
},
{
"epoch": 0.31,
"learning_rate": 0.0009586056507527265,
"loss": 0.436,
"step": 346
},
{
"epoch": 0.31,
"learning_rate": 0.0009583125863421251,
"loss": 0.4463,
"step": 347
},
{
"epoch": 0.32,
"learning_rate": 0.0009580185332876437,
"loss": 0.4297,
"step": 348
},
{
"epoch": 0.32,
"learning_rate": 0.0009577234922235954,
"loss": 0.4319,
"step": 349
},
{
"epoch": 0.32,
"learning_rate": 0.0009574274637864247,
"loss": 0.4595,
"step": 350
},
{
"epoch": 0.32,
"learning_rate": 0.0009571304486147063,
"loss": 0.4219,
"step": 351
},
{
"epoch": 0.32,
"learning_rate": 0.000956832447349143,
"loss": 0.4312,
"step": 352
},
{
"epoch": 0.32,
"learning_rate": 0.0009565334606325647,
"loss": 0.3999,
"step": 353
},
{
"epoch": 0.32,
"learning_rate": 0.0009562334891099277,
"loss": 0.4216,
"step": 354
},
{
"epoch": 0.32,
"learning_rate": 0.000955932533428312,
"loss": 0.4414,
"step": 355
},
{
"epoch": 0.32,
"learning_rate": 0.0009556305942369207,
"loss": 0.4229,
"step": 356
},
{
"epoch": 0.32,
"learning_rate": 0.0009553276721870786,
"loss": 0.429,
"step": 357
},
{
"epoch": 0.32,
"learning_rate": 0.0009550237679322307,
"loss": 0.4058,
"step": 358
},
{
"epoch": 0.33,
"learning_rate": 0.0009547188821279407,
"loss": 0.4197,
"step": 359
},
{
"epoch": 0.33,
"learning_rate": 0.0009544130154318895,
"loss": 0.4329,
"step": 360
},
{
"epoch": 0.33,
"learning_rate": 0.0009541061685038741,
"loss": 0.4126,
"step": 361
},
{
"epoch": 0.33,
"learning_rate": 0.0009537983420058058,
"loss": 0.4385,
"step": 362
},
{
"epoch": 0.33,
"learning_rate": 0.0009534895366017093,
"loss": 0.4197,
"step": 363
},
{
"epoch": 0.33,
"learning_rate": 0.0009531797529577205,
"loss": 0.4104,
"step": 364
},
{
"epoch": 0.33,
"learning_rate": 0.000952868991742086,
"loss": 0.4077,
"step": 365
},
{
"epoch": 0.33,
"learning_rate": 0.0009525572536251606,
"loss": 0.394,
"step": 366
},
{
"epoch": 0.33,
"learning_rate": 0.0009522445392794069,
"loss": 0.417,
"step": 367
},
{
"epoch": 0.33,
"learning_rate": 0.0009519308493793931,
"loss": 0.3999,
"step": 368
},
{
"epoch": 0.33,
"learning_rate": 0.0009516161846017919,
"loss": 0.4097,
"step": 369
},
{
"epoch": 0.34,
"learning_rate": 0.0009513005456253788,
"loss": 0.4026,
"step": 370
},
{
"epoch": 0.34,
"learning_rate": 0.0009509839331310311,
"loss": 0.4402,
"step": 371
},
{
"epoch": 0.34,
"learning_rate": 0.0009506663478017256,
"loss": 0.3955,
"step": 372
},
{
"epoch": 0.34,
"learning_rate": 0.0009503477903225382,
"loss": 0.4346,
"step": 373
},
{
"epoch": 0.34,
"learning_rate": 0.0009500282613806416,
"loss": 0.4138,
"step": 374
},
{
"epoch": 0.34,
"learning_rate": 0.000949707761665304,
"loss": 0.416,
"step": 375
},
{
"epoch": 0.34,
"learning_rate": 0.0009493862918678879,
"loss": 0.4087,
"step": 376
},
{
"epoch": 0.34,
"learning_rate": 0.0009490638526818481,
"loss": 0.4011,
"step": 377
},
{
"epoch": 0.34,
"learning_rate": 0.0009487404448027308,
"loss": 0.3826,
"step": 378
},
{
"epoch": 0.34,
"learning_rate": 0.0009484160689281716,
"loss": 0.4241,
"step": 379
},
{
"epoch": 0.34,
"learning_rate": 0.0009480907257578946,
"loss": 0.3984,
"step": 380
},
{
"epoch": 0.35,
"learning_rate": 0.0009477644159937098,
"loss": 0.4031,
"step": 381
},
{
"epoch": 0.35,
"learning_rate": 0.0009474371403395129,
"loss": 0.4158,
"step": 382
},
{
"epoch": 0.35,
"learning_rate": 0.0009471088995012829,
"loss": 0.4109,
"step": 383
},
{
"epoch": 0.35,
"learning_rate": 0.0009467796941870808,
"loss": 0.4241,
"step": 384
},
{
"epoch": 0.35,
"learning_rate": 0.0009464495251070482,
"loss": 0.4033,
"step": 385
},
{
"epoch": 0.35,
"learning_rate": 0.0009461183929734055,
"loss": 0.4053,
"step": 386
},
{
"epoch": 0.35,
"learning_rate": 0.0009457862985004509,
"loss": 0.4268,
"step": 387
},
{
"epoch": 0.35,
"learning_rate": 0.0009454532424045583,
"loss": 0.4192,
"step": 388
},
{
"epoch": 0.35,
"learning_rate": 0.0009451192254041758,
"loss": 0.4065,
"step": 389
},
{
"epoch": 0.35,
"learning_rate": 0.0009447842482198245,
"loss": 0.4341,
"step": 390
},
{
"epoch": 0.35,
"learning_rate": 0.0009444483115740967,
"loss": 0.3955,
"step": 391
},
{
"epoch": 0.36,
"learning_rate": 0.0009441114161916545,
"loss": 0.4343,
"step": 392
},
{
"epoch": 0.36,
"learning_rate": 0.0009437735627992277,
"loss": 0.4114,
"step": 393
},
{
"epoch": 0.36,
"learning_rate": 0.000943434752125613,
"loss": 0.3845,
"step": 394
},
{
"epoch": 0.36,
"learning_rate": 0.0009430949849016726,
"loss": 0.416,
"step": 395
},
{
"epoch": 0.36,
"learning_rate": 0.000942754261860331,
"loss": 0.4209,
"step": 396
},
{
"epoch": 0.36,
"learning_rate": 0.0009424125837365754,
"loss": 0.4231,
"step": 397
},
{
"epoch": 0.36,
"learning_rate": 0.0009420699512674526,
"loss": 0.4199,
"step": 398
},
{
"epoch": 0.36,
"learning_rate": 0.0009417263651920689,
"loss": 0.408,
"step": 399
},
{
"epoch": 0.36,
"learning_rate": 0.0009413818262515869,
"loss": 0.4204,
"step": 400
},
{
"epoch": 0.36,
"learning_rate": 0.000941036335189225,
"loss": 0.4192,
"step": 401
},
{
"epoch": 0.36,
"learning_rate": 0.0009406898927502554,
"loss": 0.3967,
"step": 402
},
{
"epoch": 0.37,
"learning_rate": 0.0009403424996820022,
"loss": 0.3909,
"step": 403
},
{
"epoch": 0.37,
"learning_rate": 0.0009399941567338411,
"loss": 0.3975,
"step": 404
},
{
"epoch": 0.37,
"learning_rate": 0.0009396448646571958,
"loss": 0.4209,
"step": 405
},
{
"epoch": 0.37,
"learning_rate": 0.0009392946242055378,
"loss": 0.415,
"step": 406
},
{
"epoch": 0.37,
"learning_rate": 0.0009389434361343844,
"loss": 0.416,
"step": 407
},
{
"epoch": 0.37,
"learning_rate": 0.0009385913012012972,
"loss": 0.4221,
"step": 408
},
{
"epoch": 0.37,
"learning_rate": 0.0009382382201658799,
"loss": 0.4082,
"step": 409
},
{
"epoch": 0.37,
"learning_rate": 0.0009378841937897776,
"loss": 0.4075,
"step": 410
},
{
"epoch": 0.37,
"learning_rate": 0.0009375292228366741,
"loss": 0.4182,
"step": 411
},
{
"epoch": 0.37,
"learning_rate": 0.0009371733080722911,
"loss": 0.3977,
"step": 412
},
{
"epoch": 0.37,
"learning_rate": 0.0009368164502643861,
"loss": 0.4167,
"step": 413
},
{
"epoch": 0.38,
"learning_rate": 0.0009364586501827512,
"loss": 0.3977,
"step": 414
},
{
"epoch": 0.38,
"learning_rate": 0.0009360999085992105,
"loss": 0.4001,
"step": 415
},
{
"epoch": 0.38,
"learning_rate": 0.0009357402262876198,
"loss": 0.407,
"step": 416
},
{
"epoch": 0.38,
"learning_rate": 0.0009353796040238633,
"loss": 0.4153,
"step": 417
},
{
"epoch": 0.38,
"learning_rate": 0.0009350180425858537,
"loss": 0.3962,
"step": 418
},
{
"epoch": 0.38,
"learning_rate": 0.000934655542753529,
"loss": 0.3899,
"step": 419
},
{
"epoch": 0.38,
"learning_rate": 0.0009342921053088518,
"loss": 0.3977,
"step": 420
},
{
"epoch": 0.38,
"learning_rate": 0.000933927731035807,
"loss": 0.3965,
"step": 421
},
{
"epoch": 0.38,
"learning_rate": 0.0009335624207204006,
"loss": 0.408,
"step": 422
},
{
"epoch": 0.38,
"learning_rate": 0.0009331961751506576,
"loss": 0.4006,
"step": 423
},
{
"epoch": 0.38,
"learning_rate": 0.0009328289951166204,
"loss": 0.4089,
"step": 424
},
{
"epoch": 0.39,
"learning_rate": 0.0009324608814103477,
"loss": 0.4094,
"step": 425
},
{
"epoch": 0.39,
"learning_rate": 0.0009320918348259115,
"loss": 0.4233,
"step": 426
},
{
"epoch": 0.39,
"learning_rate": 0.0009317218561593969,
"loss": 0.3977,
"step": 427
},
{
"epoch": 0.39,
"learning_rate": 0.0009313509462088989,
"loss": 0.4192,
"step": 428
},
{
"epoch": 0.39,
"learning_rate": 0.0009309791057745218,
"loss": 0.394,
"step": 429
},
{
"epoch": 0.39,
"learning_rate": 0.0009306063356583771,
"loss": 0.377,
"step": 430
},
{
"epoch": 0.39,
"learning_rate": 0.0009302326366645819,
"loss": 0.4155,
"step": 431
},
{
"epoch": 0.39,
"learning_rate": 0.0009298580095992566,
"loss": 0.3777,
"step": 432
},
{
"epoch": 0.39,
"learning_rate": 0.0009294824552705238,
"loss": 0.3853,
"step": 433
},
{
"epoch": 0.39,
"learning_rate": 0.0009291059744885063,
"loss": 0.4028,
"step": 434
},
{
"epoch": 0.39,
"learning_rate": 0.0009287285680653254,
"loss": 0.4084,
"step": 435
},
{
"epoch": 0.4,
"learning_rate": 0.0009283502368150994,
"loss": 0.4055,
"step": 436
},
{
"epoch": 0.4,
"learning_rate": 0.0009279709815539412,
"loss": 0.3779,
"step": 437
},
{
"epoch": 0.4,
"learning_rate": 0.0009275908030999569,
"loss": 0.4043,
"step": 438
},
{
"epoch": 0.4,
"learning_rate": 0.0009272097022732444,
"loss": 0.3623,
"step": 439
},
{
"epoch": 0.4,
"learning_rate": 0.0009268276798958907,
"loss": 0.3799,
"step": 440
},
{
"epoch": 0.4,
"learning_rate": 0.0009264447367919717,
"loss": 0.4004,
"step": 441
},
{
"epoch": 0.4,
"learning_rate": 0.0009260608737875486,
"loss": 0.3774,
"step": 442
},
{
"epoch": 0.4,
"learning_rate": 0.0009256760917106671,
"loss": 0.407,
"step": 443
},
{
"epoch": 0.4,
"learning_rate": 0.0009252903913913556,
"loss": 0.408,
"step": 444
},
{
"epoch": 0.4,
"learning_rate": 0.0009249037736616234,
"loss": 0.3984,
"step": 445
},
{
"epoch": 0.4,
"learning_rate": 0.0009245162393554582,
"loss": 0.3774,
"step": 446
},
{
"epoch": 0.41,
"learning_rate": 0.0009241277893088258,
"loss": 0.3989,
"step": 447
},
{
"epoch": 0.41,
"learning_rate": 0.0009237384243596667,
"loss": 0.3918,
"step": 448
},
{
"epoch": 0.41,
"learning_rate": 0.000923348145347895,
"loss": 0.3857,
"step": 449
},
{
"epoch": 0.41,
"learning_rate": 0.000922956953115397,
"loss": 0.3752,
"step": 450
},
{
"epoch": 0.41,
"learning_rate": 0.0009225648485060283,
"loss": 0.3948,
"step": 451
},
{
"epoch": 0.41,
"learning_rate": 0.0009221718323656132,
"loss": 0.3721,
"step": 452
},
{
"epoch": 0.41,
"learning_rate": 0.0009217779055419421,
"loss": 0.4028,
"step": 453
},
{
"epoch": 0.41,
"learning_rate": 0.0009213830688847699,
"loss": 0.3796,
"step": 454
},
{
"epoch": 0.41,
"learning_rate": 0.0009209873232458139,
"loss": 0.3906,
"step": 455
},
{
"epoch": 0.41,
"learning_rate": 0.0009205906694787525,
"loss": 0.3904,
"step": 456
},
{
"epoch": 0.41,
"learning_rate": 0.0009201931084392229,
"loss": 0.375,
"step": 457
},
{
"epoch": 0.42,
"learning_rate": 0.0009197946409848195,
"loss": 0.4016,
"step": 458
},
{
"epoch": 0.42,
"learning_rate": 0.0009193952679750919,
"loss": 0.3694,
"step": 459
},
{
"epoch": 0.42,
"learning_rate": 0.0009189949902715431,
"loss": 0.3652,
"step": 460
},
{
"epoch": 0.42,
"learning_rate": 0.0009185938087376277,
"loss": 0.3835,
"step": 461
},
{
"epoch": 0.42,
"learning_rate": 0.0009181917242387501,
"loss": 0.3899,
"step": 462
},
{
"epoch": 0.42,
"learning_rate": 0.0009177887376422624,
"loss": 0.3943,
"step": 463
},
{
"epoch": 0.42,
"learning_rate": 0.0009173848498174623,
"loss": 0.3508,
"step": 464
},
{
"epoch": 0.42,
"learning_rate": 0.0009169800616355921,
"loss": 0.366,
"step": 465
},
{
"epoch": 0.42,
"learning_rate": 0.0009165743739698363,
"loss": 0.3875,
"step": 466
},
{
"epoch": 0.42,
"learning_rate": 0.0009161677876953193,
"loss": 0.3833,
"step": 467
},
{
"epoch": 0.42,
"learning_rate": 0.0009157603036891042,
"loss": 0.3748,
"step": 468
},
{
"epoch": 0.43,
"learning_rate": 0.0009153519228301905,
"loss": 0.3711,
"step": 469
},
{
"epoch": 0.43,
"learning_rate": 0.0009149426459995126,
"loss": 0.3792,
"step": 470
},
{
"epoch": 0.43,
"learning_rate": 0.0009145324740799372,
"loss": 0.3542,
"step": 471
},
{
"epoch": 0.43,
"learning_rate": 0.0009141214079562625,
"loss": 0.3884,
"step": 472
},
{
"epoch": 0.43,
"learning_rate": 0.0009137094485152146,
"loss": 0.3774,
"step": 473
},
{
"epoch": 0.43,
"learning_rate": 0.0009132965966454476,
"loss": 0.4006,
"step": 474
},
{
"epoch": 0.43,
"learning_rate": 0.0009128828532375404,
"loss": 0.3755,
"step": 475
},
{
"epoch": 0.43,
"learning_rate": 0.0009124682191839946,
"loss": 0.3684,
"step": 476
},
{
"epoch": 0.43,
"learning_rate": 0.0009120526953792336,
"loss": 0.3545,
"step": 477
},
{
"epoch": 0.43,
"learning_rate": 0.0009116362827196001,
"loss": 0.3806,
"step": 478
},
{
"epoch": 0.43,
"learning_rate": 0.0009112189821033538,
"loss": 0.3958,
"step": 479
},
{
"epoch": 0.44,
"learning_rate": 0.0009108007944306702,
"loss": 0.366,
"step": 480
},
{
"epoch": 0.44,
"learning_rate": 0.0009103817206036382,
"loss": 0.3772,
"step": 481
},
{
"epoch": 0.44,
"learning_rate": 0.0009099617615262581,
"loss": 0.3896,
"step": 482
},
{
"epoch": 0.44,
"learning_rate": 0.0009095409181044398,
"loss": 0.3923,
"step": 483
},
{
"epoch": 0.44,
"learning_rate": 0.0009091191912460014,
"loss": 0.4026,
"step": 484
},
{
"epoch": 0.44,
"learning_rate": 0.000908696581860666,
"loss": 0.3699,
"step": 485
},
{
"epoch": 0.44,
"learning_rate": 0.0009082730908600607,
"loss": 0.3806,
"step": 486
},
{
"epoch": 0.44,
"learning_rate": 0.0009078487191577144,
"loss": 0.3804,
"step": 487
},
{
"epoch": 0.44,
"learning_rate": 0.000907423467669056,
"loss": 0.3955,
"step": 488
},
{
"epoch": 0.44,
"learning_rate": 0.0009069973373114118,
"loss": 0.3965,
"step": 489
},
{
"epoch": 0.44,
"learning_rate": 0.0009065703290040042,
"loss": 0.3911,
"step": 490
},
{
"epoch": 0.45,
"learning_rate": 0.0009061424436679494,
"loss": 0.3958,
"step": 491
},
{
"epoch": 0.45,
"learning_rate": 0.0009057136822262555,
"loss": 0.3574,
"step": 492
},
{
"epoch": 0.45,
"learning_rate": 0.0009052840456038203,
"loss": 0.3804,
"step": 493
},
{
"epoch": 0.45,
"learning_rate": 0.0009048535347274298,
"loss": 0.3833,
"step": 494
},
{
"epoch": 0.45,
"learning_rate": 0.0009044221505257558,
"loss": 0.3765,
"step": 495
},
{
"epoch": 0.45,
"learning_rate": 0.0009039898939293538,
"loss": 0.3706,
"step": 496
},
{
"epoch": 0.45,
"learning_rate": 0.0009035567658706613,
"loss": 0.3862,
"step": 497
},
{
"epoch": 0.45,
"learning_rate": 0.0009031227672839957,
"loss": 0.3621,
"step": 498
},
{
"epoch": 0.45,
"learning_rate": 0.000902687899105552,
"loss": 0.3745,
"step": 499
},
{
"epoch": 0.45,
"learning_rate": 0.0009022521622734017,
"loss": 0.3928,
"step": 500
},
{
"epoch": 0.45,
"learning_rate": 0.0009018155577274891,
"loss": 0.3872,
"step": 501
},
{
"epoch": 0.46,
"learning_rate": 0.0009013780864096313,
"loss": 0.3936,
"step": 502
},
{
"epoch": 0.46,
"learning_rate": 0.0009009397492635143,
"loss": 0.3767,
"step": 503
},
{
"epoch": 0.46,
"learning_rate": 0.0009005005472346923,
"loss": 0.3892,
"step": 504
},
{
"epoch": 0.46,
"learning_rate": 0.0009000604812705853,
"loss": 0.3787,
"step": 505
},
{
"epoch": 0.46,
"learning_rate": 0.0008996195523204762,
"loss": 0.3728,
"step": 506
},
{
"epoch": 0.46,
"learning_rate": 0.0008991777613355103,
"loss": 0.3958,
"step": 507
},
{
"epoch": 0.46,
"learning_rate": 0.0008987351092686922,
"loss": 0.3804,
"step": 508
},
{
"epoch": 0.46,
"learning_rate": 0.0008982915970748836,
"loss": 0.396,
"step": 509
},
{
"epoch": 0.46,
"learning_rate": 0.000897847225710802,
"loss": 0.3784,
"step": 510
},
{
"epoch": 0.46,
"learning_rate": 0.000897401996135018,
"loss": 0.3848,
"step": 511
},
{
"epoch": 0.46,
"learning_rate": 0.0008969559093079538,
"loss": 0.3767,
"step": 512
},
{
"epoch": 0.47,
"learning_rate": 0.0008965089661918805,
"loss": 0.3755,
"step": 513
},
{
"epoch": 0.47,
"learning_rate": 0.0008960611677509165,
"loss": 0.365,
"step": 514
},
{
"epoch": 0.47,
"learning_rate": 0.0008956125149510252,
"loss": 0.3706,
"step": 515
},
{
"epoch": 0.47,
"learning_rate": 0.0008951630087600129,
"loss": 0.3867,
"step": 516
},
{
"epoch": 0.47,
"learning_rate": 0.000894712650147527,
"loss": 0.4011,
"step": 517
},
{
"epoch": 0.47,
"learning_rate": 0.0008942614400850535,
"loss": 0.3674,
"step": 518
},
{
"epoch": 0.47,
"learning_rate": 0.0008938093795459151,
"loss": 0.3655,
"step": 519
},
{
"epoch": 0.47,
"learning_rate": 0.0008933564695052692,
"loss": 0.3748,
"step": 520
},
{
"epoch": 0.47,
"learning_rate": 0.0008929027109401055,
"loss": 0.3823,
"step": 521
},
{
"epoch": 0.47,
"learning_rate": 0.0008924481048292446,
"loss": 0.3818,
"step": 522
},
{
"epoch": 0.47,
"learning_rate": 0.0008919926521533346,
"loss": 0.3599,
"step": 523
},
{
"epoch": 0.48,
"learning_rate": 0.0008915363538948502,
"loss": 0.3696,
"step": 524
},
{
"epoch": 0.48,
"learning_rate": 0.0008910792110380903,
"loss": 0.3799,
"step": 525
},
{
"epoch": 0.48,
"learning_rate": 0.0008906212245691754,
"loss": 0.3738,
"step": 526
},
{
"epoch": 0.48,
"learning_rate": 0.0008901623954760459,
"loss": 0.3765,
"step": 527
},
{
"epoch": 0.48,
"learning_rate": 0.00088970272474846,
"loss": 0.3647,
"step": 528
},
{
"epoch": 0.48,
"learning_rate": 0.0008892422133779909,
"loss": 0.3708,
"step": 529
},
{
"epoch": 0.48,
"learning_rate": 0.000888780862358026,
"loss": 0.3682,
"step": 530
},
{
"epoch": 0.48,
"learning_rate": 0.0008883186726837631,
"loss": 0.3579,
"step": 531
},
{
"epoch": 0.48,
"learning_rate": 0.0008878556453522099,
"loss": 0.3564,
"step": 532
},
{
"epoch": 0.48,
"learning_rate": 0.0008873917813621803,
"loss": 0.3594,
"step": 533
},
{
"epoch": 0.48,
"learning_rate": 0.0008869270817142936,
"loss": 0.3601,
"step": 534
},
{
"epoch": 0.49,
"learning_rate": 0.0008864615474109714,
"loss": 0.3796,
"step": 535
},
{
"epoch": 0.49,
"learning_rate": 0.0008859951794564356,
"loss": 0.3528,
"step": 536
},
{
"epoch": 0.49,
"learning_rate": 0.0008855279788567068,
"loss": 0.3794,
"step": 537
},
{
"epoch": 0.49,
"learning_rate": 0.0008850599466196016,
"loss": 0.3535,
"step": 538
},
{
"epoch": 0.49,
"learning_rate": 0.0008845910837547305,
"loss": 0.3359,
"step": 539
},
{
"epoch": 0.49,
"learning_rate": 0.0008841213912734958,
"loss": 0.3799,
"step": 540
},
{
"epoch": 0.49,
"learning_rate": 0.0008836508701890892,
"loss": 0.3633,
"step": 541
},
{
"epoch": 0.49,
"learning_rate": 0.0008831795215164901,
"loss": 0.3711,
"step": 542
},
{
"epoch": 0.49,
"learning_rate": 0.0008827073462724633,
"loss": 0.3889,
"step": 543
},
{
"epoch": 0.49,
"learning_rate": 0.0008822343454755561,
"loss": 0.387,
"step": 544
},
{
"epoch": 0.49,
"learning_rate": 0.0008817605201460969,
"loss": 0.3782,
"step": 545
},
{
"epoch": 0.5,
"learning_rate": 0.0008812858713061927,
"loss": 0.3704,
"step": 546
},
{
"epoch": 0.5,
"learning_rate": 0.000880810399979727,
"loss": 0.354,
"step": 547
},
{
"epoch": 0.5,
"learning_rate": 0.0008803341071923575,
"loss": 0.3533,
"step": 548
},
{
"epoch": 0.5,
"learning_rate": 0.0008798569939715139,
"loss": 0.3806,
"step": 549
},
{
"epoch": 0.5,
"learning_rate": 0.0008793790613463954,
"loss": 0.3452,
"step": 550
},
{
"epoch": 0.5,
"learning_rate": 0.0008789003103479694,
"loss": 0.3743,
"step": 551
},
{
"epoch": 0.5,
"learning_rate": 0.000878420742008968,
"loss": 0.3521,
"step": 552
},
{
"epoch": 0.5,
"learning_rate": 0.0008779403573638869,
"loss": 0.3696,
"step": 553
},
{
"epoch": 0.5,
"learning_rate": 0.0008774591574489823,
"loss": 0.3674,
"step": 554
},
{
"epoch": 0.5,
"learning_rate": 0.0008769771433022693,
"loss": 0.3765,
"step": 555
},
{
"epoch": 0.5,
"learning_rate": 0.0008764943159635193,
"loss": 0.3428,
"step": 556
},
{
"epoch": 0.5,
"learning_rate": 0.0008760106764742579,
"loss": 0.3442,
"step": 557
},
{
"epoch": 0.51,
"learning_rate": 0.0008755262258777626,
"loss": 0.3386,
"step": 558
},
{
"epoch": 0.51,
"learning_rate": 0.0008750409652190607,
"loss": 0.3452,
"step": 559
},
{
"epoch": 0.51,
"learning_rate": 0.0008745548955449269,
"loss": 0.3735,
"step": 560
},
{
"epoch": 0.51,
"learning_rate": 0.0008740680179038807,
"loss": 0.3513,
"step": 561
},
{
"epoch": 0.51,
"learning_rate": 0.000873580333346185,
"loss": 0.3464,
"step": 562
},
{
"epoch": 0.51,
"learning_rate": 0.0008730918429238428,
"loss": 0.3711,
"step": 563
},
{
"epoch": 0.51,
"learning_rate": 0.0008726025476905961,
"loss": 0.3806,
"step": 564
},
{
"epoch": 0.51,
"learning_rate": 0.0008721124487019225,
"loss": 0.3557,
"step": 565
},
{
"epoch": 0.51,
"learning_rate": 0.0008716215470150335,
"loss": 0.3613,
"step": 566
},
{
"epoch": 0.51,
"learning_rate": 0.0008711298436888723,
"loss": 0.3833,
"step": 567
},
{
"epoch": 0.51,
"learning_rate": 0.0008706373397841114,
"loss": 0.3496,
"step": 568
},
{
"epoch": 0.52,
"learning_rate": 0.0008701440363631499,
"loss": 0.3379,
"step": 569
},
{
"epoch": 0.52,
"learning_rate": 0.0008696499344901115,
"loss": 0.3811,
"step": 570
},
{
"epoch": 0.52,
"learning_rate": 0.000869155035230843,
"loss": 0.3596,
"step": 571
},
{
"epoch": 0.52,
"learning_rate": 0.0008686593396529108,
"loss": 0.3616,
"step": 572
},
{
"epoch": 0.52,
"learning_rate": 0.0008681628488255986,
"loss": 0.3574,
"step": 573
},
{
"epoch": 0.52,
"learning_rate": 0.0008676655638199067,
"loss": 0.3699,
"step": 574
},
{
"epoch": 0.52,
"learning_rate": 0.0008671674857085473,
"loss": 0.3574,
"step": 575
},
{
"epoch": 0.52,
"learning_rate": 0.0008666686155659445,
"loss": 0.3462,
"step": 576
},
{
"epoch": 0.52,
"learning_rate": 0.0008661689544682301,
"loss": 0.3652,
"step": 577
},
{
"epoch": 0.52,
"learning_rate": 0.0008656685034932428,
"loss": 0.3269,
"step": 578
},
{
"epoch": 0.52,
"learning_rate": 0.0008651672637205245,
"loss": 0.3525,
"step": 579
},
{
"epoch": 0.53,
"learning_rate": 0.0008646652362313193,
"loss": 0.3845,
"step": 580
},
{
"epoch": 0.53,
"learning_rate": 0.0008641624221085699,
"loss": 0.3711,
"step": 581
},
{
"epoch": 0.53,
"learning_rate": 0.0008636588224369163,
"loss": 0.3477,
"step": 582
},
{
"epoch": 0.53,
"learning_rate": 0.0008631544383026931,
"loss": 0.3406,
"step": 583
},
{
"epoch": 0.53,
"learning_rate": 0.0008626492707939264,
"loss": 0.3618,
"step": 584
},
{
"epoch": 0.53,
"learning_rate": 0.0008621433210003333,
"loss": 0.364,
"step": 585
},
{
"epoch": 0.53,
"learning_rate": 0.0008616365900133174,
"loss": 0.3662,
"step": 586
},
{
"epoch": 0.53,
"learning_rate": 0.0008611290789259676,
"loss": 0.3464,
"step": 587
},
{
"epoch": 0.53,
"learning_rate": 0.0008606207888330562,
"loss": 0.3894,
"step": 588
},
{
"epoch": 0.53,
"learning_rate": 0.0008601117208310352,
"loss": 0.364,
"step": 589
},
{
"epoch": 0.53,
"learning_rate": 0.0008596018760180351,
"loss": 0.3667,
"step": 590
},
{
"epoch": 0.54,
"learning_rate": 0.0008590912554938619,
"loss": 0.3633,
"step": 591
},
{
"epoch": 0.54,
"learning_rate": 0.000858579860359995,
"loss": 0.3665,
"step": 592
},
{
"epoch": 0.54,
"learning_rate": 0.0008580676917195846,
"loss": 0.375,
"step": 593
},
{
"epoch": 0.54,
"learning_rate": 0.0008575547506774497,
"loss": 0.3501,
"step": 594
},
{
"epoch": 0.54,
"learning_rate": 0.0008570410383400753,
"loss": 0.3345,
"step": 595
},
{
"epoch": 0.54,
"learning_rate": 0.0008565265558156101,
"loss": 0.3589,
"step": 596
},
{
"epoch": 0.54,
"learning_rate": 0.0008560113042138647,
"loss": 0.3496,
"step": 597
},
{
"epoch": 0.54,
"learning_rate": 0.000855495284646308,
"loss": 0.3516,
"step": 598
},
{
"epoch": 0.54,
"learning_rate": 0.0008549784982260658,
"loss": 0.3462,
"step": 599
},
{
"epoch": 0.54,
"learning_rate": 0.0008544609460679184,
"loss": 0.3447,
"step": 600
},
{
"epoch": 0.54,
"learning_rate": 0.0008539426292882975,
"loss": 0.3472,
"step": 601
},
{
"epoch": 0.55,
"learning_rate": 0.0008534235490052846,
"loss": 0.3408,
"step": 602
},
{
"epoch": 0.55,
"learning_rate": 0.0008529037063386076,
"loss": 0.3669,
"step": 603
},
{
"epoch": 0.55,
"learning_rate": 0.0008523831024096395,
"loss": 0.3494,
"step": 604
},
{
"epoch": 0.55,
"learning_rate": 0.0008518617383413954,
"loss": 0.3689,
"step": 605
},
{
"epoch": 0.55,
"learning_rate": 0.0008513396152585296,
"loss": 0.3408,
"step": 606
},
{
"epoch": 0.55,
"learning_rate": 0.0008508167342873342,
"loss": 0.3701,
"step": 607
},
{
"epoch": 0.55,
"learning_rate": 0.0008502930965557363,
"loss": 0.3535,
"step": 608
},
{
"epoch": 0.55,
"learning_rate": 0.000849768703193295,
"loss": 0.3684,
"step": 609
},
{
"epoch": 0.55,
"learning_rate": 0.0008492435553311995,
"loss": 0.3757,
"step": 610
},
{
"epoch": 0.55,
"learning_rate": 0.000848717654102267,
"loss": 0.3679,
"step": 611
},
{
"epoch": 0.55,
"learning_rate": 0.000848191000640939,
"loss": 0.373,
"step": 612
},
{
"epoch": 0.56,
"learning_rate": 0.0008476635960832804,
"loss": 0.3406,
"step": 613
},
{
"epoch": 0.56,
"learning_rate": 0.0008471354415669761,
"loss": 0.3508,
"step": 614
},
{
"epoch": 0.56,
"learning_rate": 0.0008466065382313286,
"loss": 0.3459,
"step": 615
},
{
"epoch": 0.56,
"learning_rate": 0.0008460768872172557,
"loss": 0.3435,
"step": 616
},
{
"epoch": 0.56,
"learning_rate": 0.0008455464896672887,
"loss": 0.3513,
"step": 617
},
{
"epoch": 0.56,
"learning_rate": 0.0008450153467255681,
"loss": 0.3547,
"step": 618
},
{
"epoch": 0.56,
"learning_rate": 0.0008444834595378434,
"loss": 0.3616,
"step": 619
},
{
"epoch": 0.56,
"learning_rate": 0.0008439508292514688,
"loss": 0.334,
"step": 620
},
{
"epoch": 0.56,
"learning_rate": 0.000843417457015402,
"loss": 0.324,
"step": 621
},
{
"epoch": 0.56,
"learning_rate": 0.0008428833439802011,
"loss": 0.3513,
"step": 622
},
{
"epoch": 0.56,
"learning_rate": 0.0008423484912980217,
"loss": 0.3508,
"step": 623
},
{
"epoch": 0.57,
"learning_rate": 0.0008418129001226156,
"loss": 0.363,
"step": 624
},
{
"epoch": 0.57,
"learning_rate": 0.0008412765716093271,
"loss": 0.3413,
"step": 625
},
{
"epoch": 0.57,
"learning_rate": 0.0008407395069150914,
"loss": 0.3442,
"step": 626
},
{
"epoch": 0.57,
"learning_rate": 0.0008402017071984316,
"loss": 0.3752,
"step": 627
},
{
"epoch": 0.57,
"learning_rate": 0.0008396631736194562,
"loss": 0.3186,
"step": 628
},
{
"epoch": 0.57,
"learning_rate": 0.0008391239073398573,
"loss": 0.3577,
"step": 629
},
{
"epoch": 0.57,
"learning_rate": 0.0008385839095229066,
"loss": 0.3315,
"step": 630
},
{
"epoch": 0.57,
"learning_rate": 0.0008380431813334548,
"loss": 0.3364,
"step": 631
},
{
"epoch": 0.57,
"learning_rate": 0.0008375017239379276,
"loss": 0.3652,
"step": 632
},
{
"epoch": 0.57,
"learning_rate": 0.0008369595385043235,
"loss": 0.3213,
"step": 633
},
{
"epoch": 0.57,
"learning_rate": 0.0008364166262022119,
"loss": 0.355,
"step": 634
},
{
"epoch": 0.58,
"learning_rate": 0.0008358729882027302,
"loss": 0.3491,
"step": 635
},
{
"epoch": 0.58,
"learning_rate": 0.0008353286256785811,
"loss": 0.3667,
"step": 636
},
{
"epoch": 0.58,
"learning_rate": 0.0008347835398040297,
"loss": 0.3669,
"step": 637
},
{
"epoch": 0.58,
"learning_rate": 0.0008342377317549022,
"loss": 0.3433,
"step": 638
},
{
"epoch": 0.58,
"learning_rate": 0.0008336912027085821,
"loss": 0.3352,
"step": 639
},
{
"epoch": 0.58,
"learning_rate": 0.0008331439538440088,
"loss": 0.3523,
"step": 640
},
{
"epoch": 0.58,
"learning_rate": 0.0008325959863416741,
"loss": 0.3538,
"step": 641
},
{
"epoch": 0.58,
"learning_rate": 0.0008320473013836196,
"loss": 0.3633,
"step": 642
},
{
"epoch": 0.58,
"learning_rate": 0.000831497900153435,
"loss": 0.3701,
"step": 643
},
{
"epoch": 0.58,
"learning_rate": 0.0008309477838362554,
"loss": 0.3699,
"step": 644
},
{
"epoch": 0.58,
"learning_rate": 0.0008303969536187577,
"loss": 0.3569,
"step": 645
},
{
"epoch": 0.59,
"learning_rate": 0.0008298454106891593,
"loss": 0.3474,
"step": 646
},
{
"epoch": 0.59,
"learning_rate": 0.0008292931562372148,
"loss": 0.3503,
"step": 647
},
{
"epoch": 0.59,
"learning_rate": 0.0008287401914542138,
"loss": 0.3596,
"step": 648
},
{
"epoch": 0.59,
"learning_rate": 0.0008281865175329782,
"loss": 0.3518,
"step": 649
},
{
"epoch": 0.59,
"learning_rate": 0.0008276321356678593,
"loss": 0.3677,
"step": 650
},
{
"epoch": 0.59,
"learning_rate": 0.0008270770470547359,
"loss": 0.3533,
"step": 651
},
{
"epoch": 0.59,
"learning_rate": 0.0008265212528910113,
"loss": 0.3428,
"step": 652
},
{
"epoch": 0.59,
"learning_rate": 0.0008259647543756105,
"loss": 0.3394,
"step": 653
},
{
"epoch": 0.59,
"learning_rate": 0.0008254075527089781,
"loss": 0.3469,
"step": 654
},
{
"epoch": 0.59,
"learning_rate": 0.0008248496490930754,
"loss": 0.3319,
"step": 655
},
{
"epoch": 0.59,
"learning_rate": 0.0008242910447313781,
"loss": 0.3418,
"step": 656
},
{
"epoch": 0.6,
"learning_rate": 0.0008237317408288731,
"loss": 0.3337,
"step": 657
},
{
"epoch": 0.6,
"learning_rate": 0.0008231717385920569,
"loss": 0.3472,
"step": 658
},
{
"epoch": 0.6,
"learning_rate": 0.000822611039228932,
"loss": 0.355,
"step": 659
},
{
"epoch": 0.6,
"learning_rate": 0.0008220496439490046,
"loss": 0.3652,
"step": 660
},
{
"epoch": 0.6,
"learning_rate": 0.0008214875539632824,
"loss": 0.3445,
"step": 661
},
{
"epoch": 0.6,
"learning_rate": 0.0008209247704842715,
"loss": 0.355,
"step": 662
},
{
"epoch": 0.6,
"learning_rate": 0.000820361294725974,
"loss": 0.3352,
"step": 663
},
{
"epoch": 0.6,
"learning_rate": 0.0008197971279038854,
"loss": 0.3582,
"step": 664
},
{
"epoch": 0.6,
"learning_rate": 0.0008192322712349917,
"loss": 0.3296,
"step": 665
},
{
"epoch": 0.6,
"learning_rate": 0.0008186667259377675,
"loss": 0.3516,
"step": 666
},
{
"epoch": 0.6,
"learning_rate": 0.0008181004932321721,
"loss": 0.325,
"step": 667
},
{
"epoch": 0.61,
"learning_rate": 0.0008175335743396483,
"loss": 0.3253,
"step": 668
},
{
"epoch": 0.61,
"learning_rate": 0.0008169659704831187,
"loss": 0.3616,
"step": 669
},
{
"epoch": 0.61,
"learning_rate": 0.0008163976828869839,
"loss": 0.3513,
"step": 670
},
{
"epoch": 0.61,
"learning_rate": 0.0008158287127771191,
"loss": 0.3341,
"step": 671
},
{
"epoch": 0.61,
"learning_rate": 0.0008152590613808714,
"loss": 0.3416,
"step": 672
},
{
"epoch": 0.61,
"learning_rate": 0.0008146887299270584,
"loss": 0.3413,
"step": 673
},
{
"epoch": 0.61,
"learning_rate": 0.0008141177196459643,
"loss": 0.3303,
"step": 674
},
{
"epoch": 0.61,
"learning_rate": 0.000813546031769337,
"loss": 0.3579,
"step": 675
},
{
"epoch": 0.61,
"learning_rate": 0.0008129736675303871,
"loss": 0.3386,
"step": 676
},
{
"epoch": 0.61,
"learning_rate": 0.0008124006281637838,
"loss": 0.313,
"step": 677
},
{
"epoch": 0.61,
"learning_rate": 0.0008118269149056521,
"loss": 0.3242,
"step": 678
},
{
"epoch": 0.62,
"learning_rate": 0.0008112525289935715,
"loss": 0.3687,
"step": 679
},
{
"epoch": 0.62,
"learning_rate": 0.0008106774716665721,
"loss": 0.3145,
"step": 680
},
{
"epoch": 0.62,
"learning_rate": 0.0008101017441651324,
"loss": 0.3401,
"step": 681
},
{
"epoch": 0.62,
"learning_rate": 0.0008095253477311765,
"loss": 0.3401,
"step": 682
},
{
"epoch": 0.62,
"learning_rate": 0.0008089482836080717,
"loss": 0.3254,
"step": 683
},
{
"epoch": 0.62,
"learning_rate": 0.0008083705530406251,
"loss": 0.3774,
"step": 684
},
{
"epoch": 0.62,
"learning_rate": 0.000807792157275082,
"loss": 0.3518,
"step": 685
},
{
"epoch": 0.62,
"learning_rate": 0.0008072130975591223,
"loss": 0.3279,
"step": 686
},
{
"epoch": 0.62,
"learning_rate": 0.0008066333751418582,
"loss": 0.3347,
"step": 687
},
{
"epoch": 0.62,
"learning_rate": 0.0008060529912738315,
"loss": 0.374,
"step": 688
},
{
"epoch": 0.62,
"learning_rate": 0.0008054719472070106,
"loss": 0.3438,
"step": 689
},
{
"epoch": 0.63,
"learning_rate": 0.0008048902441947885,
"loss": 0.3359,
"step": 690
},
{
"epoch": 0.63,
"learning_rate": 0.0008043078834919792,
"loss": 0.354,
"step": 691
},
{
"epoch": 0.63,
"learning_rate": 0.0008037248663548156,
"loss": 0.3418,
"step": 692
},
{
"epoch": 0.63,
"learning_rate": 0.0008031411940409466,
"loss": 0.3521,
"step": 693
},
{
"epoch": 0.63,
"learning_rate": 0.0008025568678094345,
"loss": 0.3364,
"step": 694
},
{
"epoch": 0.63,
"learning_rate": 0.0008019718889207521,
"loss": 0.3384,
"step": 695
},
{
"epoch": 0.63,
"learning_rate": 0.0008013862586367798,
"loss": 0.3403,
"step": 696
},
{
"epoch": 0.63,
"learning_rate": 0.0008007999782208039,
"loss": 0.3345,
"step": 697
},
{
"epoch": 0.63,
"learning_rate": 0.0008002130489375123,
"loss": 0.332,
"step": 698
},
{
"epoch": 0.63,
"learning_rate": 0.0007996254720529929,
"loss": 0.3464,
"step": 699
},
{
"epoch": 0.63,
"learning_rate": 0.0007990372488347311,
"loss": 0.3586,
"step": 700
},
{
"epoch": 0.64,
"learning_rate": 0.0007984483805516055,
"loss": 0.3362,
"step": 701
},
{
"epoch": 0.64,
"learning_rate": 0.0007978588684738871,
"loss": 0.3328,
"step": 702
},
{
"epoch": 0.64,
"learning_rate": 0.0007972687138732351,
"loss": 0.333,
"step": 703
},
{
"epoch": 0.64,
"learning_rate": 0.0007966779180226951,
"loss": 0.3394,
"step": 704
},
{
"epoch": 0.64,
"learning_rate": 0.0007960864821966954,
"loss": 0.3547,
"step": 705
},
{
"epoch": 0.64,
"learning_rate": 0.0007954944076710458,
"loss": 0.3547,
"step": 706
},
{
"epoch": 0.64,
"learning_rate": 0.0007949016957229327,
"loss": 0.3303,
"step": 707
},
{
"epoch": 0.64,
"learning_rate": 0.0007943083476309183,
"loss": 0.3523,
"step": 708
},
{
"epoch": 0.64,
"learning_rate": 0.0007937143646749368,
"loss": 0.3452,
"step": 709
},
{
"epoch": 0.64,
"learning_rate": 0.0007931197481362917,
"loss": 0.3594,
"step": 710
},
{
"epoch": 0.64,
"learning_rate": 0.0007925244992976537,
"loss": 0.3367,
"step": 711
},
{
"epoch": 0.65,
"learning_rate": 0.0007919286194430573,
"loss": 0.3394,
"step": 712
},
{
"epoch": 0.65,
"learning_rate": 0.0007913321098578978,
"loss": 0.3508,
"step": 713
},
{
"epoch": 0.65,
"learning_rate": 0.0007907349718289291,
"loss": 0.3406,
"step": 714
},
{
"epoch": 0.65,
"learning_rate": 0.0007901372066442614,
"loss": 0.3511,
"step": 715
},
{
"epoch": 0.65,
"learning_rate": 0.0007895388155933564,
"loss": 0.3501,
"step": 716
},
{
"epoch": 0.65,
"learning_rate": 0.0007889397999670271,
"loss": 0.3479,
"step": 717
},
{
"epoch": 0.65,
"learning_rate": 0.0007883401610574337,
"loss": 0.354,
"step": 718
},
{
"epoch": 0.65,
"learning_rate": 0.00078773990015808,
"loss": 0.3276,
"step": 719
},
{
"epoch": 0.65,
"learning_rate": 0.0007871390185638123,
"loss": 0.3362,
"step": 720
},
{
"epoch": 0.65,
"learning_rate": 0.0007865375175708157,
"loss": 0.3403,
"step": 721
},
{
"epoch": 0.65,
"learning_rate": 0.0007859353984766114,
"loss": 0.3357,
"step": 722
},
{
"epoch": 0.66,
"learning_rate": 0.0007853326625800536,
"loss": 0.344,
"step": 723
},
{
"epoch": 0.66,
"learning_rate": 0.0007847293111813276,
"loss": 0.3359,
"step": 724
},
{
"epoch": 0.66,
"learning_rate": 0.0007841253455819462,
"loss": 0.3425,
"step": 725
},
{
"epoch": 0.66,
"learning_rate": 0.0007835207670847466,
"loss": 0.3464,
"step": 726
},
{
"epoch": 0.66,
"learning_rate": 0.0007829155769938889,
"loss": 0.3358,
"step": 727
},
{
"epoch": 0.66,
"learning_rate": 0.0007823097766148522,
"loss": 0.3408,
"step": 728
},
{
"epoch": 0.66,
"learning_rate": 0.0007817033672544321,
"loss": 0.3462,
"step": 729
},
{
"epoch": 0.66,
"learning_rate": 0.0007810963502207373,
"loss": 0.3347,
"step": 730
},
{
"epoch": 0.66,
"learning_rate": 0.0007804887268231883,
"loss": 0.3252,
"step": 731
},
{
"epoch": 0.66,
"learning_rate": 0.000779880498372513,
"loss": 0.3599,
"step": 732
},
{
"epoch": 0.66,
"learning_rate": 0.0007792716661807442,
"loss": 0.3467,
"step": 733
},
{
"epoch": 0.67,
"learning_rate": 0.0007786622315612182,
"loss": 0.3369,
"step": 734
},
{
"epoch": 0.67,
"learning_rate": 0.0007780521958285697,
"loss": 0.3391,
"step": 735
},
{
"epoch": 0.67,
"learning_rate": 0.0007774415602987304,
"loss": 0.3279,
"step": 736
},
{
"epoch": 0.67,
"learning_rate": 0.0007768303262889261,
"loss": 0.3074,
"step": 737
},
{
"epoch": 0.67,
"learning_rate": 0.0007762184951176733,
"loss": 0.3337,
"step": 738
},
{
"epoch": 0.67,
"learning_rate": 0.0007756060681047768,
"loss": 0.332,
"step": 739
},
{
"epoch": 0.67,
"learning_rate": 0.0007749930465713266,
"loss": 0.3411,
"step": 740
},
{
"epoch": 0.67,
"learning_rate": 0.0007743794318396955,
"loss": 0.3511,
"step": 741
},
{
"epoch": 0.67,
"learning_rate": 0.0007737652252335355,
"loss": 0.3259,
"step": 742
},
{
"epoch": 0.67,
"learning_rate": 0.0007731504280777757,
"loss": 0.3472,
"step": 743
},
{
"epoch": 0.67,
"learning_rate": 0.0007725350416986187,
"loss": 0.3528,
"step": 744
},
{
"epoch": 0.68,
"learning_rate": 0.0007719190674235384,
"loss": 0.3376,
"step": 745
},
{
"epoch": 0.68,
"learning_rate": 0.0007713025065812769,
"loss": 0.3462,
"step": 746
},
{
"epoch": 0.68,
"learning_rate": 0.0007706853605018416,
"loss": 0.3354,
"step": 747
},
{
"epoch": 0.68,
"learning_rate": 0.000770067630516502,
"loss": 0.3389,
"step": 748
},
{
"epoch": 0.68,
"learning_rate": 0.0007694493179577879,
"loss": 0.3425,
"step": 749
},
{
"epoch": 0.68,
"learning_rate": 0.0007688304241594847,
"loss": 0.3293,
"step": 750
},
{
"epoch": 0.68,
"learning_rate": 0.0007682109504566331,
"loss": 0.3403,
"step": 751
},
{
"epoch": 0.68,
"learning_rate": 0.000767590898185523,
"loss": 0.3325,
"step": 752
},
{
"epoch": 0.68,
"learning_rate": 0.0007669702686836936,
"loss": 0.3173,
"step": 753
},
{
"epoch": 0.68,
"learning_rate": 0.0007663490632899292,
"loss": 0.3391,
"step": 754
},
{
"epoch": 0.68,
"learning_rate": 0.0007657272833442559,
"loss": 0.3376,
"step": 755
},
{
"epoch": 0.69,
"learning_rate": 0.0007651049301879391,
"loss": 0.333,
"step": 756
},
{
"epoch": 0.69,
"learning_rate": 0.0007644820051634812,
"loss": 0.3303,
"step": 757
},
{
"epoch": 0.69,
"learning_rate": 0.000763858509614618,
"loss": 0.3186,
"step": 758
},
{
"epoch": 0.69,
"learning_rate": 0.0007632344448863157,
"loss": 0.3115,
"step": 759
},
{
"epoch": 0.69,
"learning_rate": 0.000762609812324769,
"loss": 0.3164,
"step": 760
},
{
"epoch": 0.69,
"learning_rate": 0.0007619846132773968,
"loss": 0.3335,
"step": 761
},
{
"epoch": 0.69,
"learning_rate": 0.00076135884909284,
"loss": 0.3735,
"step": 762
},
{
"epoch": 0.69,
"learning_rate": 0.0007607325211209593,
"loss": 0.3181,
"step": 763
},
{
"epoch": 0.69,
"learning_rate": 0.0007601056307128308,
"loss": 0.3542,
"step": 764
},
{
"epoch": 0.69,
"learning_rate": 0.0007594781792207441,
"loss": 0.3694,
"step": 765
},
{
"epoch": 0.69,
"learning_rate": 0.0007588501679981996,
"loss": 0.332,
"step": 766
},
{
"epoch": 0.7,
"learning_rate": 0.0007582215983999043,
"loss": 0.3223,
"step": 767
},
{
"epoch": 0.7,
"learning_rate": 0.00075759247178177,
"loss": 0.3298,
"step": 768
},
{
"epoch": 0.7,
"learning_rate": 0.0007569627895009103,
"loss": 0.3403,
"step": 769
},
{
"epoch": 0.7,
"learning_rate": 0.0007563325529156377,
"loss": 0.3374,
"step": 770
},
{
"epoch": 0.7,
"learning_rate": 0.0007557017633854595,
"loss": 0.3203,
"step": 771
},
{
"epoch": 0.7,
"learning_rate": 0.0007550704222710764,
"loss": 0.3298,
"step": 772
},
{
"epoch": 0.7,
"learning_rate": 0.0007544385309343792,
"loss": 0.3232,
"step": 773
},
{
"epoch": 0.7,
"learning_rate": 0.0007538060907384447,
"loss": 0.3264,
"step": 774
},
{
"epoch": 0.7,
"learning_rate": 0.0007531731030475345,
"loss": 0.3323,
"step": 775
},
{
"epoch": 0.7,
"learning_rate": 0.000752539569227091,
"loss": 0.3069,
"step": 776
},
{
"epoch": 0.7,
"learning_rate": 0.0007519054906437344,
"loss": 0.304,
"step": 777
},
{
"epoch": 0.71,
"learning_rate": 0.0007512708686652603,
"loss": 0.3347,
"step": 778
},
{
"epoch": 0.71,
"learning_rate": 0.0007506357046606367,
"loss": 0.3188,
"step": 779
},
{
"epoch": 0.71,
"learning_rate": 0.00075,
"loss": 0.3479,
"step": 780
},
{
"epoch": 0.71,
"learning_rate": 0.0007493637560546538,
"loss": 0.3071,
"step": 781
},
{
"epoch": 0.71,
"learning_rate": 0.0007487269741970647,
"loss": 0.3303,
"step": 782
},
{
"epoch": 0.71,
"learning_rate": 0.0007480896558008593,
"loss": 0.3103,
"step": 783
},
{
"epoch": 0.71,
"learning_rate": 0.000747451802240822,
"loss": 0.3325,
"step": 784
},
{
"epoch": 0.71,
"learning_rate": 0.0007468134148928916,
"loss": 0.3459,
"step": 785
},
{
"epoch": 0.71,
"learning_rate": 0.0007461744951341579,
"loss": 0.333,
"step": 786
},
{
"epoch": 0.71,
"learning_rate": 0.0007455350443428598,
"loss": 0.3406,
"step": 787
},
{
"epoch": 0.71,
"learning_rate": 0.0007448950638983815,
"loss": 0.3018,
"step": 788
},
{
"epoch": 0.72,
"learning_rate": 0.0007442545551812494,
"loss": 0.3264,
"step": 789
},
{
"epoch": 0.72,
"learning_rate": 0.0007436135195731299,
"loss": 0.3376,
"step": 790
},
{
"epoch": 0.72,
"learning_rate": 0.0007429719584568259,
"loss": 0.3201,
"step": 791
},
{
"epoch": 0.72,
"learning_rate": 0.0007423298732162736,
"loss": 0.3154,
"step": 792
},
{
"epoch": 0.72,
"learning_rate": 0.0007416872652365401,
"loss": 0.3301,
"step": 793
},
{
"epoch": 0.72,
"learning_rate": 0.0007410441359038202,
"loss": 0.3396,
"step": 794
},
{
"epoch": 0.72,
"learning_rate": 0.0007404004866054327,
"loss": 0.3398,
"step": 795
},
{
"epoch": 0.72,
"learning_rate": 0.0007397563187298188,
"loss": 0.324,
"step": 796
},
{
"epoch": 0.72,
"learning_rate": 0.0007391116336665384,
"loss": 0.3286,
"step": 797
},
{
"epoch": 0.72,
"learning_rate": 0.000738466432806266,
"loss": 0.3121,
"step": 798
},
{
"epoch": 0.72,
"learning_rate": 0.0007378207175407898,
"loss": 0.3474,
"step": 799
},
{
"epoch": 0.73,
"learning_rate": 0.0007371744892630073,
"loss": 0.3354,
"step": 800
},
{
"epoch": 0.73,
"learning_rate": 0.0007365277493669224,
"loss": 0.323,
"step": 801
},
{
"epoch": 0.73,
"learning_rate": 0.0007358804992476432,
"loss": 0.3208,
"step": 802
},
{
"epoch": 0.73,
"learning_rate": 0.0007352327403013779,
"loss": 0.3376,
"step": 803
},
{
"epoch": 0.73,
"learning_rate": 0.0007345844739254324,
"loss": 0.3557,
"step": 804
},
{
"epoch": 0.73,
"learning_rate": 0.0007339357015182071,
"loss": 0.3281,
"step": 805
},
{
"epoch": 0.73,
"learning_rate": 0.0007332864244791946,
"loss": 0.3477,
"step": 806
},
{
"epoch": 0.73,
"learning_rate": 0.0007326366442089754,
"loss": 0.3159,
"step": 807
},
{
"epoch": 0.73,
"learning_rate": 0.0007319863621092156,
"loss": 0.3408,
"step": 808
},
{
"epoch": 0.73,
"learning_rate": 0.0007313355795826643,
"loss": 0.3223,
"step": 809
},
{
"epoch": 0.73,
"learning_rate": 0.0007306842980331496,
"loss": 0.3447,
"step": 810
},
{
"epoch": 0.74,
"learning_rate": 0.0007300325188655761,
"loss": 0.3508,
"step": 811
},
{
"epoch": 0.74,
"learning_rate": 0.0007293802434859221,
"loss": 0.313,
"step": 812
},
{
"epoch": 0.74,
"learning_rate": 0.0007287274733012362,
"loss": 0.3337,
"step": 813
},
{
"epoch": 0.74,
"learning_rate": 0.0007280742097196342,
"loss": 0.3105,
"step": 814
},
{
"epoch": 0.74,
"learning_rate": 0.0007274204541502964,
"loss": 0.344,
"step": 815
},
{
"epoch": 0.74,
"learning_rate": 0.0007267662080034645,
"loss": 0.3164,
"step": 816
},
{
"epoch": 0.74,
"learning_rate": 0.0007261114726904379,
"loss": 0.3223,
"step": 817
},
{
"epoch": 0.74,
"learning_rate": 0.0007254562496235717,
"loss": 0.3311,
"step": 818
},
{
"epoch": 0.74,
"learning_rate": 0.0007248005402162731,
"loss": 0.3171,
"step": 819
},
{
"epoch": 0.74,
"learning_rate": 0.0007241443458829985,
"loss": 0.3353,
"step": 820
},
{
"epoch": 0.74,
"learning_rate": 0.0007234876680392498,
"loss": 0.3069,
"step": 821
},
{
"epoch": 0.75,
"learning_rate": 0.0007228305081015728,
"loss": 0.353,
"step": 822
},
{
"epoch": 0.75,
"learning_rate": 0.0007221728674875522,
"loss": 0.2959,
"step": 823
},
{
"epoch": 0.75,
"learning_rate": 0.0007215147476158102,
"loss": 0.3022,
"step": 824
},
{
"epoch": 0.75,
"learning_rate": 0.0007208561499060032,
"loss": 0.3191,
"step": 825
},
{
"epoch": 0.75,
"learning_rate": 0.0007201970757788173,
"loss": 0.3301,
"step": 826
},
{
"epoch": 0.75,
"learning_rate": 0.0007195375266559669,
"loss": 0.3252,
"step": 827
},
{
"epoch": 0.75,
"learning_rate": 0.0007188775039601918,
"loss": 0.3191,
"step": 828
},
{
"epoch": 0.75,
"learning_rate": 0.0007182170091152518,
"loss": 0.3186,
"step": 829
},
{
"epoch": 0.75,
"learning_rate": 0.0007175560435459262,
"loss": 0.3069,
"step": 830
},
{
"epoch": 0.75,
"learning_rate": 0.0007168946086780098,
"loss": 0.3411,
"step": 831
},
{
"epoch": 0.75,
"learning_rate": 0.0007162327059383089,
"loss": 0.3354,
"step": 832
},
{
"epoch": 0.76,
"learning_rate": 0.0007155703367546401,
"loss": 0.3301,
"step": 833
},
{
"epoch": 0.76,
"learning_rate": 0.0007149075025558255,
"loss": 0.2988,
"step": 834
},
{
"epoch": 0.76,
"learning_rate": 0.0007142442047716905,
"loss": 0.3157,
"step": 835
},
{
"epoch": 0.76,
"learning_rate": 0.0007135804448330604,
"loss": 0.3408,
"step": 836
},
{
"epoch": 0.76,
"learning_rate": 0.0007129162241717577,
"loss": 0.3276,
"step": 837
},
{
"epoch": 0.76,
"learning_rate": 0.0007122515442205985,
"loss": 0.3142,
"step": 838
},
{
"epoch": 0.76,
"learning_rate": 0.0007115864064133897,
"loss": 0.3167,
"step": 839
},
{
"epoch": 0.76,
"learning_rate": 0.0007109208121849261,
"loss": 0.3237,
"step": 840
},
{
"epoch": 0.76,
"learning_rate": 0.0007102547629709866,
"loss": 0.3203,
"step": 841
},
{
"epoch": 0.76,
"learning_rate": 0.0007095882602083321,
"loss": 0.3127,
"step": 842
},
{
"epoch": 0.76,
"learning_rate": 0.0007089213053347015,
"loss": 0.321,
"step": 843
},
{
"epoch": 0.77,
"learning_rate": 0.0007082538997888087,
"loss": 0.3359,
"step": 844
},
{
"epoch": 0.77,
"learning_rate": 0.0007075860450103407,
"loss": 0.3298,
"step": 845
},
{
"epoch": 0.77,
"learning_rate": 0.0007069177424399526,
"loss": 0.3313,
"step": 846
},
{
"epoch": 0.77,
"learning_rate": 0.000706248993519266,
"loss": 0.327,
"step": 847
},
{
"epoch": 0.77,
"learning_rate": 0.0007055797996908651,
"loss": 0.3134,
"step": 848
},
{
"epoch": 0.77,
"learning_rate": 0.0007049101623982937,
"loss": 0.335,
"step": 849
},
{
"epoch": 0.77,
"learning_rate": 0.0007042400830860529,
"loss": 0.3066,
"step": 850
},
{
"epoch": 0.77,
"learning_rate": 0.0007035695631995965,
"loss": 0.3367,
"step": 851
},
{
"epoch": 0.77,
"learning_rate": 0.0007028986041853289,
"loss": 0.3232,
"step": 852
},
{
"epoch": 0.77,
"learning_rate": 0.0007022272074906021,
"loss": 0.2981,
"step": 853
},
{
"epoch": 0.77,
"learning_rate": 0.0007015553745637117,
"loss": 0.3296,
"step": 854
},
{
"epoch": 0.78,
"learning_rate": 0.0007008831068538948,
"loss": 0.3408,
"step": 855
},
{
"epoch": 0.78,
"learning_rate": 0.0007002104058113263,
"loss": 0.335,
"step": 856
},
{
"epoch": 0.78,
"learning_rate": 0.0006995372728871154,
"loss": 0.3206,
"step": 857
},
{
"epoch": 0.78,
"learning_rate": 0.0006988637095333036,
"loss": 0.3259,
"step": 858
},
{
"epoch": 0.78,
"learning_rate": 0.0006981897172028604,
"loss": 0.3145,
"step": 859
},
{
"epoch": 0.78,
"learning_rate": 0.0006975152973496808,
"loss": 0.3167,
"step": 860
},
{
"epoch": 0.78,
"learning_rate": 0.0006968404514285821,
"loss": 0.3303,
"step": 861
},
{
"epoch": 0.78,
"learning_rate": 0.0006961651808953008,
"loss": 0.3342,
"step": 862
},
{
"epoch": 0.78,
"learning_rate": 0.0006954894872064889,
"loss": 0.3232,
"step": 863
},
{
"epoch": 0.78,
"learning_rate": 0.0006948133718197118,
"loss": 0.322,
"step": 864
},
{
"epoch": 0.78,
"learning_rate": 0.0006941368361934442,
"loss": 0.321,
"step": 865
},
{
"epoch": 0.79,
"learning_rate": 0.000693459881787067,
"loss": 0.3179,
"step": 866
},
{
"epoch": 0.79,
"learning_rate": 0.0006927825100608654,
"loss": 0.334,
"step": 867
},
{
"epoch": 0.79,
"learning_rate": 0.0006921047224760238,
"loss": 0.2998,
"step": 868
},
{
"epoch": 0.79,
"learning_rate": 0.0006914265204946244,
"loss": 0.3284,
"step": 869
},
{
"epoch": 0.79,
"learning_rate": 0.0006907479055796431,
"loss": 0.2926,
"step": 870
},
{
"epoch": 0.79,
"learning_rate": 0.0006900688791949463,
"loss": 0.3105,
"step": 871
},
{
"epoch": 0.79,
"learning_rate": 0.000689389442805288,
"loss": 0.2994,
"step": 872
},
{
"epoch": 0.79,
"learning_rate": 0.0006887095978763072,
"loss": 0.3174,
"step": 873
},
{
"epoch": 0.79,
"learning_rate": 0.0006880293458745237,
"loss": 0.3188,
"step": 874
},
{
"epoch": 0.79,
"learning_rate": 0.0006873486882673354,
"loss": 0.3174,
"step": 875
},
{
"epoch": 0.79,
"learning_rate": 0.0006866676265230154,
"loss": 0.302,
"step": 876
},
{
"epoch": 0.8,
"learning_rate": 0.0006859861621107084,
"loss": 0.3145,
"step": 877
},
{
"epoch": 0.8,
"learning_rate": 0.0006853042965004277,
"loss": 0.3245,
"step": 878
},
{
"epoch": 0.8,
"learning_rate": 0.000684622031163052,
"loss": 0.3081,
"step": 879
},
{
"epoch": 0.8,
"learning_rate": 0.0006839393675703225,
"loss": 0.3274,
"step": 880
},
{
"epoch": 0.8,
"learning_rate": 0.0006832563071948394,
"loss": 0.3198,
"step": 881
},
{
"epoch": 0.8,
"learning_rate": 0.0006825728515100586,
"loss": 0.3279,
"step": 882
},
{
"epoch": 0.8,
"learning_rate": 0.000681889001990289,
"loss": 0.3208,
"step": 883
},
{
"epoch": 0.8,
"learning_rate": 0.0006812047601106889,
"loss": 0.321,
"step": 884
},
{
"epoch": 0.8,
"learning_rate": 0.0006805201273472632,
"loss": 0.3323,
"step": 885
},
{
"epoch": 0.8,
"learning_rate": 0.0006798351051768597,
"loss": 0.3118,
"step": 886
},
{
"epoch": 0.8,
"learning_rate": 0.0006791496950771662,
"loss": 0.314,
"step": 887
},
{
"epoch": 0.81,
"learning_rate": 0.0006784638985267076,
"loss": 0.3154,
"step": 888
},
{
"epoch": 0.81,
"learning_rate": 0.0006777777170048423,
"loss": 0.3162,
"step": 889
},
{
"epoch": 0.81,
"learning_rate": 0.0006770911519917591,
"loss": 0.3108,
"step": 890
},
{
"epoch": 0.81,
"learning_rate": 0.0006764042049684739,
"loss": 0.3186,
"step": 891
},
{
"epoch": 0.81,
"learning_rate": 0.0006757168774168269,
"loss": 0.312,
"step": 892
},
{
"epoch": 0.81,
"learning_rate": 0.0006750291708194793,
"loss": 0.3142,
"step": 893
},
{
"epoch": 0.81,
"learning_rate": 0.0006743410866599094,
"loss": 0.3064,
"step": 894
},
{
"epoch": 0.81,
"learning_rate": 0.0006736526264224101,
"loss": 0.324,
"step": 895
},
{
"epoch": 0.81,
"learning_rate": 0.0006729637915920863,
"loss": 0.2933,
"step": 896
},
{
"epoch": 0.81,
"learning_rate": 0.0006722745836548499,
"loss": 0.3137,
"step": 897
},
{
"epoch": 0.81,
"learning_rate": 0.0006715850040974181,
"loss": 0.3301,
"step": 898
},
{
"epoch": 0.82,
"learning_rate": 0.0006708950544073101,
"loss": 0.3142,
"step": 899
},
{
"epoch": 0.82,
"learning_rate": 0.0006702047360728429,
"loss": 0.3513,
"step": 900
},
{
"epoch": 0.82,
"learning_rate": 0.000669514050583129,
"loss": 0.3167,
"step": 901
},
{
"epoch": 0.82,
"learning_rate": 0.000668822999428073,
"loss": 0.304,
"step": 902
},
{
"epoch": 0.82,
"learning_rate": 0.000668131584098368,
"loss": 0.303,
"step": 903
},
{
"epoch": 0.82,
"learning_rate": 0.0006674398060854931,
"loss": 0.3118,
"step": 904
},
{
"epoch": 0.82,
"learning_rate": 0.0006667476668817094,
"loss": 0.321,
"step": 905
},
{
"epoch": 0.82,
"learning_rate": 0.0006660551679800573,
"loss": 0.3188,
"step": 906
},
{
"epoch": 0.82,
"learning_rate": 0.0006653623108743529,
"loss": 0.3218,
"step": 907
},
{
"epoch": 0.82,
"learning_rate": 0.0006646690970591856,
"loss": 0.3008,
"step": 908
},
{
"epoch": 0.82,
"learning_rate": 0.0006639755280299133,
"loss": 0.3318,
"step": 909
},
{
"epoch": 0.83,
"learning_rate": 0.0006632816052826611,
"loss": 0.3059,
"step": 910
},
{
"epoch": 0.83,
"learning_rate": 0.0006625873303143166,
"loss": 0.304,
"step": 911
},
{
"epoch": 0.83,
"learning_rate": 0.0006618927046225272,
"loss": 0.3184,
"step": 912
},
{
"epoch": 0.83,
"learning_rate": 0.0006611977297056968,
"loss": 0.323,
"step": 913
},
{
"epoch": 0.83,
"learning_rate": 0.0006605024070629832,
"loss": 0.2986,
"step": 914
},
{
"epoch": 0.83,
"learning_rate": 0.0006598067381942938,
"loss": 0.3347,
"step": 915
},
{
"epoch": 0.83,
"learning_rate": 0.0006591107246002825,
"loss": 0.3076,
"step": 916
},
{
"epoch": 0.83,
"learning_rate": 0.0006584143677823477,
"loss": 0.3165,
"step": 917
},
{
"epoch": 0.83,
"learning_rate": 0.0006577176692426278,
"loss": 0.3142,
"step": 918
},
{
"epoch": 0.83,
"learning_rate": 0.0006570206304839978,
"loss": 0.3164,
"step": 919
},
{
"epoch": 0.83,
"learning_rate": 0.0006563232530100676,
"loss": 0.3313,
"step": 920
},
{
"epoch": 0.83,
"learning_rate": 0.0006556255383251769,
"loss": 0.3145,
"step": 921
},
{
"epoch": 0.84,
"learning_rate": 0.0006549274879343931,
"loss": 0.3262,
"step": 922
},
{
"epoch": 0.84,
"learning_rate": 0.0006542291033435082,
"loss": 0.3088,
"step": 923
},
{
"epoch": 0.84,
"learning_rate": 0.0006535303860590345,
"loss": 0.3237,
"step": 924
},
{
"epoch": 0.84,
"learning_rate": 0.0006528313375882021,
"loss": 0.3169,
"step": 925
},
{
"epoch": 0.84,
"learning_rate": 0.0006521319594389554,
"loss": 0.3367,
"step": 926
},
{
"epoch": 0.84,
"learning_rate": 0.0006514322531199509,
"loss": 0.3271,
"step": 927
},
{
"epoch": 0.84,
"learning_rate": 0.0006507322201405514,
"loss": 0.3184,
"step": 928
},
{
"epoch": 0.84,
"learning_rate": 0.0006500318620108259,
"loss": 0.3008,
"step": 929
},
{
"epoch": 0.84,
"learning_rate": 0.0006493311802415438,
"loss": 0.3154,
"step": 930
},
{
"epoch": 0.84,
"learning_rate": 0.0006486301763441731,
"loss": 0.3196,
"step": 931
},
{
"epoch": 0.84,
"learning_rate": 0.0006479288518308763,
"loss": 0.3059,
"step": 932
},
{
"epoch": 0.85,
"learning_rate": 0.0006472272082145081,
"loss": 0.3315,
"step": 933
},
{
"epoch": 0.85,
"learning_rate": 0.0006465252470086108,
"loss": 0.3408,
"step": 934
},
{
"epoch": 0.85,
"learning_rate": 0.0006458229697274125,
"loss": 0.288,
"step": 935
},
{
"epoch": 0.85,
"learning_rate": 0.0006451203778858228,
"loss": 0.3035,
"step": 936
},
{
"epoch": 0.85,
"learning_rate": 0.0006444174729994294,
"loss": 0.3156,
"step": 937
},
{
"epoch": 0.85,
"learning_rate": 0.0006437142565844963,
"loss": 0.3169,
"step": 938
},
{
"epoch": 0.85,
"learning_rate": 0.0006430107301579587,
"loss": 0.2957,
"step": 939
},
{
"epoch": 0.85,
"learning_rate": 0.0006423068952374207,
"loss": 0.2944,
"step": 940
},
{
"epoch": 0.85,
"learning_rate": 0.000641602753341152,
"loss": 0.3179,
"step": 941
},
{
"epoch": 0.85,
"learning_rate": 0.0006408983059880846,
"loss": 0.335,
"step": 942
},
{
"epoch": 0.85,
"learning_rate": 0.000640193554697809,
"loss": 0.3083,
"step": 943
},
{
"epoch": 0.86,
"learning_rate": 0.0006394885009905716,
"loss": 0.2937,
"step": 944
},
{
"epoch": 0.86,
"learning_rate": 0.0006387831463872714,
"loss": 0.332,
"step": 945
},
{
"epoch": 0.86,
"learning_rate": 0.0006380774924094559,
"loss": 0.3038,
"step": 946
},
{
"epoch": 0.86,
"learning_rate": 0.0006373715405793188,
"loss": 0.3181,
"step": 947
},
{
"epoch": 0.86,
"learning_rate": 0.0006366652924196964,
"loss": 0.3218,
"step": 948
},
{
"epoch": 0.86,
"learning_rate": 0.0006359587494540637,
"loss": 0.3108,
"step": 949
},
{
"epoch": 0.86,
"learning_rate": 0.0006352519132065321,
"loss": 0.3064,
"step": 950
},
{
"epoch": 0.86,
"learning_rate": 0.0006345447852018457,
"loss": 0.2981,
"step": 951
},
{
"epoch": 0.86,
"learning_rate": 0.0006338373669653776,
"loss": 0.314,
"step": 952
},
{
"epoch": 0.86,
"learning_rate": 0.0006331296600231272,
"loss": 0.3081,
"step": 953
},
{
"epoch": 0.86,
"learning_rate": 0.0006324216659017168,
"loss": 0.3254,
"step": 954
},
{
"epoch": 0.87,
"learning_rate": 0.0006317133861283876,
"loss": 0.3186,
"step": 955
},
{
"epoch": 0.87,
"learning_rate": 0.0006310048222309976,
"loss": 0.3201,
"step": 956
},
{
"epoch": 0.87,
"learning_rate": 0.0006302959757380177,
"loss": 0.3027,
"step": 957
},
{
"epoch": 0.87,
"learning_rate": 0.0006295868481785281,
"loss": 0.3086,
"step": 958
},
{
"epoch": 0.87,
"learning_rate": 0.0006288774410822153,
"loss": 0.3337,
"step": 959
},
{
"epoch": 0.87,
"learning_rate": 0.000628167755979369,
"loss": 0.3115,
"step": 960
},
{
"epoch": 0.87,
"learning_rate": 0.0006274577944008785,
"loss": 0.2803,
"step": 961
},
{
"epoch": 0.87,
"learning_rate": 0.0006267475578782293,
"loss": 0.2939,
"step": 962
},
{
"epoch": 0.87,
"learning_rate": 0.0006260370479435005,
"loss": 0.3047,
"step": 963
},
{
"epoch": 0.87,
"learning_rate": 0.0006253262661293602,
"loss": 0.3215,
"step": 964
},
{
"epoch": 0.87,
"learning_rate": 0.0006246152139690641,
"loss": 0.3137,
"step": 965
},
{
"epoch": 0.88,
"learning_rate": 0.00062390389299645,
"loss": 0.3027,
"step": 966
},
{
"epoch": 0.88,
"learning_rate": 0.0006231923047459361,
"loss": 0.2981,
"step": 967
},
{
"epoch": 0.88,
"learning_rate": 0.000622480450752517,
"loss": 0.3303,
"step": 968
},
{
"epoch": 0.88,
"learning_rate": 0.0006217683325517608,
"loss": 0.3269,
"step": 969
},
{
"epoch": 0.88,
"learning_rate": 0.000621055951679805,
"loss": 0.3049,
"step": 970
},
{
"epoch": 0.88,
"learning_rate": 0.0006203433096733542,
"loss": 0.3058,
"step": 971
},
{
"epoch": 0.88,
"learning_rate": 0.0006196304080696763,
"loss": 0.2935,
"step": 972
},
{
"epoch": 0.88,
"learning_rate": 0.0006189172484065989,
"loss": 0.301,
"step": 973
},
{
"epoch": 0.88,
"learning_rate": 0.0006182038322225063,
"loss": 0.3052,
"step": 974
},
{
"epoch": 0.88,
"learning_rate": 0.0006174901610563364,
"loss": 0.3262,
"step": 975
},
{
"epoch": 0.88,
"learning_rate": 0.0006167762364475771,
"loss": 0.3149,
"step": 976
},
{
"epoch": 0.89,
"learning_rate": 0.0006160620599362627,
"loss": 0.3218,
"step": 977
},
{
"epoch": 0.89,
"learning_rate": 0.0006153476330629711,
"loss": 0.3101,
"step": 978
},
{
"epoch": 0.89,
"learning_rate": 0.0006146329573688204,
"loss": 0.3254,
"step": 979
},
{
"epoch": 0.89,
"learning_rate": 0.0006139180343954651,
"loss": 0.303,
"step": 980
},
{
"epoch": 0.89,
"learning_rate": 0.0006132028656850935,
"loss": 0.3115,
"step": 981
},
{
"epoch": 0.89,
"learning_rate": 0.0006124874527804237,
"loss": 0.3059,
"step": 982
},
{
"epoch": 0.89,
"learning_rate": 0.0006117717972247005,
"loss": 0.2981,
"step": 983
},
{
"epoch": 0.89,
"learning_rate": 0.0006110559005616928,
"loss": 0.3274,
"step": 984
},
{
"epoch": 0.89,
"learning_rate": 0.0006103397643356887,
"loss": 0.332,
"step": 985
},
{
"epoch": 0.89,
"learning_rate": 0.0006096233900914934,
"loss": 0.3142,
"step": 986
},
{
"epoch": 0.89,
"learning_rate": 0.0006089067793744258,
"loss": 0.3191,
"step": 987
},
{
"epoch": 0.9,
"learning_rate": 0.0006081899337303147,
"loss": 0.3071,
"step": 988
},
{
"epoch": 0.9,
"learning_rate": 0.0006074728547054955,
"loss": 0.3123,
"step": 989
},
{
"epoch": 0.9,
"learning_rate": 0.0006067555438468075,
"loss": 0.3127,
"step": 990
},
{
"epoch": 0.9,
"learning_rate": 0.0006060380027015898,
"loss": 0.3035,
"step": 991
},
{
"epoch": 0.9,
"learning_rate": 0.0006053202328176778,
"loss": 0.2957,
"step": 992
},
{
"epoch": 0.9,
"learning_rate": 0.0006046022357434012,
"loss": 0.3171,
"step": 993
},
{
"epoch": 0.9,
"learning_rate": 0.0006038840130275794,
"loss": 0.2969,
"step": 994
},
{
"epoch": 0.9,
"learning_rate": 0.0006031655662195183,
"loss": 0.2931,
"step": 995
},
{
"epoch": 0.9,
"learning_rate": 0.0006024468968690077,
"loss": 0.3038,
"step": 996
},
{
"epoch": 0.9,
"learning_rate": 0.000601728006526317,
"loss": 0.2947,
"step": 997
},
{
"epoch": 0.9,
"learning_rate": 0.0006010088967421923,
"loss": 0.3174,
"step": 998
},
{
"epoch": 0.91,
"learning_rate": 0.0006002895690678534,
"loss": 0.3022,
"step": 999
},
{
"epoch": 0.91,
"learning_rate": 0.0005995700250549902,
"loss": 0.3142,
"step": 1000
},
{
"epoch": 0.91,
"learning_rate": 0.0005988502662557587,
"loss": 0.2896,
"step": 1001
},
{
"epoch": 0.91,
"learning_rate": 0.0005981302942227787,
"loss": 0.3049,
"step": 1002
},
{
"epoch": 0.91,
"learning_rate": 0.00059741011050913,
"loss": 0.3074,
"step": 1003
},
{
"epoch": 0.91,
"learning_rate": 0.0005966897166683483,
"loss": 0.2947,
"step": 1004
},
{
"epoch": 0.91,
"learning_rate": 0.0005959691142544236,
"loss": 0.3203,
"step": 1005
},
{
"epoch": 0.91,
"learning_rate": 0.0005952483048217956,
"loss": 0.2974,
"step": 1006
},
{
"epoch": 0.91,
"learning_rate": 0.0005945272899253496,
"loss": 0.3035,
"step": 1007
},
{
"epoch": 0.91,
"learning_rate": 0.0005938060711204154,
"loss": 0.3105,
"step": 1008
},
{
"epoch": 0.91,
"learning_rate": 0.0005930846499627619,
"loss": 0.2939,
"step": 1009
},
{
"epoch": 0.92,
"learning_rate": 0.0005923630280085947,
"loss": 0.2971,
"step": 1010
},
{
"epoch": 0.92,
"learning_rate": 0.0005916412068145524,
"loss": 0.3328,
"step": 1011
},
{
"epoch": 0.92,
"learning_rate": 0.000590919187937704,
"loss": 0.3257,
"step": 1012
},
{
"epoch": 0.92,
"learning_rate": 0.000590196972935544,
"loss": 0.3069,
"step": 1013
},
{
"epoch": 0.92,
"learning_rate": 0.0005894745633659906,
"loss": 0.3062,
"step": 1014
},
{
"epoch": 0.92,
"learning_rate": 0.0005887519607873815,
"loss": 0.3191,
"step": 1015
},
{
"epoch": 0.92,
"learning_rate": 0.0005880291667584708,
"loss": 0.2972,
"step": 1016
},
{
"epoch": 0.92,
"learning_rate": 0.0005873061828384255,
"loss": 0.3152,
"step": 1017
},
{
"epoch": 0.92,
"learning_rate": 0.0005865830105868225,
"loss": 0.3215,
"step": 1018
},
{
"epoch": 0.92,
"learning_rate": 0.0005858596515636445,
"loss": 0.3176,
"step": 1019
},
{
"epoch": 0.92,
"learning_rate": 0.0005851361073292776,
"loss": 0.3015,
"step": 1020
},
{
"epoch": 0.93,
"learning_rate": 0.0005844123794445069,
"loss": 0.3032,
"step": 1021
},
{
"epoch": 0.93,
"learning_rate": 0.0005836884694705143,
"loss": 0.3154,
"step": 1022
},
{
"epoch": 0.93,
"learning_rate": 0.0005829643789688737,
"loss": 0.3096,
"step": 1023
},
{
"epoch": 0.93,
"learning_rate": 0.0005822401095015489,
"loss": 0.2983,
"step": 1024
},
{
"epoch": 0.93,
"learning_rate": 0.00058151566263089,
"loss": 0.3201,
"step": 1025
},
{
"epoch": 0.93,
"learning_rate": 0.0005807910399196294,
"loss": 0.3079,
"step": 1026
},
{
"epoch": 0.93,
"learning_rate": 0.0005800662429308786,
"loss": 0.3069,
"step": 1027
},
{
"epoch": 0.93,
"learning_rate": 0.0005793412732281257,
"loss": 0.2944,
"step": 1028
},
{
"epoch": 0.93,
"learning_rate": 0.0005786161323752307,
"loss": 0.3101,
"step": 1029
},
{
"epoch": 0.93,
"learning_rate": 0.0005778908219364233,
"loss": 0.3044,
"step": 1030
},
{
"epoch": 0.93,
"learning_rate": 0.0005771653434762988,
"loss": 0.3044,
"step": 1031
},
{
"epoch": 0.94,
"learning_rate": 0.0005764396985598149,
"loss": 0.2866,
"step": 1032
},
{
"epoch": 0.94,
"learning_rate": 0.0005757138887522884,
"loss": 0.2927,
"step": 1033
},
{
"epoch": 0.94,
"learning_rate": 0.0005749879156193919,
"loss": 0.3113,
"step": 1034
},
{
"epoch": 0.94,
"learning_rate": 0.0005742617807271502,
"loss": 0.2939,
"step": 1035
},
{
"epoch": 0.94,
"learning_rate": 0.0005735354856419371,
"loss": 0.2756,
"step": 1036
},
{
"epoch": 0.94,
"learning_rate": 0.0005728090319304718,
"loss": 0.3049,
"step": 1037
},
{
"epoch": 0.94,
"learning_rate": 0.0005720824211598161,
"loss": 0.3091,
"step": 1038
},
{
"epoch": 0.94,
"learning_rate": 0.0005713556548973701,
"loss": 0.3201,
"step": 1039
},
{
"epoch": 0.94,
"learning_rate": 0.0005706287347108696,
"loss": 0.2925,
"step": 1040
},
{
"epoch": 0.94,
"learning_rate": 0.0005699016621683823,
"loss": 0.3044,
"step": 1041
},
{
"epoch": 0.94,
"learning_rate": 0.0005691744388383047,
"loss": 0.3184,
"step": 1042
},
{
"epoch": 0.95,
"learning_rate": 0.0005684470662893586,
"loss": 0.2959,
"step": 1043
},
{
"epoch": 0.95,
"learning_rate": 0.0005677195460905873,
"loss": 0.2947,
"step": 1044
},
{
"epoch": 0.95,
"learning_rate": 0.000566991879811353,
"loss": 0.3047,
"step": 1045
},
{
"epoch": 0.95,
"learning_rate": 0.0005662640690213329,
"loss": 0.2968,
"step": 1046
},
{
"epoch": 0.95,
"learning_rate": 0.0005655361152905161,
"loss": 0.3086,
"step": 1047
},
{
"epoch": 0.95,
"learning_rate": 0.0005648080201891995,
"loss": 0.3073,
"step": 1048
},
{
"epoch": 0.95,
"learning_rate": 0.0005640797852879856,
"loss": 0.2991,
"step": 1049
},
{
"epoch": 0.95,
"learning_rate": 0.0005633514121577781,
"loss": 0.296,
"step": 1050
},
{
"epoch": 0.95,
"learning_rate": 0.0005626229023697789,
"loss": 0.2931,
"step": 1051
},
{
"epoch": 0.95,
"learning_rate": 0.0005618942574954849,
"loss": 0.3088,
"step": 1052
},
{
"epoch": 0.95,
"learning_rate": 0.0005611654791066841,
"loss": 0.3171,
"step": 1053
},
{
"epoch": 0.96,
"learning_rate": 0.0005604365687754528,
"loss": 0.306,
"step": 1054
},
{
"epoch": 0.96,
"learning_rate": 0.0005597075280741518,
"loss": 0.3062,
"step": 1055
},
{
"epoch": 0.96,
"learning_rate": 0.0005589783585754232,
"loss": 0.3052,
"step": 1056
},
{
"epoch": 0.96,
"learning_rate": 0.0005582490618521864,
"loss": 0.3052,
"step": 1057
},
{
"epoch": 0.96,
"learning_rate": 0.0005575196394776359,
"loss": 0.2819,
"step": 1058
},
{
"epoch": 0.96,
"learning_rate": 0.0005567900930252374,
"loss": 0.2979,
"step": 1059
},
{
"epoch": 0.96,
"learning_rate": 0.0005560604240687234,
"loss": 0.2844,
"step": 1060
},
{
"epoch": 0.96,
"learning_rate": 0.0005553306341820916,
"loss": 0.3057,
"step": 1061
},
{
"epoch": 0.96,
"learning_rate": 0.0005546007249395998,
"loss": 0.2942,
"step": 1062
},
{
"epoch": 0.96,
"learning_rate": 0.0005538706979157635,
"loss": 0.2917,
"step": 1063
},
{
"epoch": 0.96,
"learning_rate": 0.0005531405546853525,
"loss": 0.3262,
"step": 1064
},
{
"epoch": 0.97,
"learning_rate": 0.0005524102968233872,
"loss": 0.3137,
"step": 1065
},
{
"epoch": 0.97,
"learning_rate": 0.0005516799259051349,
"loss": 0.282,
"step": 1066
},
{
"epoch": 0.97,
"learning_rate": 0.0005509494435061073,
"loss": 0.314,
"step": 1067
},
{
"epoch": 0.97,
"learning_rate": 0.0005502188512020566,
"loss": 0.2916,
"step": 1068
},
{
"epoch": 0.97,
"learning_rate": 0.0005494881505689713,
"loss": 0.3003,
"step": 1069
},
{
"epoch": 0.97,
"learning_rate": 0.0005487573431830745,
"loss": 0.2889,
"step": 1070
},
{
"epoch": 0.97,
"learning_rate": 0.000548026430620819,
"loss": 0.3101,
"step": 1071
},
{
"epoch": 0.97,
"learning_rate": 0.0005472954144588847,
"loss": 0.2876,
"step": 1072
},
{
"epoch": 0.97,
"learning_rate": 0.0005465642962741749,
"loss": 0.3101,
"step": 1073
},
{
"epoch": 0.97,
"learning_rate": 0.0005458330776438135,
"loss": 0.3062,
"step": 1074
},
{
"epoch": 0.97,
"learning_rate": 0.0005451017601451399,
"loss": 0.2983,
"step": 1075
},
{
"epoch": 0.98,
"learning_rate": 0.0005443703453557079,
"loss": 0.2874,
"step": 1076
},
{
"epoch": 0.98,
"learning_rate": 0.0005436388348532807,
"loss": 0.3032,
"step": 1077
},
{
"epoch": 0.98,
"learning_rate": 0.0005429072302158279,
"loss": 0.304,
"step": 1078
},
{
"epoch": 0.98,
"learning_rate": 0.0005421755330215223,
"loss": 0.3,
"step": 1079
},
{
"epoch": 0.98,
"learning_rate": 0.0005414437448487368,
"loss": 0.301,
"step": 1080
},
{
"epoch": 0.98,
"learning_rate": 0.0005407118672760393,
"loss": 0.3262,
"step": 1081
},
{
"epoch": 0.98,
"learning_rate": 0.0005399799018821917,
"loss": 0.302,
"step": 1082
},
{
"epoch": 0.98,
"learning_rate": 0.0005392478502461452,
"loss": 0.2927,
"step": 1083
},
{
"epoch": 0.98,
"learning_rate": 0.0005385157139470365,
"loss": 0.3057,
"step": 1084
},
{
"epoch": 0.98,
"learning_rate": 0.0005377834945641853,
"loss": 0.302,
"step": 1085
},
{
"epoch": 0.98,
"learning_rate": 0.0005370511936770906,
"loss": 0.3096,
"step": 1086
},
{
"epoch": 0.99,
"learning_rate": 0.0005363188128654271,
"loss": 0.2936,
"step": 1087
},
{
"epoch": 0.99,
"learning_rate": 0.0005355863537090418,
"loss": 0.3044,
"step": 1088
},
{
"epoch": 0.99,
"learning_rate": 0.0005348538177879506,
"loss": 0.2922,
"step": 1089
},
{
"epoch": 0.99,
"learning_rate": 0.0005341212066823356,
"loss": 0.3223,
"step": 1090
},
{
"epoch": 0.99,
"learning_rate": 0.0005333885219725402,
"loss": 0.3136,
"step": 1091
},
{
"epoch": 0.99,
"learning_rate": 0.0005326557652390676,
"loss": 0.2731,
"step": 1092
},
{
"epoch": 0.99,
"learning_rate": 0.0005319229380625754,
"loss": 0.2853,
"step": 1093
},
{
"epoch": 0.99,
"learning_rate": 0.0005311900420238736,
"loss": 0.3025,
"step": 1094
},
{
"epoch": 0.99,
"learning_rate": 0.0005304570787039208,
"loss": 0.3047,
"step": 1095
},
{
"epoch": 0.99,
"learning_rate": 0.0005297240496838206,
"loss": 0.3105,
"step": 1096
},
{
"epoch": 0.99,
"learning_rate": 0.0005289909565448184,
"loss": 0.2917,
"step": 1097
},
{
"epoch": 1.0,
"learning_rate": 0.000528257800868298,
"loss": 0.3115,
"step": 1098
},
{
"epoch": 1.0,
"learning_rate": 0.0005275245842357777,
"loss": 0.2814,
"step": 1099
},
{
"epoch": 1.0,
"learning_rate": 0.0005267913082289079,
"loss": 0.2881,
"step": 1100
},
{
"epoch": 1.0,
"learning_rate": 0.0005260579744294665,
"loss": 0.3306,
"step": 1101
},
{
"epoch": 1.0,
"learning_rate": 0.0005253245844193564,
"loss": 0.3071,
"step": 1102
},
{
"epoch": 1.0,
"learning_rate": 0.0005245911397806017,
"loss": 0.3223,
"step": 1103
},
{
"epoch": 1.0,
"learning_rate": 0.0005238576420953442,
"loss": 0.3137,
"step": 1104
},
{
"epoch": 1.0,
"learning_rate": 0.0005231240929458406,
"loss": 0.2968,
"step": 1105
},
{
"epoch": 1.0,
"learning_rate": 0.0005223904939144578,
"loss": 0.2845,
"step": 1106
},
{
"epoch": 1.0,
"learning_rate": 0.000521656846583671,
"loss": 0.3027,
"step": 1107
},
{
"epoch": 1.0,
"learning_rate": 0.0005209231525360594,
"loss": 0.2844,
"step": 1108
},
{
"epoch": 1.01,
"learning_rate": 0.0005201894133543027,
"loss": 0.2915,
"step": 1109
},
{
"epoch": 1.01,
"learning_rate": 0.0005194556306211784,
"loss": 0.312,
"step": 1110
},
{
"epoch": 1.01,
"learning_rate": 0.0005187218059195577,
"loss": 0.3083,
"step": 1111
},
{
"epoch": 1.01,
"learning_rate": 0.0005179879408324024,
"loss": 0.3079,
"step": 1112
},
{
"epoch": 1.01,
"learning_rate": 0.0005172540369427611,
"loss": 0.2969,
"step": 1113
},
{
"epoch": 1.01,
"learning_rate": 0.0005165200958337667,
"loss": 0.2983,
"step": 1114
},
{
"epoch": 1.01,
"learning_rate": 0.0005157861190886319,
"loss": 0.3018,
"step": 1115
},
{
"epoch": 1.01,
"learning_rate": 0.0005150521082906466,
"loss": 0.2965,
"step": 1116
},
{
"epoch": 1.01,
"learning_rate": 0.0005143180650231741,
"loss": 0.3081,
"step": 1117
},
{
"epoch": 1.01,
"learning_rate": 0.0005135839908696471,
"loss": 0.2849,
"step": 1118
},
{
"epoch": 1.01,
"learning_rate": 0.0005128498874135658,
"loss": 0.2789,
"step": 1119
},
{
"epoch": 1.02,
"learning_rate": 0.0005121157562384936,
"loss": 0.2971,
"step": 1120
},
{
"epoch": 1.02,
"learning_rate": 0.0005113815989280528,
"loss": 0.3179,
"step": 1121
},
{
"epoch": 1.02,
"learning_rate": 0.0005106474170659231,
"loss": 0.2908,
"step": 1122
},
{
"epoch": 1.02,
"learning_rate": 0.0005099132122358365,
"loss": 0.314,
"step": 1123
},
{
"epoch": 1.02,
"learning_rate": 0.0005091789860215746,
"loss": 0.3011,
"step": 1124
},
{
"epoch": 1.02,
"learning_rate": 0.0005084447400069655,
"loss": 0.3027,
"step": 1125
},
{
"epoch": 1.02,
"learning_rate": 0.0005077104757758799,
"loss": 0.3169,
"step": 1126
},
{
"epoch": 1.02,
"learning_rate": 0.0005069761949122274,
"loss": 0.3218,
"step": 1127
},
{
"epoch": 1.02,
"learning_rate": 0.000506241898999954,
"loss": 0.2952,
"step": 1128
},
{
"epoch": 1.02,
"learning_rate": 0.0005055075896230379,
"loss": 0.3027,
"step": 1129
},
{
"epoch": 1.02,
"learning_rate": 0.0005047732683654862,
"loss": 0.2942,
"step": 1130
},
{
"epoch": 1.03,
"learning_rate": 0.0005040389368113318,
"loss": 0.2983,
"step": 1131
},
{
"epoch": 1.03,
"learning_rate": 0.0005033045965446302,
"loss": 0.2957,
"step": 1132
},
{
"epoch": 1.03,
"learning_rate": 0.0005025702491494549,
"loss": 0.288,
"step": 1133
},
{
"epoch": 1.03,
"learning_rate": 0.0005018358962098954,
"loss": 0.3054,
"step": 1134
},
{
"epoch": 1.03,
"learning_rate": 0.0005011015393100529,
"loss": 0.3147,
"step": 1135
},
{
"epoch": 1.03,
"learning_rate": 0.0005003671800340371,
"loss": 0.2971,
"step": 1136
},
{
"epoch": 1.03,
"learning_rate": 0.000499632819965963,
"loss": 0.3049,
"step": 1137
},
{
"epoch": 1.03,
"learning_rate": 0.0004988984606899472,
"loss": 0.2905,
"step": 1138
},
{
"epoch": 1.03,
"learning_rate": 0.0004981641037901046,
"loss": 0.2886,
"step": 1139
},
{
"epoch": 1.03,
"learning_rate": 0.0004974297508505451,
"loss": 0.2827,
"step": 1140
},
{
"epoch": 1.03,
"learning_rate": 0.0004966954034553698,
"loss": 0.2638,
"step": 1141
},
{
"epoch": 1.04,
"learning_rate": 0.0004959610631886681,
"loss": 0.2679,
"step": 1142
},
{
"epoch": 1.04,
"learning_rate": 0.000495226731634514,
"loss": 0.2886,
"step": 1143
},
{
"epoch": 1.04,
"learning_rate": 0.0004944924103769623,
"loss": 0.2748,
"step": 1144
},
{
"epoch": 1.04,
"learning_rate": 0.0004937581010000462,
"loss": 0.3069,
"step": 1145
},
{
"epoch": 1.04,
"learning_rate": 0.0004930238050877727,
"loss": 0.3027,
"step": 1146
},
{
"epoch": 1.04,
"learning_rate": 0.0004922895242241202,
"loss": 0.2906,
"step": 1147
},
{
"epoch": 1.04,
"learning_rate": 0.0004915552599930346,
"loss": 0.2871,
"step": 1148
},
{
"epoch": 1.04,
"learning_rate": 0.0004908210139784254,
"loss": 0.2726,
"step": 1149
},
{
"epoch": 1.04,
"learning_rate": 0.0004900867877641636,
"loss": 0.2961,
"step": 1150
},
{
"epoch": 1.04,
"learning_rate": 0.000489352582934077,
"loss": 0.3118,
"step": 1151
},
{
"epoch": 1.04,
"learning_rate": 0.0004886184010719472,
"loss": 0.2888,
"step": 1152
},
{
"epoch": 1.05,
"learning_rate": 0.0004878842437615065,
"loss": 0.3044,
"step": 1153
},
{
"epoch": 1.05,
"learning_rate": 0.00048715011258643404,
"loss": 0.3188,
"step": 1154
},
{
"epoch": 1.05,
"learning_rate": 0.0004864160091303531,
"loss": 0.301,
"step": 1155
},
{
"epoch": 1.05,
"learning_rate": 0.0004856819349768262,
"loss": 0.2841,
"step": 1156
},
{
"epoch": 1.05,
"learning_rate": 0.0004849478917093535,
"loss": 0.2949,
"step": 1157
},
{
"epoch": 1.05,
"learning_rate": 0.00048421388091136815,
"loss": 0.3031,
"step": 1158
},
{
"epoch": 1.05,
"learning_rate": 0.00048347990416623335,
"loss": 0.2932,
"step": 1159
},
{
"epoch": 1.05,
"learning_rate": 0.000482745963057239,
"loss": 0.2864,
"step": 1160
},
{
"epoch": 1.05,
"learning_rate": 0.0004820120591675978,
"loss": 0.2998,
"step": 1161
},
{
"epoch": 1.05,
"learning_rate": 0.00048127819408044233,
"loss": 0.2861,
"step": 1162
},
{
"epoch": 1.05,
"learning_rate": 0.0004805443693788216,
"loss": 0.2947,
"step": 1163
},
{
"epoch": 1.06,
"learning_rate": 0.0004798105866456973,
"loss": 0.2823,
"step": 1164
},
{
"epoch": 1.06,
"learning_rate": 0.00047907684746394065,
"loss": 0.3076,
"step": 1165
},
{
"epoch": 1.06,
"learning_rate": 0.000478343153416329,
"loss": 0.2656,
"step": 1166
},
{
"epoch": 1.06,
"learning_rate": 0.0004776095060855424,
"loss": 0.3115,
"step": 1167
},
{
"epoch": 1.06,
"learning_rate": 0.0004768759070541596,
"loss": 0.2878,
"step": 1168
},
{
"epoch": 1.06,
"learning_rate": 0.00047614235790465576,
"loss": 0.2942,
"step": 1169
},
{
"epoch": 1.06,
"learning_rate": 0.00047540886021939845,
"loss": 0.2916,
"step": 1170
},
{
"epoch": 1.06,
"learning_rate": 0.0004746754155806437,
"loss": 0.2882,
"step": 1171
},
{
"epoch": 1.06,
"learning_rate": 0.0004739420255705337,
"loss": 0.3162,
"step": 1172
},
{
"epoch": 1.06,
"learning_rate": 0.0004732086917710922,
"loss": 0.3059,
"step": 1173
},
{
"epoch": 1.06,
"learning_rate": 0.00047247541576422224,
"loss": 0.2885,
"step": 1174
},
{
"epoch": 1.07,
"learning_rate": 0.0004717421991317021,
"loss": 0.2908,
"step": 1175
},
{
"epoch": 1.07,
"learning_rate": 0.0004710090434551816,
"loss": 0.2903,
"step": 1176
},
{
"epoch": 1.07,
"learning_rate": 0.00047027595031617935,
"loss": 0.2719,
"step": 1177
},
{
"epoch": 1.07,
"learning_rate": 0.0004695429212960793,
"loss": 0.2853,
"step": 1178
},
{
"epoch": 1.07,
"learning_rate": 0.00046880995797612664,
"loss": 0.293,
"step": 1179
},
{
"epoch": 1.07,
"learning_rate": 0.0004680770619374248,
"loss": 0.2837,
"step": 1180
},
{
"epoch": 1.07,
"learning_rate": 0.00046734423476093255,
"loss": 0.2764,
"step": 1181
},
{
"epoch": 1.07,
"learning_rate": 0.00046661147802745987,
"loss": 0.2899,
"step": 1182
},
{
"epoch": 1.07,
"learning_rate": 0.00046587879331766457,
"loss": 0.2885,
"step": 1183
},
{
"epoch": 1.07,
"learning_rate": 0.0004651461822120494,
"loss": 0.2883,
"step": 1184
},
{
"epoch": 1.07,
"learning_rate": 0.0004644136462909584,
"loss": 0.2856,
"step": 1185
},
{
"epoch": 1.08,
"learning_rate": 0.00046368118713457294,
"loss": 0.2966,
"step": 1186
},
{
"epoch": 1.08,
"learning_rate": 0.0004629488063229094,
"loss": 0.2797,
"step": 1187
},
{
"epoch": 1.08,
"learning_rate": 0.00046221650543581467,
"loss": 0.2927,
"step": 1188
},
{
"epoch": 1.08,
"learning_rate": 0.0004614842860529635,
"loss": 0.3,
"step": 1189
},
{
"epoch": 1.08,
"learning_rate": 0.00046075214975385486,
"loss": 0.2986,
"step": 1190
},
{
"epoch": 1.08,
"learning_rate": 0.0004600200981178084,
"loss": 0.3149,
"step": 1191
},
{
"epoch": 1.08,
"learning_rate": 0.0004592881327239609,
"loss": 0.2833,
"step": 1192
},
{
"epoch": 1.08,
"learning_rate": 0.00045855625515126344,
"loss": 0.2826,
"step": 1193
},
{
"epoch": 1.08,
"learning_rate": 0.0004578244669784777,
"loss": 0.2845,
"step": 1194
},
{
"epoch": 1.08,
"learning_rate": 0.00045709276978417215,
"loss": 0.3044,
"step": 1195
},
{
"epoch": 1.08,
"learning_rate": 0.00045636116514671934,
"loss": 0.3066,
"step": 1196
},
{
"epoch": 1.09,
"learning_rate": 0.00045562965464429216,
"loss": 0.2935,
"step": 1197
},
{
"epoch": 1.09,
"learning_rate": 0.0004548982398548601,
"loss": 0.3193,
"step": 1198
},
{
"epoch": 1.09,
"learning_rate": 0.00045416692235618664,
"loss": 0.2909,
"step": 1199
},
{
"epoch": 1.09,
"learning_rate": 0.000453435703725825,
"loss": 0.2866,
"step": 1200
},
{
"epoch": 1.09,
"learning_rate": 0.00045270458554111533,
"loss": 0.3007,
"step": 1201
},
{
"epoch": 1.09,
"learning_rate": 0.0004519735693791811,
"loss": 0.2893,
"step": 1202
},
{
"epoch": 1.09,
"learning_rate": 0.0004512426568169257,
"loss": 0.2812,
"step": 1203
},
{
"epoch": 1.09,
"learning_rate": 0.00045051184943102884,
"loss": 0.2605,
"step": 1204
},
{
"epoch": 1.09,
"learning_rate": 0.0004497811487979436,
"loss": 0.2942,
"step": 1205
},
{
"epoch": 1.09,
"learning_rate": 0.0004490505564938927,
"loss": 0.2732,
"step": 1206
},
{
"epoch": 1.09,
"learning_rate": 0.00044832007409486516,
"loss": 0.2842,
"step": 1207
},
{
"epoch": 1.1,
"learning_rate": 0.0004475897031766129,
"loss": 0.3188,
"step": 1208
},
{
"epoch": 1.1,
"learning_rate": 0.0004468594453146476,
"loss": 0.2937,
"step": 1209
},
{
"epoch": 1.1,
"learning_rate": 0.00044612930208423656,
"loss": 0.2802,
"step": 1210
},
{
"epoch": 1.1,
"learning_rate": 0.0004453992750604003,
"loss": 0.2817,
"step": 1211
},
{
"epoch": 1.1,
"learning_rate": 0.00044466936581790843,
"loss": 0.2787,
"step": 1212
},
{
"epoch": 1.1,
"learning_rate": 0.00044393957593127643,
"loss": 0.2932,
"step": 1213
},
{
"epoch": 1.1,
"learning_rate": 0.0004432099069747625,
"loss": 0.2832,
"step": 1214
},
{
"epoch": 1.1,
"learning_rate": 0.00044248036052236406,
"loss": 0.2976,
"step": 1215
},
{
"epoch": 1.1,
"learning_rate": 0.0004417509381478138,
"loss": 0.2938,
"step": 1216
},
{
"epoch": 1.1,
"learning_rate": 0.00044102164142457707,
"loss": 0.3105,
"step": 1217
},
{
"epoch": 1.1,
"learning_rate": 0.0004402924719258483,
"loss": 0.292,
"step": 1218
},
{
"epoch": 1.11,
"learning_rate": 0.0004395634312245473,
"loss": 0.2677,
"step": 1219
},
{
"epoch": 1.11,
"learning_rate": 0.00043883452089331593,
"loss": 0.3171,
"step": 1220
},
{
"epoch": 1.11,
"learning_rate": 0.0004381057425045152,
"loss": 0.2747,
"step": 1221
},
{
"epoch": 1.11,
"learning_rate": 0.00043737709763022115,
"loss": 0.2961,
"step": 1222
},
{
"epoch": 1.11,
"learning_rate": 0.000436648587842222,
"loss": 0.2806,
"step": 1223
},
{
"epoch": 1.11,
"learning_rate": 0.00043592021471201457,
"loss": 0.2911,
"step": 1224
},
{
"epoch": 1.11,
"learning_rate": 0.00043519197981080056,
"loss": 0.2695,
"step": 1225
},
{
"epoch": 1.11,
"learning_rate": 0.00043446388470948395,
"loss": 0.3191,
"step": 1226
},
{
"epoch": 1.11,
"learning_rate": 0.0004337359309786672,
"loss": 0.2816,
"step": 1227
},
{
"epoch": 1.11,
"learning_rate": 0.0004330081201886472,
"loss": 0.3154,
"step": 1228
},
{
"epoch": 1.11,
"learning_rate": 0.0004322804539094128,
"loss": 0.282,
"step": 1229
},
{
"epoch": 1.12,
"learning_rate": 0.00043155293371064155,
"loss": 0.2847,
"step": 1230
},
{
"epoch": 1.12,
"learning_rate": 0.00043082556116169537,
"loss": 0.3127,
"step": 1231
},
{
"epoch": 1.12,
"learning_rate": 0.00043009833783161773,
"loss": 0.2771,
"step": 1232
},
{
"epoch": 1.12,
"learning_rate": 0.00042937126528913046,
"loss": 0.2823,
"step": 1233
},
{
"epoch": 1.12,
"learning_rate": 0.00042864434510262996,
"loss": 0.2742,
"step": 1234
},
{
"epoch": 1.12,
"learning_rate": 0.00042791757884018394,
"loss": 0.3026,
"step": 1235
},
{
"epoch": 1.12,
"learning_rate": 0.0004271909680695281,
"loss": 0.2788,
"step": 1236
},
{
"epoch": 1.12,
"learning_rate": 0.000426464514358063,
"loss": 0.2958,
"step": 1237
},
{
"epoch": 1.12,
"learning_rate": 0.0004257382192728498,
"loss": 0.288,
"step": 1238
},
{
"epoch": 1.12,
"learning_rate": 0.0004250120843806083,
"loss": 0.2948,
"step": 1239
},
{
"epoch": 1.12,
"learning_rate": 0.00042428611124771184,
"loss": 0.2906,
"step": 1240
},
{
"epoch": 1.13,
"learning_rate": 0.00042356030144018533,
"loss": 0.2931,
"step": 1241
},
{
"epoch": 1.13,
"learning_rate": 0.0004228346565237013,
"loss": 0.3038,
"step": 1242
},
{
"epoch": 1.13,
"learning_rate": 0.0004221091780635768,
"loss": 0.2854,
"step": 1243
},
{
"epoch": 1.13,
"learning_rate": 0.00042138386762476934,
"loss": 0.2969,
"step": 1244
},
{
"epoch": 1.13,
"learning_rate": 0.0004206587267718743,
"loss": 0.2761,
"step": 1245
},
{
"epoch": 1.13,
"learning_rate": 0.0004199337570691214,
"loss": 0.2932,
"step": 1246
},
{
"epoch": 1.13,
"learning_rate": 0.0004192089600803707,
"loss": 0.2964,
"step": 1247
},
{
"epoch": 1.13,
"learning_rate": 0.0004184843373691099,
"loss": 0.2837,
"step": 1248
},
{
"epoch": 1.13,
"learning_rate": 0.00041775989049845104,
"loss": 0.3013,
"step": 1249
},
{
"epoch": 1.13,
"learning_rate": 0.0004170356210311264,
"loss": 0.2762,
"step": 1250
},
{
"epoch": 1.13,
"learning_rate": 0.00041631153052948595,
"loss": 0.2914,
"step": 1251
},
{
"epoch": 1.14,
"learning_rate": 0.00041558762055549314,
"loss": 0.2695,
"step": 1252
},
{
"epoch": 1.14,
"learning_rate": 0.00041486389267072257,
"loss": 0.301,
"step": 1253
},
{
"epoch": 1.14,
"learning_rate": 0.0004141403484363556,
"loss": 0.2985,
"step": 1254
},
{
"epoch": 1.14,
"learning_rate": 0.0004134169894131776,
"loss": 0.293,
"step": 1255
},
{
"epoch": 1.14,
"learning_rate": 0.0004126938171615746,
"loss": 0.2979,
"step": 1256
},
{
"epoch": 1.14,
"learning_rate": 0.00041197083324152927,
"loss": 0.3108,
"step": 1257
},
{
"epoch": 1.14,
"learning_rate": 0.0004112480392126187,
"loss": 0.2782,
"step": 1258
},
{
"epoch": 1.14,
"learning_rate": 0.0004105254366340095,
"loss": 0.2704,
"step": 1259
},
{
"epoch": 1.14,
"learning_rate": 0.00040980302706445607,
"loss": 0.2793,
"step": 1260
},
{
"epoch": 1.14,
"learning_rate": 0.0004090808120622961,
"loss": 0.2954,
"step": 1261
},
{
"epoch": 1.14,
"learning_rate": 0.00040835879318544744,
"loss": 0.2981,
"step": 1262
},
{
"epoch": 1.15,
"learning_rate": 0.0004076369719914055,
"loss": 0.3115,
"step": 1263
},
{
"epoch": 1.15,
"learning_rate": 0.0004069153500372382,
"loss": 0.2756,
"step": 1264
},
{
"epoch": 1.15,
"learning_rate": 0.00040619392887958475,
"loss": 0.2996,
"step": 1265
},
{
"epoch": 1.15,
"learning_rate": 0.0004054727100746505,
"loss": 0.2855,
"step": 1266
},
{
"epoch": 1.15,
"learning_rate": 0.0004047516951782045,
"loss": 0.2795,
"step": 1267
},
{
"epoch": 1.15,
"learning_rate": 0.0004040308857455763,
"loss": 0.2935,
"step": 1268
},
{
"epoch": 1.15,
"learning_rate": 0.0004033102833316517,
"loss": 0.2986,
"step": 1269
},
{
"epoch": 1.15,
"learning_rate": 0.0004025898894908702,
"loss": 0.2715,
"step": 1270
},
{
"epoch": 1.15,
"learning_rate": 0.0004018697057772213,
"loss": 0.2927,
"step": 1271
},
{
"epoch": 1.15,
"learning_rate": 0.00040114973374424126,
"loss": 0.3064,
"step": 1272
},
{
"epoch": 1.15,
"learning_rate": 0.0004004299749450098,
"loss": 0.2941,
"step": 1273
},
{
"epoch": 1.16,
"learning_rate": 0.0003997104309321465,
"loss": 0.2639,
"step": 1274
},
{
"epoch": 1.16,
"learning_rate": 0.00039899110325780786,
"loss": 0.2913,
"step": 1275
},
{
"epoch": 1.16,
"learning_rate": 0.0003982719934736832,
"loss": 0.2927,
"step": 1276
},
{
"epoch": 1.16,
"learning_rate": 0.00039755310313099244,
"loss": 0.2823,
"step": 1277
},
{
"epoch": 1.16,
"learning_rate": 0.00039683443378048174,
"loss": 0.2806,
"step": 1278
},
{
"epoch": 1.16,
"learning_rate": 0.00039611598697242067,
"loss": 0.2854,
"step": 1279
},
{
"epoch": 1.16,
"learning_rate": 0.0003953977642565989,
"loss": 0.3005,
"step": 1280
},
{
"epoch": 1.16,
"learning_rate": 0.0003946797671823223,
"loss": 0.2896,
"step": 1281
},
{
"epoch": 1.16,
"learning_rate": 0.00039396199729841043,
"loss": 0.293,
"step": 1282
},
{
"epoch": 1.16,
"learning_rate": 0.00039324445615319253,
"loss": 0.2811,
"step": 1283
},
{
"epoch": 1.16,
"learning_rate": 0.00039252714529450445,
"loss": 0.2876,
"step": 1284
},
{
"epoch": 1.17,
"learning_rate": 0.0003918100662696853,
"loss": 0.2966,
"step": 1285
},
{
"epoch": 1.17,
"learning_rate": 0.00039109322062557425,
"loss": 0.2913,
"step": 1286
},
{
"epoch": 1.17,
"learning_rate": 0.0003903766099085068,
"loss": 0.3064,
"step": 1287
},
{
"epoch": 1.17,
"learning_rate": 0.00038966023566431153,
"loss": 0.2838,
"step": 1288
},
{
"epoch": 1.17,
"learning_rate": 0.00038894409943830726,
"loss": 0.2811,
"step": 1289
},
{
"epoch": 1.17,
"learning_rate": 0.0003882282027752995,
"loss": 0.3066,
"step": 1290
},
{
"epoch": 1.17,
"learning_rate": 0.0003875125472195764,
"loss": 0.288,
"step": 1291
},
{
"epoch": 1.17,
"learning_rate": 0.00038679713431490667,
"loss": 0.295,
"step": 1292
},
{
"epoch": 1.17,
"learning_rate": 0.00038608196560453495,
"loss": 0.2941,
"step": 1293
},
{
"epoch": 1.17,
"learning_rate": 0.0003853670426311797,
"loss": 0.2986,
"step": 1294
},
{
"epoch": 1.17,
"learning_rate": 0.00038465236693702897,
"loss": 0.2843,
"step": 1295
},
{
"epoch": 1.17,
"learning_rate": 0.00038393794006373735,
"loss": 0.2747,
"step": 1296
},
{
"epoch": 1.18,
"learning_rate": 0.00038322376355242287,
"loss": 0.2828,
"step": 1297
},
{
"epoch": 1.18,
"learning_rate": 0.0003825098389436635,
"loss": 0.2887,
"step": 1298
},
{
"epoch": 1.18,
"learning_rate": 0.00038179616777749383,
"loss": 0.2969,
"step": 1299
},
{
"epoch": 1.18,
"learning_rate": 0.00038108275159340127,
"loss": 0.271,
"step": 1300
},
{
"epoch": 1.18,
"learning_rate": 0.0003803695919303238,
"loss": 0.2762,
"step": 1301
},
{
"epoch": 1.18,
"learning_rate": 0.0003796566903266459,
"loss": 0.2935,
"step": 1302
},
{
"epoch": 1.18,
"learning_rate": 0.00037894404832019516,
"loss": 0.2767,
"step": 1303
},
{
"epoch": 1.18,
"learning_rate": 0.00037823166744823934,
"loss": 0.2764,
"step": 1304
},
{
"epoch": 1.18,
"learning_rate": 0.00037751954924748304,
"loss": 0.2791,
"step": 1305
},
{
"epoch": 1.18,
"learning_rate": 0.00037680769525406395,
"loss": 0.2963,
"step": 1306
},
{
"epoch": 1.18,
"learning_rate": 0.00037609610700355014,
"loss": 0.3079,
"step": 1307
},
{
"epoch": 1.19,
"learning_rate": 0.00037538478603093596,
"loss": 0.2994,
"step": 1308
},
{
"epoch": 1.19,
"learning_rate": 0.00037467373387063967,
"loss": 0.3018,
"step": 1309
},
{
"epoch": 1.19,
"learning_rate": 0.00037396295205649964,
"loss": 0.2996,
"step": 1310
},
{
"epoch": 1.19,
"learning_rate": 0.0003732524421217709,
"loss": 0.2903,
"step": 1311
},
{
"epoch": 1.19,
"learning_rate": 0.0003725422055991218,
"loss": 0.2734,
"step": 1312
},
{
"epoch": 1.19,
"learning_rate": 0.00037183224402063107,
"loss": 0.2833,
"step": 1313
},
{
"epoch": 1.19,
"learning_rate": 0.00037112255891778487,
"loss": 0.2808,
"step": 1314
},
{
"epoch": 1.19,
"learning_rate": 0.000370413151821472,
"loss": 0.2822,
"step": 1315
},
{
"epoch": 1.19,
"learning_rate": 0.0003697040242619823,
"loss": 0.2933,
"step": 1316
},
{
"epoch": 1.19,
"learning_rate": 0.0003689951777690024,
"loss": 0.2686,
"step": 1317
},
{
"epoch": 1.19,
"learning_rate": 0.00036828661387161256,
"loss": 0.2631,
"step": 1318
},
{
"epoch": 1.2,
"learning_rate": 0.00036757833409828335,
"loss": 0.2753,
"step": 1319
},
{
"epoch": 1.2,
"learning_rate": 0.00036687033997687287,
"loss": 0.2913,
"step": 1320
},
{
"epoch": 1.2,
"learning_rate": 0.0003661626330346224,
"loss": 0.282,
"step": 1321
},
{
"epoch": 1.2,
"learning_rate": 0.00036545521479815435,
"loss": 0.2933,
"step": 1322
},
{
"epoch": 1.2,
"learning_rate": 0.00036474808679346795,
"loss": 0.2959,
"step": 1323
},
{
"epoch": 1.2,
"learning_rate": 0.0003640412505459365,
"loss": 0.2799,
"step": 1324
},
{
"epoch": 1.2,
"learning_rate": 0.00036333470758030385,
"loss": 0.2861,
"step": 1325
},
{
"epoch": 1.2,
"learning_rate": 0.00036262845942068133,
"loss": 0.29,
"step": 1326
},
{
"epoch": 1.2,
"learning_rate": 0.00036192250759054423,
"loss": 0.2827,
"step": 1327
},
{
"epoch": 1.2,
"learning_rate": 0.0003612168536127287,
"loss": 0.2676,
"step": 1328
},
{
"epoch": 1.2,
"learning_rate": 0.0003605114990094285,
"loss": 0.3007,
"step": 1329
},
{
"epoch": 1.21,
"learning_rate": 0.00035980644530219107,
"loss": 0.2766,
"step": 1330
},
{
"epoch": 1.21,
"learning_rate": 0.00035910169401191544,
"loss": 0.2764,
"step": 1331
},
{
"epoch": 1.21,
"learning_rate": 0.00035839724665884796,
"loss": 0.286,
"step": 1332
},
{
"epoch": 1.21,
"learning_rate": 0.00035769310476257933,
"loss": 0.2986,
"step": 1333
},
{
"epoch": 1.21,
"learning_rate": 0.0003569892698420413,
"loss": 0.2628,
"step": 1334
},
{
"epoch": 1.21,
"learning_rate": 0.0003562857434155038,
"loss": 0.2805,
"step": 1335
},
{
"epoch": 1.21,
"learning_rate": 0.0003555825270005707,
"loss": 0.2848,
"step": 1336
},
{
"epoch": 1.21,
"learning_rate": 0.00035487962211417745,
"loss": 0.3188,
"step": 1337
},
{
"epoch": 1.21,
"learning_rate": 0.0003541770302725875,
"loss": 0.2986,
"step": 1338
},
{
"epoch": 1.21,
"learning_rate": 0.0003534747529913893,
"loss": 0.2874,
"step": 1339
},
{
"epoch": 1.21,
"learning_rate": 0.00035277279178549205,
"loss": 0.2758,
"step": 1340
},
{
"epoch": 1.22,
"learning_rate": 0.0003520711481691238,
"loss": 0.2697,
"step": 1341
},
{
"epoch": 1.22,
"learning_rate": 0.00035136982365582706,
"loss": 0.3081,
"step": 1342
},
{
"epoch": 1.22,
"learning_rate": 0.0003506688197584562,
"loss": 0.3059,
"step": 1343
},
{
"epoch": 1.22,
"learning_rate": 0.00034996813798917416,
"loss": 0.2876,
"step": 1344
},
{
"epoch": 1.22,
"learning_rate": 0.00034926777985944856,
"loss": 0.2783,
"step": 1345
},
{
"epoch": 1.22,
"learning_rate": 0.00034856774688004917,
"loss": 0.2831,
"step": 1346
},
{
"epoch": 1.22,
"learning_rate": 0.0003478680405610446,
"loss": 0.2771,
"step": 1347
},
{
"epoch": 1.22,
"learning_rate": 0.0003471686624117982,
"loss": 0.295,
"step": 1348
},
{
"epoch": 1.22,
"learning_rate": 0.00034646961394096563,
"loss": 0.2843,
"step": 1349
},
{
"epoch": 1.22,
"learning_rate": 0.0003457708966564918,
"loss": 0.283,
"step": 1350
},
{
"epoch": 1.22,
"learning_rate": 0.00034507251206560686,
"loss": 0.283,
"step": 1351
},
{
"epoch": 1.23,
"learning_rate": 0.0003443744616748231,
"loss": 0.2629,
"step": 1352
},
{
"epoch": 1.23,
"learning_rate": 0.0003436767469899324,
"loss": 0.2942,
"step": 1353
},
{
"epoch": 1.23,
"learning_rate": 0.00034297936951600214,
"loss": 0.2871,
"step": 1354
},
{
"epoch": 1.23,
"learning_rate": 0.00034228233075737224,
"loss": 0.3043,
"step": 1355
},
{
"epoch": 1.23,
"learning_rate": 0.00034158563221765224,
"loss": 0.265,
"step": 1356
},
{
"epoch": 1.23,
"learning_rate": 0.0003408892753997175,
"loss": 0.2932,
"step": 1357
},
{
"epoch": 1.23,
"learning_rate": 0.0003401932618057063,
"loss": 0.2974,
"step": 1358
},
{
"epoch": 1.23,
"learning_rate": 0.00033949759293701694,
"loss": 0.2755,
"step": 1359
},
{
"epoch": 1.23,
"learning_rate": 0.00033880227029430335,
"loss": 0.2915,
"step": 1360
},
{
"epoch": 1.23,
"learning_rate": 0.0003381072953774731,
"loss": 0.2825,
"step": 1361
},
{
"epoch": 1.23,
"learning_rate": 0.00033741266968568363,
"loss": 0.2789,
"step": 1362
},
{
"epoch": 1.24,
"learning_rate": 0.00033671839471733907,
"loss": 0.262,
"step": 1363
},
{
"epoch": 1.24,
"learning_rate": 0.0003360244719700868,
"loss": 0.2577,
"step": 1364
},
{
"epoch": 1.24,
"learning_rate": 0.00033533090294081446,
"loss": 0.2991,
"step": 1365
},
{
"epoch": 1.24,
"learning_rate": 0.00033463768912564705,
"loss": 0.2784,
"step": 1366
},
{
"epoch": 1.24,
"learning_rate": 0.00033394483201994277,
"loss": 0.2771,
"step": 1367
},
{
"epoch": 1.24,
"learning_rate": 0.0003332523331182906,
"loss": 0.2864,
"step": 1368
},
{
"epoch": 1.24,
"learning_rate": 0.00033256019391450693,
"loss": 0.2598,
"step": 1369
},
{
"epoch": 1.24,
"learning_rate": 0.00033186841590163196,
"loss": 0.2917,
"step": 1370
},
{
"epoch": 1.24,
"learning_rate": 0.0003311770005719272,
"loss": 0.2856,
"step": 1371
},
{
"epoch": 1.24,
"learning_rate": 0.0003304859494168712,
"loss": 0.2866,
"step": 1372
},
{
"epoch": 1.24,
"learning_rate": 0.00032979526392715733,
"loss": 0.2661,
"step": 1373
},
{
"epoch": 1.25,
"learning_rate": 0.00032910494559269007,
"loss": 0.2808,
"step": 1374
},
{
"epoch": 1.25,
"learning_rate": 0.000328414995902582,
"loss": 0.2883,
"step": 1375
},
{
"epoch": 1.25,
"learning_rate": 0.0003277254163451503,
"loss": 0.3,
"step": 1376
},
{
"epoch": 1.25,
"learning_rate": 0.0003270362084079138,
"loss": 0.2937,
"step": 1377
},
{
"epoch": 1.25,
"learning_rate": 0.0003263473735775899,
"loss": 0.2808,
"step": 1378
},
{
"epoch": 1.25,
"learning_rate": 0.0003256589133400908,
"loss": 0.2958,
"step": 1379
},
{
"epoch": 1.25,
"learning_rate": 0.00032497082918052074,
"loss": 0.2734,
"step": 1380
},
{
"epoch": 1.25,
"learning_rate": 0.0003242831225831731,
"loss": 0.2776,
"step": 1381
},
{
"epoch": 1.25,
"learning_rate": 0.00032359579503152605,
"loss": 0.2883,
"step": 1382
},
{
"epoch": 1.25,
"learning_rate": 0.00032290884800824105,
"loss": 0.2723,
"step": 1383
},
{
"epoch": 1.25,
"learning_rate": 0.0003222222829951578,
"loss": 0.2869,
"step": 1384
},
{
"epoch": 1.26,
"learning_rate": 0.0003215361014732925,
"loss": 0.2853,
"step": 1385
},
{
"epoch": 1.26,
"learning_rate": 0.0003208503049228339,
"loss": 0.264,
"step": 1386
},
{
"epoch": 1.26,
"learning_rate": 0.0003201648948231404,
"loss": 0.2819,
"step": 1387
},
{
"epoch": 1.26,
"learning_rate": 0.0003194798726527369,
"loss": 0.264,
"step": 1388
},
{
"epoch": 1.26,
"learning_rate": 0.0003187952398893111,
"loss": 0.2826,
"step": 1389
},
{
"epoch": 1.26,
"learning_rate": 0.0003181109980097111,
"loss": 0.2797,
"step": 1390
},
{
"epoch": 1.26,
"learning_rate": 0.0003174271484899415,
"loss": 0.2772,
"step": 1391
},
{
"epoch": 1.26,
"learning_rate": 0.00031674369280516065,
"loss": 0.2697,
"step": 1392
},
{
"epoch": 1.26,
"learning_rate": 0.00031606063242967754,
"loss": 0.2859,
"step": 1393
},
{
"epoch": 1.26,
"learning_rate": 0.00031537796883694805,
"loss": 0.2948,
"step": 1394
},
{
"epoch": 1.26,
"learning_rate": 0.00031469570349957254,
"loss": 0.2782,
"step": 1395
},
{
"epoch": 1.27,
"learning_rate": 0.0003140138378892917,
"loss": 0.2819,
"step": 1396
},
{
"epoch": 1.27,
"learning_rate": 0.00031333237347698473,
"loss": 0.2792,
"step": 1397
},
{
"epoch": 1.27,
"learning_rate": 0.00031265131173266464,
"loss": 0.2821,
"step": 1398
},
{
"epoch": 1.27,
"learning_rate": 0.00031197065412547634,
"loss": 0.2799,
"step": 1399
},
{
"epoch": 1.27,
"learning_rate": 0.0003112904021236929,
"loss": 0.267,
"step": 1400
},
{
"epoch": 1.27,
"learning_rate": 0.000310610557194712,
"loss": 0.3032,
"step": 1401
},
{
"epoch": 1.27,
"learning_rate": 0.00030993112080505384,
"loss": 0.302,
"step": 1402
},
{
"epoch": 1.27,
"learning_rate": 0.000309252094420357,
"loss": 0.2755,
"step": 1403
},
{
"epoch": 1.27,
"learning_rate": 0.0003085734795053755,
"loss": 0.2975,
"step": 1404
},
{
"epoch": 1.27,
"learning_rate": 0.0003078952775239762,
"loss": 0.2932,
"step": 1405
},
{
"epoch": 1.27,
"learning_rate": 0.00030721748993913466,
"loss": 0.2733,
"step": 1406
},
{
"epoch": 1.28,
"learning_rate": 0.00030654011821293315,
"loss": 0.3018,
"step": 1407
},
{
"epoch": 1.28,
"learning_rate": 0.00030586316380655607,
"loss": 0.2815,
"step": 1408
},
{
"epoch": 1.28,
"learning_rate": 0.00030518662818028843,
"loss": 0.2811,
"step": 1409
},
{
"epoch": 1.28,
"learning_rate": 0.0003045105127935112,
"loss": 0.2878,
"step": 1410
},
{
"epoch": 1.28,
"learning_rate": 0.00030383481910469933,
"loss": 0.3086,
"step": 1411
},
{
"epoch": 1.28,
"learning_rate": 0.00030315954857141806,
"loss": 0.2867,
"step": 1412
},
{
"epoch": 1.28,
"learning_rate": 0.0003024847026503193,
"loss": 0.2484,
"step": 1413
},
{
"epoch": 1.28,
"learning_rate": 0.00030181028279713967,
"loss": 0.29,
"step": 1414
},
{
"epoch": 1.28,
"learning_rate": 0.0003011362904666965,
"loss": 0.2942,
"step": 1415
},
{
"epoch": 1.28,
"learning_rate": 0.0003004627271128846,
"loss": 0.2927,
"step": 1416
},
{
"epoch": 1.28,
"learning_rate": 0.0002997895941886737,
"loss": 0.2659,
"step": 1417
},
{
"epoch": 1.29,
"learning_rate": 0.00029911689314610513,
"loss": 0.2794,
"step": 1418
},
{
"epoch": 1.29,
"learning_rate": 0.00029844462543628846,
"loss": 0.2898,
"step": 1419
},
{
"epoch": 1.29,
"learning_rate": 0.0002977727925093981,
"loss": 0.2727,
"step": 1420
},
{
"epoch": 1.29,
"learning_rate": 0.00029710139581467116,
"loss": 0.2957,
"step": 1421
},
{
"epoch": 1.29,
"learning_rate": 0.0002964304368004037,
"loss": 0.2748,
"step": 1422
},
{
"epoch": 1.29,
"learning_rate": 0.0002957599169139472,
"loss": 0.287,
"step": 1423
},
{
"epoch": 1.29,
"learning_rate": 0.00029508983760170637,
"loss": 0.2837,
"step": 1424
},
{
"epoch": 1.29,
"learning_rate": 0.0002944202003091351,
"loss": 0.2843,
"step": 1425
},
{
"epoch": 1.29,
"learning_rate": 0.0002937510064807341,
"loss": 0.265,
"step": 1426
},
{
"epoch": 1.29,
"learning_rate": 0.0002930822575600475,
"loss": 0.2633,
"step": 1427
},
{
"epoch": 1.29,
"learning_rate": 0.0002924139549896594,
"loss": 0.2845,
"step": 1428
},
{
"epoch": 1.3,
"learning_rate": 0.00029174610021119135,
"loss": 0.2786,
"step": 1429
},
{
"epoch": 1.3,
"learning_rate": 0.0002910786946652987,
"loss": 0.2833,
"step": 1430
},
{
"epoch": 1.3,
"learning_rate": 0.0002904117397916681,
"loss": 0.2854,
"step": 1431
},
{
"epoch": 1.3,
"learning_rate": 0.00028974523702901344,
"loss": 0.2736,
"step": 1432
},
{
"epoch": 1.3,
"learning_rate": 0.00028907918781507396,
"loss": 0.2839,
"step": 1433
},
{
"epoch": 1.3,
"learning_rate": 0.0002884135935866103,
"loss": 0.2865,
"step": 1434
},
{
"epoch": 1.3,
"learning_rate": 0.0002877484557794017,
"loss": 0.2703,
"step": 1435
},
{
"epoch": 1.3,
"learning_rate": 0.00028708377582824246,
"loss": 0.291,
"step": 1436
},
{
"epoch": 1.3,
"learning_rate": 0.00028641955516693976,
"loss": 0.2961,
"step": 1437
},
{
"epoch": 1.3,
"learning_rate": 0.00028575579522830964,
"loss": 0.2904,
"step": 1438
},
{
"epoch": 1.3,
"learning_rate": 0.0002850924974441745,
"loss": 0.2861,
"step": 1439
},
{
"epoch": 1.31,
"learning_rate": 0.0002844296632453598,
"loss": 0.2881,
"step": 1440
},
{
"epoch": 1.31,
"learning_rate": 0.0002837672940616911,
"loss": 0.285,
"step": 1441
},
{
"epoch": 1.31,
"learning_rate": 0.0002831053913219903,
"loss": 0.2937,
"step": 1442
},
{
"epoch": 1.31,
"learning_rate": 0.0002824439564540739,
"loss": 0.2803,
"step": 1443
},
{
"epoch": 1.31,
"learning_rate": 0.00028178299088474834,
"loss": 0.2699,
"step": 1444
},
{
"epoch": 1.31,
"learning_rate": 0.0002811224960398083,
"loss": 0.2621,
"step": 1445
},
{
"epoch": 1.31,
"learning_rate": 0.00028046247334403295,
"loss": 0.2852,
"step": 1446
},
{
"epoch": 1.31,
"learning_rate": 0.0002798029242211828,
"loss": 0.2548,
"step": 1447
},
{
"epoch": 1.31,
"learning_rate": 0.00027914385009399713,
"loss": 0.275,
"step": 1448
},
{
"epoch": 1.31,
"learning_rate": 0.00027848525238418986,
"loss": 0.2827,
"step": 1449
},
{
"epoch": 1.31,
"learning_rate": 0.000277827132512448,
"loss": 0.281,
"step": 1450
},
{
"epoch": 1.32,
"learning_rate": 0.00027716949189842733,
"loss": 0.2906,
"step": 1451
},
{
"epoch": 1.32,
"learning_rate": 0.00027651233196075004,
"loss": 0.2859,
"step": 1452
},
{
"epoch": 1.32,
"learning_rate": 0.00027585565411700165,
"loss": 0.2694,
"step": 1453
},
{
"epoch": 1.32,
"learning_rate": 0.00027519945978372683,
"loss": 0.2925,
"step": 1454
},
{
"epoch": 1.32,
"learning_rate": 0.00027454375037642856,
"loss": 0.2946,
"step": 1455
},
{
"epoch": 1.32,
"learning_rate": 0.00027388852730956235,
"loss": 0.2856,
"step": 1456
},
{
"epoch": 1.32,
"learning_rate": 0.00027323379199653574,
"loss": 0.2854,
"step": 1457
},
{
"epoch": 1.32,
"learning_rate": 0.0002725795458497036,
"loss": 0.2947,
"step": 1458
},
{
"epoch": 1.32,
"learning_rate": 0.0002719257902803658,
"loss": 0.2654,
"step": 1459
},
{
"epoch": 1.32,
"learning_rate": 0.00027127252669876395,
"loss": 0.2803,
"step": 1460
},
{
"epoch": 1.32,
"learning_rate": 0.000270619756514078,
"loss": 0.2821,
"step": 1461
},
{
"epoch": 1.33,
"learning_rate": 0.00026996748113442394,
"loss": 0.2545,
"step": 1462
},
{
"epoch": 1.33,
"learning_rate": 0.00026931570196685054,
"loss": 0.2732,
"step": 1463
},
{
"epoch": 1.33,
"learning_rate": 0.00026866442041733566,
"loss": 0.2855,
"step": 1464
},
{
"epoch": 1.33,
"learning_rate": 0.00026801363789078446,
"loss": 0.27,
"step": 1465
},
{
"epoch": 1.33,
"learning_rate": 0.00026736335579102474,
"loss": 0.2778,
"step": 1466
},
{
"epoch": 1.33,
"learning_rate": 0.0002667135755208057,
"loss": 0.2778,
"step": 1467
},
{
"epoch": 1.33,
"learning_rate": 0.00026606429848179304,
"loss": 0.282,
"step": 1468
},
{
"epoch": 1.33,
"learning_rate": 0.0002654155260745679,
"loss": 0.2822,
"step": 1469
},
{
"epoch": 1.33,
"learning_rate": 0.00026476725969862226,
"loss": 0.2893,
"step": 1470
},
{
"epoch": 1.33,
"learning_rate": 0.0002641195007523568,
"loss": 0.26,
"step": 1471
},
{
"epoch": 1.33,
"learning_rate": 0.00026347225063307766,
"loss": 0.2845,
"step": 1472
},
{
"epoch": 1.34,
"learning_rate": 0.00026282551073699285,
"loss": 0.2769,
"step": 1473
},
{
"epoch": 1.34,
"learning_rate": 0.0002621792824592103,
"loss": 0.2806,
"step": 1474
},
{
"epoch": 1.34,
"learning_rate": 0.0002615335671937341,
"loss": 0.2981,
"step": 1475
},
{
"epoch": 1.34,
"learning_rate": 0.0002608883663334617,
"loss": 0.2631,
"step": 1476
},
{
"epoch": 1.34,
"learning_rate": 0.000260243681270181,
"loss": 0.2733,
"step": 1477
},
{
"epoch": 1.34,
"learning_rate": 0.00025959951339456737,
"loss": 0.2753,
"step": 1478
},
{
"epoch": 1.34,
"learning_rate": 0.00025895586409618014,
"loss": 0.2773,
"step": 1479
},
{
"epoch": 1.34,
"learning_rate": 0.00025831273476346005,
"loss": 0.27,
"step": 1480
},
{
"epoch": 1.34,
"learning_rate": 0.0002576701267837265,
"loss": 0.265,
"step": 1481
},
{
"epoch": 1.34,
"learning_rate": 0.00025702804154317414,
"loss": 0.2812,
"step": 1482
},
{
"epoch": 1.34,
"learning_rate": 0.00025638648042687005,
"loss": 0.2836,
"step": 1483
},
{
"epoch": 1.35,
"learning_rate": 0.0002557454448187507,
"loss": 0.2665,
"step": 1484
},
{
"epoch": 1.35,
"learning_rate": 0.00025510493610161867,
"loss": 0.2814,
"step": 1485
},
{
"epoch": 1.35,
"learning_rate": 0.0002544649556571402,
"loss": 0.2767,
"step": 1486
},
{
"epoch": 1.35,
"learning_rate": 0.0002538255048658421,
"loss": 0.2683,
"step": 1487
},
{
"epoch": 1.35,
"learning_rate": 0.00025318658510710846,
"loss": 0.2607,
"step": 1488
},
{
"epoch": 1.35,
"learning_rate": 0.0002525481977591779,
"loss": 0.2819,
"step": 1489
},
{
"epoch": 1.35,
"learning_rate": 0.00025191034419914075,
"loss": 0.3035,
"step": 1490
},
{
"epoch": 1.35,
"learning_rate": 0.0002512730258029355,
"loss": 0.2849,
"step": 1491
},
{
"epoch": 1.35,
"learning_rate": 0.0002506362439453463,
"loss": 0.2858,
"step": 1492
},
{
"epoch": 1.35,
"learning_rate": 0.0002500000000000001,
"loss": 0.2638,
"step": 1493
},
{
"epoch": 1.35,
"learning_rate": 0.0002493642953393635,
"loss": 0.281,
"step": 1494
},
{
"epoch": 1.36,
"learning_rate": 0.0002487291313347397,
"loss": 0.2854,
"step": 1495
},
{
"epoch": 1.36,
"learning_rate": 0.0002480945093562656,
"loss": 0.265,
"step": 1496
},
{
"epoch": 1.36,
"learning_rate": 0.00024746043077290917,
"loss": 0.291,
"step": 1497
},
{
"epoch": 1.36,
"learning_rate": 0.0002468268969524656,
"loss": 0.2839,
"step": 1498
},
{
"epoch": 1.36,
"learning_rate": 0.00024619390926155534,
"loss": 0.2778,
"step": 1499
},
{
"epoch": 1.36,
"learning_rate": 0.00024556146906562086,
"loss": 0.2749,
"step": 1500
},
{
"epoch": 1.36,
"learning_rate": 0.0002449295777289234,
"loss": 0.2928,
"step": 1501
},
{
"epoch": 1.36,
"learning_rate": 0.00024429823661454057,
"loss": 0.266,
"step": 1502
},
{
"epoch": 1.36,
"learning_rate": 0.00024366744708436233,
"loss": 0.2823,
"step": 1503
},
{
"epoch": 1.36,
"learning_rate": 0.00024303721049908971,
"loss": 0.2753,
"step": 1504
},
{
"epoch": 1.36,
"learning_rate": 0.0002424075282182302,
"loss": 0.2847,
"step": 1505
},
{
"epoch": 1.37,
"learning_rate": 0.00024177840160009595,
"loss": 0.28,
"step": 1506
},
{
"epoch": 1.37,
"learning_rate": 0.0002411498320018005,
"loss": 0.2883,
"step": 1507
},
{
"epoch": 1.37,
"learning_rate": 0.00024052182077925584,
"loss": 0.277,
"step": 1508
},
{
"epoch": 1.37,
"learning_rate": 0.0002398943692871694,
"loss": 0.2676,
"step": 1509
},
{
"epoch": 1.37,
"learning_rate": 0.00023926747887904082,
"loss": 0.2942,
"step": 1510
},
{
"epoch": 1.37,
"learning_rate": 0.00023864115090716,
"loss": 0.2822,
"step": 1511
},
{
"epoch": 1.37,
"learning_rate": 0.0002380153867226033,
"loss": 0.2589,
"step": 1512
},
{
"epoch": 1.37,
"learning_rate": 0.00023739018767523096,
"loss": 0.269,
"step": 1513
},
{
"epoch": 1.37,
"learning_rate": 0.00023676555511368424,
"loss": 0.2958,
"step": 1514
},
{
"epoch": 1.37,
"learning_rate": 0.00023614149038538208,
"loss": 0.2665,
"step": 1515
},
{
"epoch": 1.37,
"learning_rate": 0.0002355179948365189,
"loss": 0.2672,
"step": 1516
},
{
"epoch": 1.38,
"learning_rate": 0.000234895069812061,
"loss": 0.2812,
"step": 1517
},
{
"epoch": 1.38,
"learning_rate": 0.00023427271665574424,
"loss": 0.3033,
"step": 1518
},
{
"epoch": 1.38,
"learning_rate": 0.00023365093671007077,
"loss": 0.2762,
"step": 1519
},
{
"epoch": 1.38,
"learning_rate": 0.00023302973131630623,
"loss": 0.2704,
"step": 1520
},
{
"epoch": 1.38,
"learning_rate": 0.00023240910181447717,
"loss": 0.2725,
"step": 1521
},
{
"epoch": 1.38,
"learning_rate": 0.00023178904954336717,
"loss": 0.2722,
"step": 1522
},
{
"epoch": 1.38,
"learning_rate": 0.00023116957584051528,
"loss": 0.2842,
"step": 1523
},
{
"epoch": 1.38,
"learning_rate": 0.00023055068204221225,
"loss": 0.2693,
"step": 1524
},
{
"epoch": 1.38,
"learning_rate": 0.000229932369483498,
"loss": 0.2871,
"step": 1525
},
{
"epoch": 1.38,
"learning_rate": 0.00022931463949815844,
"loss": 0.2743,
"step": 1526
},
{
"epoch": 1.38,
"learning_rate": 0.0002286974934187232,
"loss": 0.2871,
"step": 1527
},
{
"epoch": 1.39,
"learning_rate": 0.00022808093257646185,
"loss": 0.2902,
"step": 1528
},
{
"epoch": 1.39,
"learning_rate": 0.00022746495830138153,
"loss": 0.2882,
"step": 1529
},
{
"epoch": 1.39,
"learning_rate": 0.00022684957192222445,
"loss": 0.2803,
"step": 1530
},
{
"epoch": 1.39,
"learning_rate": 0.0002262347747664645,
"loss": 0.2504,
"step": 1531
},
{
"epoch": 1.39,
"learning_rate": 0.00022562056816030447,
"loss": 0.2885,
"step": 1532
},
{
"epoch": 1.39,
"learning_rate": 0.0002250069534286735,
"loss": 0.2676,
"step": 1533
},
{
"epoch": 1.39,
"learning_rate": 0.0002243939318952234,
"loss": 0.2684,
"step": 1534
},
{
"epoch": 1.39,
"learning_rate": 0.00022378150488232685,
"loss": 0.2822,
"step": 1535
},
{
"epoch": 1.39,
"learning_rate": 0.000223169673711074,
"loss": 0.2469,
"step": 1536
},
{
"epoch": 1.39,
"learning_rate": 0.00022255843970126954,
"loss": 0.2531,
"step": 1537
},
{
"epoch": 1.39,
"learning_rate": 0.00022194780417143025,
"loss": 0.2845,
"step": 1538
},
{
"epoch": 1.4,
"learning_rate": 0.00022133776843878184,
"loss": 0.2849,
"step": 1539
},
{
"epoch": 1.4,
"learning_rate": 0.00022072833381925588,
"loss": 0.2742,
"step": 1540
},
{
"epoch": 1.4,
"learning_rate": 0.00022011950162748734,
"loss": 0.2732,
"step": 1541
},
{
"epoch": 1.4,
"learning_rate": 0.00021951127317681185,
"loss": 0.2817,
"step": 1542
},
{
"epoch": 1.4,
"learning_rate": 0.0002189036497792628,
"loss": 0.2954,
"step": 1543
},
{
"epoch": 1.4,
"learning_rate": 0.00021829663274556806,
"loss": 0.2959,
"step": 1544
},
{
"epoch": 1.4,
"learning_rate": 0.0002176902233851477,
"loss": 0.2703,
"step": 1545
},
{
"epoch": 1.4,
"learning_rate": 0.00021708442300611113,
"loss": 0.2616,
"step": 1546
},
{
"epoch": 1.4,
"learning_rate": 0.00021647923291525352,
"loss": 0.2803,
"step": 1547
},
{
"epoch": 1.4,
"learning_rate": 0.000215874654418054,
"loss": 0.27,
"step": 1548
},
{
"epoch": 1.4,
"learning_rate": 0.0002152706888186724,
"loss": 0.2593,
"step": 1549
},
{
"epoch": 1.41,
"learning_rate": 0.00021466733741994638,
"loss": 0.2549,
"step": 1550
},
{
"epoch": 1.41,
"learning_rate": 0.0002140646015233888,
"loss": 0.2753,
"step": 1551
},
{
"epoch": 1.41,
"learning_rate": 0.00021346248242918458,
"loss": 0.2601,
"step": 1552
},
{
"epoch": 1.41,
"learning_rate": 0.00021286098143618793,
"loss": 0.2683,
"step": 1553
},
{
"epoch": 1.41,
"learning_rate": 0.00021226009984192024,
"loss": 0.2717,
"step": 1554
},
{
"epoch": 1.41,
"learning_rate": 0.00021165983894256646,
"loss": 0.2617,
"step": 1555
},
{
"epoch": 1.41,
"learning_rate": 0.00021106020003297283,
"loss": 0.2645,
"step": 1556
},
{
"epoch": 1.41,
"learning_rate": 0.00021046118440664358,
"loss": 0.2686,
"step": 1557
},
{
"epoch": 1.41,
"learning_rate": 0.00020986279335573888,
"loss": 0.2716,
"step": 1558
},
{
"epoch": 1.41,
"learning_rate": 0.00020926502817107085,
"loss": 0.2739,
"step": 1559
},
{
"epoch": 1.41,
"learning_rate": 0.00020866789014210225,
"loss": 0.2601,
"step": 1560
},
{
"epoch": 1.42,
"learning_rate": 0.0002080713805569427,
"loss": 0.2462,
"step": 1561
},
{
"epoch": 1.42,
"learning_rate": 0.0002074755007023461,
"loss": 0.2626,
"step": 1562
},
{
"epoch": 1.42,
"learning_rate": 0.00020688025186370814,
"loss": 0.2819,
"step": 1563
},
{
"epoch": 1.42,
"learning_rate": 0.00020628563532506335,
"loss": 0.2675,
"step": 1564
},
{
"epoch": 1.42,
"learning_rate": 0.00020569165236908194,
"loss": 0.2581,
"step": 1565
},
{
"epoch": 1.42,
"learning_rate": 0.00020509830427706748,
"loss": 0.2684,
"step": 1566
},
{
"epoch": 1.42,
"learning_rate": 0.0002045055923289544,
"loss": 0.2756,
"step": 1567
},
{
"epoch": 1.42,
"learning_rate": 0.00020391351780330459,
"loss": 0.2808,
"step": 1568
},
{
"epoch": 1.42,
"learning_rate": 0.00020332208197730502,
"loss": 0.2695,
"step": 1569
},
{
"epoch": 1.42,
"learning_rate": 0.00020273128612676507,
"loss": 0.2734,
"step": 1570
},
{
"epoch": 1.42,
"learning_rate": 0.00020214113152611303,
"loss": 0.2646,
"step": 1571
},
{
"epoch": 1.43,
"learning_rate": 0.00020155161944839455,
"loss": 0.2761,
"step": 1572
},
{
"epoch": 1.43,
"learning_rate": 0.00020096275116526896,
"loss": 0.261,
"step": 1573
},
{
"epoch": 1.43,
"learning_rate": 0.00020037452794700696,
"loss": 0.2695,
"step": 1574
},
{
"epoch": 1.43,
"learning_rate": 0.00019978695106248766,
"loss": 0.2751,
"step": 1575
},
{
"epoch": 1.43,
"learning_rate": 0.00019920002177919622,
"loss": 0.2665,
"step": 1576
},
{
"epoch": 1.43,
"learning_rate": 0.00019861374136322037,
"loss": 0.2799,
"step": 1577
},
{
"epoch": 1.43,
"learning_rate": 0.00019802811107924812,
"loss": 0.2826,
"step": 1578
},
{
"epoch": 1.43,
"learning_rate": 0.0001974431321905656,
"loss": 0.2935,
"step": 1579
},
{
"epoch": 1.43,
"learning_rate": 0.00019685880595905343,
"loss": 0.2491,
"step": 1580
},
{
"epoch": 1.43,
"learning_rate": 0.0001962751336451844,
"loss": 0.2942,
"step": 1581
},
{
"epoch": 1.43,
"learning_rate": 0.00019569211650802076,
"loss": 0.2753,
"step": 1582
},
{
"epoch": 1.44,
"learning_rate": 0.00019510975580521155,
"loss": 0.2604,
"step": 1583
},
{
"epoch": 1.44,
"learning_rate": 0.0001945280527929894,
"loss": 0.269,
"step": 1584
},
{
"epoch": 1.44,
"learning_rate": 0.00019394700872616856,
"loss": 0.2738,
"step": 1585
},
{
"epoch": 1.44,
"learning_rate": 0.00019336662485814178,
"loss": 0.2773,
"step": 1586
},
{
"epoch": 1.44,
"learning_rate": 0.00019278690244087766,
"loss": 0.2761,
"step": 1587
},
{
"epoch": 1.44,
"learning_rate": 0.0001922078427249181,
"loss": 0.2961,
"step": 1588
},
{
"epoch": 1.44,
"learning_rate": 0.00019162944695937513,
"loss": 0.2651,
"step": 1589
},
{
"epoch": 1.44,
"learning_rate": 0.00019105171639192852,
"loss": 0.2826,
"step": 1590
},
{
"epoch": 1.44,
"learning_rate": 0.0001904746522688236,
"loss": 0.2662,
"step": 1591
},
{
"epoch": 1.44,
"learning_rate": 0.00018989825583486764,
"loss": 0.265,
"step": 1592
},
{
"epoch": 1.44,
"learning_rate": 0.00018932252833342783,
"loss": 0.2587,
"step": 1593
},
{
"epoch": 1.45,
"learning_rate": 0.00018874747100642843,
"loss": 0.2792,
"step": 1594
},
{
"epoch": 1.45,
"learning_rate": 0.00018817308509434794,
"loss": 0.2705,
"step": 1595
},
{
"epoch": 1.45,
"learning_rate": 0.00018759937183621635,
"loss": 0.2754,
"step": 1596
},
{
"epoch": 1.45,
"learning_rate": 0.00018702633246961282,
"loss": 0.2838,
"step": 1597
},
{
"epoch": 1.45,
"learning_rate": 0.0001864539682306629,
"loss": 0.2961,
"step": 1598
},
{
"epoch": 1.45,
"learning_rate": 0.0001858822803540357,
"loss": 0.2728,
"step": 1599
},
{
"epoch": 1.45,
"learning_rate": 0.00018531127007294156,
"loss": 0.2825,
"step": 1600
},
{
"epoch": 1.45,
"learning_rate": 0.00018474093861912854,
"loss": 0.2947,
"step": 1601
},
{
"epoch": 1.45,
"learning_rate": 0.00018417128722288112,
"loss": 0.2695,
"step": 1602
},
{
"epoch": 1.45,
"learning_rate": 0.00018360231711301618,
"loss": 0.2855,
"step": 1603
},
{
"epoch": 1.45,
"learning_rate": 0.00018303402951688126,
"loss": 0.2715,
"step": 1604
},
{
"epoch": 1.46,
"learning_rate": 0.00018246642566035177,
"loss": 0.2704,
"step": 1605
},
{
"epoch": 1.46,
"learning_rate": 0.0001818995067678279,
"loss": 0.2686,
"step": 1606
},
{
"epoch": 1.46,
"learning_rate": 0.0001813332740622327,
"loss": 0.271,
"step": 1607
},
{
"epoch": 1.46,
"learning_rate": 0.0001807677287650083,
"loss": 0.2793,
"step": 1608
},
{
"epoch": 1.46,
"learning_rate": 0.00018020287209611464,
"loss": 0.2675,
"step": 1609
},
{
"epoch": 1.46,
"learning_rate": 0.00017963870527402598,
"loss": 0.2947,
"step": 1610
},
{
"epoch": 1.46,
"learning_rate": 0.00017907522951572847,
"loss": 0.2867,
"step": 1611
},
{
"epoch": 1.46,
"learning_rate": 0.00017851244603671768,
"loss": 0.2803,
"step": 1612
},
{
"epoch": 1.46,
"learning_rate": 0.00017795035605099548,
"loss": 0.2776,
"step": 1613
},
{
"epoch": 1.46,
"learning_rate": 0.0001773889607710682,
"loss": 0.2637,
"step": 1614
},
{
"epoch": 1.46,
"learning_rate": 0.00017682826140794317,
"loss": 0.2811,
"step": 1615
},
{
"epoch": 1.47,
"learning_rate": 0.00017626825917112693,
"loss": 0.2693,
"step": 1616
},
{
"epoch": 1.47,
"learning_rate": 0.00017570895526862202,
"loss": 0.2579,
"step": 1617
},
{
"epoch": 1.47,
"learning_rate": 0.00017515035090692467,
"loss": 0.2671,
"step": 1618
},
{
"epoch": 1.47,
"learning_rate": 0.0001745924472910221,
"loss": 0.2783,
"step": 1619
},
{
"epoch": 1.47,
"learning_rate": 0.0001740352456243896,
"loss": 0.2546,
"step": 1620
},
{
"epoch": 1.47,
"learning_rate": 0.0001734787471089887,
"loss": 0.2627,
"step": 1621
},
{
"epoch": 1.47,
"learning_rate": 0.000172922952945264,
"loss": 0.2648,
"step": 1622
},
{
"epoch": 1.47,
"learning_rate": 0.00017236786433214057,
"loss": 0.2811,
"step": 1623
},
{
"epoch": 1.47,
"learning_rate": 0.00017181348246702184,
"loss": 0.2672,
"step": 1624
},
{
"epoch": 1.47,
"learning_rate": 0.00017125980854578615,
"loss": 0.2598,
"step": 1625
},
{
"epoch": 1.47,
"learning_rate": 0.00017070684376278532,
"loss": 0.2822,
"step": 1626
},
{
"epoch": 1.48,
"learning_rate": 0.00017015458931084082,
"loss": 0.283,
"step": 1627
},
{
"epoch": 1.48,
"learning_rate": 0.0001696030463812424,
"loss": 0.2815,
"step": 1628
},
{
"epoch": 1.48,
"learning_rate": 0.00016905221616374467,
"loss": 0.2909,
"step": 1629
},
{
"epoch": 1.48,
"learning_rate": 0.00016850209984656494,
"loss": 0.2704,
"step": 1630
},
{
"epoch": 1.48,
"learning_rate": 0.0001679526986163804,
"loss": 0.2518,
"step": 1631
},
{
"epoch": 1.48,
"learning_rate": 0.00016740401365832608,
"loss": 0.2677,
"step": 1632
},
{
"epoch": 1.48,
"learning_rate": 0.00016685604615599114,
"loss": 0.2681,
"step": 1633
},
{
"epoch": 1.48,
"learning_rate": 0.0001663087972914178,
"loss": 0.2814,
"step": 1634
},
{
"epoch": 1.48,
"learning_rate": 0.0001657622682450979,
"loss": 0.2798,
"step": 1635
},
{
"epoch": 1.48,
"learning_rate": 0.00016521646019597048,
"loss": 0.2706,
"step": 1636
},
{
"epoch": 1.48,
"learning_rate": 0.00016467137432141904,
"loss": 0.2723,
"step": 1637
},
{
"epoch": 1.49,
"learning_rate": 0.00016412701179726986,
"loss": 0.2842,
"step": 1638
},
{
"epoch": 1.49,
"learning_rate": 0.0001635833737977881,
"loss": 0.2769,
"step": 1639
},
{
"epoch": 1.49,
"learning_rate": 0.0001630404614956766,
"loss": 0.2465,
"step": 1640
},
{
"epoch": 1.49,
"learning_rate": 0.00016249827606207252,
"loss": 0.2725,
"step": 1641
},
{
"epoch": 1.49,
"learning_rate": 0.00016195681866654516,
"loss": 0.2803,
"step": 1642
},
{
"epoch": 1.49,
"learning_rate": 0.00016141609047709327,
"loss": 0.2877,
"step": 1643
},
{
"epoch": 1.49,
"learning_rate": 0.00016087609266014286,
"loss": 0.2714,
"step": 1644
},
{
"epoch": 1.49,
"learning_rate": 0.00016033682638054377,
"loss": 0.2878,
"step": 1645
},
{
"epoch": 1.49,
"learning_rate": 0.00015979829280156842,
"loss": 0.2655,
"step": 1646
},
{
"epoch": 1.49,
"learning_rate": 0.0001592604930849086,
"loss": 0.2687,
"step": 1647
},
{
"epoch": 1.49,
"learning_rate": 0.00015872342839067305,
"loss": 0.2756,
"step": 1648
},
{
"epoch": 1.5,
"learning_rate": 0.0001581870998773845,
"loss": 0.2521,
"step": 1649
},
{
"epoch": 1.5,
"learning_rate": 0.0001576515087019783,
"loss": 0.259,
"step": 1650
},
{
"epoch": 1.5,
"learning_rate": 0.00015711665601979906,
"loss": 0.2736,
"step": 1651
},
{
"epoch": 1.5,
"learning_rate": 0.000156582542984598,
"loss": 0.262,
"step": 1652
},
{
"epoch": 1.5,
"learning_rate": 0.00015604917074853126,
"loss": 0.2491,
"step": 1653
},
{
"epoch": 1.5,
"learning_rate": 0.00015551654046215668,
"loss": 0.2803,
"step": 1654
},
{
"epoch": 1.5,
"learning_rate": 0.0001549846532744319,
"loss": 0.2659,
"step": 1655
},
{
"epoch": 1.5,
"learning_rate": 0.00015445351033271148,
"loss": 0.277,
"step": 1656
},
{
"epoch": 1.5,
"learning_rate": 0.00015392311278274428,
"loss": 0.2755,
"step": 1657
},
{
"epoch": 1.5,
"learning_rate": 0.00015339346176867146,
"loss": 0.2732,
"step": 1658
},
{
"epoch": 1.5,
"learning_rate": 0.00015286455843302395,
"loss": 0.2789,
"step": 1659
},
{
"epoch": 1.5,
"learning_rate": 0.00015233640391671972,
"loss": 0.2699,
"step": 1660
},
{
"epoch": 1.51,
"learning_rate": 0.00015180899935906113,
"loss": 0.2455,
"step": 1661
},
{
"epoch": 1.51,
"learning_rate": 0.00015128234589773315,
"loss": 0.2966,
"step": 1662
},
{
"epoch": 1.51,
"learning_rate": 0.0001507564446688006,
"loss": 0.2698,
"step": 1663
},
{
"epoch": 1.51,
"learning_rate": 0.00015023129680670515,
"loss": 0.2659,
"step": 1664
},
{
"epoch": 1.51,
"learning_rate": 0.0001497069034442638,
"loss": 0.2745,
"step": 1665
},
{
"epoch": 1.51,
"learning_rate": 0.00014918326571266582,
"loss": 0.2781,
"step": 1666
},
{
"epoch": 1.51,
"learning_rate": 0.00014866038474147054,
"loss": 0.2662,
"step": 1667
},
{
"epoch": 1.51,
"learning_rate": 0.00014813826165860488,
"loss": 0.2521,
"step": 1668
},
{
"epoch": 1.51,
"learning_rate": 0.00014761689759036056,
"loss": 0.2783,
"step": 1669
},
{
"epoch": 1.51,
"learning_rate": 0.00014709629366139242,
"loss": 0.2855,
"step": 1670
},
{
"epoch": 1.51,
"learning_rate": 0.00014657645099471544,
"loss": 0.2739,
"step": 1671
},
{
"epoch": 1.52,
"learning_rate": 0.00014605737071170255,
"loss": 0.2537,
"step": 1672
},
{
"epoch": 1.52,
"learning_rate": 0.00014553905393208173,
"loss": 0.2826,
"step": 1673
},
{
"epoch": 1.52,
"learning_rate": 0.00014502150177393426,
"loss": 0.2883,
"step": 1674
},
{
"epoch": 1.52,
"learning_rate": 0.00014450471535369224,
"loss": 0.2767,
"step": 1675
},
{
"epoch": 1.52,
"learning_rate": 0.0001439886957861355,
"loss": 0.267,
"step": 1676
},
{
"epoch": 1.52,
"learning_rate": 0.0001434734441843899,
"loss": 0.2561,
"step": 1677
},
{
"epoch": 1.52,
"learning_rate": 0.00014295896165992473,
"loss": 0.2837,
"step": 1678
},
{
"epoch": 1.52,
"learning_rate": 0.00014244524932255026,
"loss": 0.287,
"step": 1679
},
{
"epoch": 1.52,
"learning_rate": 0.00014193230828041532,
"loss": 0.2932,
"step": 1680
},
{
"epoch": 1.52,
"learning_rate": 0.0001414201396400051,
"loss": 0.27,
"step": 1681
},
{
"epoch": 1.52,
"learning_rate": 0.0001409087445061381,
"loss": 0.2672,
"step": 1682
},
{
"epoch": 1.53,
"learning_rate": 0.00014039812398196488,
"loss": 0.2526,
"step": 1683
},
{
"epoch": 1.53,
"learning_rate": 0.0001398882791689649,
"loss": 0.2628,
"step": 1684
},
{
"epoch": 1.53,
"learning_rate": 0.0001393792111669439,
"loss": 0.2734,
"step": 1685
},
{
"epoch": 1.53,
"learning_rate": 0.00013887092107403242,
"loss": 0.2494,
"step": 1686
},
{
"epoch": 1.53,
"learning_rate": 0.00013836340998668283,
"loss": 0.2773,
"step": 1687
},
{
"epoch": 1.53,
"learning_rate": 0.00013785667899966685,
"loss": 0.2648,
"step": 1688
},
{
"epoch": 1.53,
"learning_rate": 0.00013735072920607362,
"loss": 0.2762,
"step": 1689
},
{
"epoch": 1.53,
"learning_rate": 0.00013684556169730704,
"loss": 0.2563,
"step": 1690
},
{
"epoch": 1.53,
"learning_rate": 0.00013634117756308368,
"loss": 0.2642,
"step": 1691
},
{
"epoch": 1.53,
"learning_rate": 0.00013583757789143014,
"loss": 0.2622,
"step": 1692
},
{
"epoch": 1.53,
"learning_rate": 0.00013533476376868087,
"loss": 0.2554,
"step": 1693
},
{
"epoch": 1.54,
"learning_rate": 0.00013483273627947555,
"loss": 0.2709,
"step": 1694
},
{
"epoch": 1.54,
"learning_rate": 0.0001343314965067573,
"loss": 0.2502,
"step": 1695
},
{
"epoch": 1.54,
"learning_rate": 0.00013383104553177,
"loss": 0.2673,
"step": 1696
},
{
"epoch": 1.54,
"learning_rate": 0.00013333138443405562,
"loss": 0.2701,
"step": 1697
},
{
"epoch": 1.54,
"learning_rate": 0.0001328325142914527,
"loss": 0.2654,
"step": 1698
},
{
"epoch": 1.54,
"learning_rate": 0.00013233443618009338,
"loss": 0.2639,
"step": 1699
},
{
"epoch": 1.54,
"learning_rate": 0.00013183715117440144,
"loss": 0.2694,
"step": 1700
},
{
"epoch": 1.54,
"learning_rate": 0.00013134066034708935,
"loss": 0.2778,
"step": 1701
},
{
"epoch": 1.54,
"learning_rate": 0.00013084496476915698,
"loss": 0.2603,
"step": 1702
},
{
"epoch": 1.54,
"learning_rate": 0.00013035006550988842,
"loss": 0.2589,
"step": 1703
},
{
"epoch": 1.54,
"learning_rate": 0.0001298559636368502,
"loss": 0.2681,
"step": 1704
},
{
"epoch": 1.55,
"learning_rate": 0.0001293626602158887,
"loss": 0.2609,
"step": 1705
},
{
"epoch": 1.55,
"learning_rate": 0.00012887015631112765,
"loss": 0.2966,
"step": 1706
},
{
"epoch": 1.55,
"learning_rate": 0.00012837845298496647,
"loss": 0.2542,
"step": 1707
},
{
"epoch": 1.55,
"learning_rate": 0.00012788755129807767,
"loss": 0.2762,
"step": 1708
},
{
"epoch": 1.55,
"learning_rate": 0.00012739745230940403,
"loss": 0.2839,
"step": 1709
},
{
"epoch": 1.55,
"learning_rate": 0.00012690815707615727,
"loss": 0.2823,
"step": 1710
},
{
"epoch": 1.55,
"learning_rate": 0.00012641966665381517,
"loss": 0.2808,
"step": 1711
},
{
"epoch": 1.55,
"learning_rate": 0.00012593198209611951,
"loss": 0.2635,
"step": 1712
},
{
"epoch": 1.55,
"learning_rate": 0.0001254451044550733,
"loss": 0.2783,
"step": 1713
},
{
"epoch": 1.55,
"learning_rate": 0.00012495903478093929,
"loss": 0.2664,
"step": 1714
},
{
"epoch": 1.55,
"learning_rate": 0.00012447377412223737,
"loss": 0.2676,
"step": 1715
},
{
"epoch": 1.56,
"learning_rate": 0.0001239893235257421,
"loss": 0.2646,
"step": 1716
},
{
"epoch": 1.56,
"learning_rate": 0.00012350568403648088,
"loss": 0.2703,
"step": 1717
},
{
"epoch": 1.56,
"learning_rate": 0.00012302285669773082,
"loss": 0.2473,
"step": 1718
},
{
"epoch": 1.56,
"learning_rate": 0.00012254084255101773,
"loss": 0.2611,
"step": 1719
},
{
"epoch": 1.56,
"learning_rate": 0.00012205964263611325,
"loss": 0.292,
"step": 1720
},
{
"epoch": 1.56,
"learning_rate": 0.00012157925799103203,
"loss": 0.2775,
"step": 1721
},
{
"epoch": 1.56,
"learning_rate": 0.00012109968965203066,
"loss": 0.2676,
"step": 1722
},
{
"epoch": 1.56,
"learning_rate": 0.00012062093865360457,
"loss": 0.2869,
"step": 1723
},
{
"epoch": 1.56,
"learning_rate": 0.00012014300602848627,
"loss": 0.2555,
"step": 1724
},
{
"epoch": 1.56,
"learning_rate": 0.00011966589280764257,
"loss": 0.2637,
"step": 1725
},
{
"epoch": 1.56,
"learning_rate": 0.00011918960002027306,
"loss": 0.264,
"step": 1726
},
{
"epoch": 1.57,
"learning_rate": 0.00011871412869380732,
"loss": 0.2671,
"step": 1727
},
{
"epoch": 1.57,
"learning_rate": 0.00011823947985390316,
"loss": 0.2692,
"step": 1728
},
{
"epoch": 1.57,
"learning_rate": 0.00011776565452444388,
"loss": 0.2742,
"step": 1729
},
{
"epoch": 1.57,
"learning_rate": 0.00011729265372753672,
"loss": 0.2554,
"step": 1730
},
{
"epoch": 1.57,
"learning_rate": 0.0001168204784835098,
"loss": 0.2878,
"step": 1731
},
{
"epoch": 1.57,
"learning_rate": 0.00011634912981091094,
"loss": 0.2866,
"step": 1732
},
{
"epoch": 1.57,
"learning_rate": 0.00011587860872650435,
"loss": 0.2653,
"step": 1733
},
{
"epoch": 1.57,
"learning_rate": 0.00011540891624526956,
"loss": 0.2804,
"step": 1734
},
{
"epoch": 1.57,
"learning_rate": 0.00011494005338039836,
"loss": 0.2539,
"step": 1735
},
{
"epoch": 1.57,
"learning_rate": 0.00011447202114329325,
"loss": 0.2688,
"step": 1736
},
{
"epoch": 1.57,
"learning_rate": 0.00011400482054356448,
"loss": 0.2739,
"step": 1737
},
{
"epoch": 1.58,
"learning_rate": 0.00011353845258902867,
"loss": 0.2661,
"step": 1738
},
{
"epoch": 1.58,
"learning_rate": 0.00011307291828570638,
"loss": 0.2659,
"step": 1739
},
{
"epoch": 1.58,
"learning_rate": 0.00011260821863781956,
"loss": 0.2585,
"step": 1740
},
{
"epoch": 1.58,
"learning_rate": 0.00011214435464779005,
"loss": 0.2612,
"step": 1741
},
{
"epoch": 1.58,
"learning_rate": 0.00011168132731623688,
"loss": 0.2686,
"step": 1742
},
{
"epoch": 1.58,
"learning_rate": 0.0001112191376419741,
"loss": 0.2672,
"step": 1743
},
{
"epoch": 1.58,
"learning_rate": 0.0001107577866220092,
"loss": 0.2452,
"step": 1744
},
{
"epoch": 1.58,
"learning_rate": 0.0001102972752515402,
"loss": 0.2557,
"step": 1745
},
{
"epoch": 1.58,
"learning_rate": 0.00010983760452395414,
"loss": 0.2686,
"step": 1746
},
{
"epoch": 1.58,
"learning_rate": 0.00010937877543082464,
"loss": 0.2766,
"step": 1747
},
{
"epoch": 1.58,
"learning_rate": 0.00010892078896190971,
"loss": 0.2664,
"step": 1748
},
{
"epoch": 1.59,
"learning_rate": 0.00010846364610514991,
"loss": 0.2909,
"step": 1749
},
{
"epoch": 1.59,
"learning_rate": 0.00010800734784666555,
"loss": 0.2656,
"step": 1750
},
{
"epoch": 1.59,
"learning_rate": 0.00010755189517075553,
"loss": 0.2627,
"step": 1751
},
{
"epoch": 1.59,
"learning_rate": 0.00010709728905989446,
"loss": 0.287,
"step": 1752
},
{
"epoch": 1.59,
"learning_rate": 0.00010664353049473086,
"loss": 0.2721,
"step": 1753
},
{
"epoch": 1.59,
"learning_rate": 0.00010619062045408501,
"loss": 0.2607,
"step": 1754
},
{
"epoch": 1.59,
"learning_rate": 0.00010573855991494657,
"loss": 0.2854,
"step": 1755
},
{
"epoch": 1.59,
"learning_rate": 0.00010528734985247317,
"loss": 0.2515,
"step": 1756
},
{
"epoch": 1.59,
"learning_rate": 0.00010483699123998718,
"loss": 0.2699,
"step": 1757
},
{
"epoch": 1.59,
"learning_rate": 0.00010438748504897495,
"loss": 0.2744,
"step": 1758
},
{
"epoch": 1.59,
"learning_rate": 0.00010393883224908357,
"loss": 0.2549,
"step": 1759
},
{
"epoch": 1.6,
"learning_rate": 0.00010349103380811953,
"loss": 0.2675,
"step": 1760
},
{
"epoch": 1.6,
"learning_rate": 0.00010304409069204629,
"loss": 0.2793,
"step": 1761
},
{
"epoch": 1.6,
"learning_rate": 0.00010259800386498202,
"loss": 0.2648,
"step": 1762
},
{
"epoch": 1.6,
"learning_rate": 0.00010215277428919806,
"loss": 0.2681,
"step": 1763
},
{
"epoch": 1.6,
"learning_rate": 0.0001017084029251164,
"loss": 0.2583,
"step": 1764
},
{
"epoch": 1.6,
"learning_rate": 0.00010126489073130779,
"loss": 0.2631,
"step": 1765
},
{
"epoch": 1.6,
"learning_rate": 0.00010082223866448953,
"loss": 0.288,
"step": 1766
},
{
"epoch": 1.6,
"learning_rate": 0.00010038044767952381,
"loss": 0.2612,
"step": 1767
},
{
"epoch": 1.6,
"learning_rate": 9.993951872941492e-05,
"loss": 0.2574,
"step": 1768
},
{
"epoch": 1.6,
"learning_rate": 9.949945276530781e-05,
"loss": 0.2737,
"step": 1769
},
{
"epoch": 1.6,
"learning_rate": 9.906025073648583e-05,
"loss": 0.2491,
"step": 1770
},
{
"epoch": 1.61,
"learning_rate": 9.862191359036882e-05,
"loss": 0.266,
"step": 1771
},
{
"epoch": 1.61,
"learning_rate": 9.818444227251089e-05,
"loss": 0.2782,
"step": 1772
},
{
"epoch": 1.61,
"learning_rate": 9.774783772659851e-05,
"loss": 0.2504,
"step": 1773
},
{
"epoch": 1.61,
"learning_rate": 9.731210089444804e-05,
"loss": 0.2565,
"step": 1774
},
{
"epoch": 1.61,
"learning_rate": 9.687723271600445e-05,
"loss": 0.2646,
"step": 1775
},
{
"epoch": 1.61,
"learning_rate": 9.644323412933876e-05,
"loss": 0.2687,
"step": 1776
},
{
"epoch": 1.61,
"learning_rate": 9.60101060706462e-05,
"loss": 0.2612,
"step": 1777
},
{
"epoch": 1.61,
"learning_rate": 9.557784947424414e-05,
"loss": 0.2675,
"step": 1778
},
{
"epoch": 1.61,
"learning_rate": 9.51464652725702e-05,
"loss": 0.2606,
"step": 1779
},
{
"epoch": 1.61,
"learning_rate": 9.471595439617986e-05,
"loss": 0.2537,
"step": 1780
},
{
"epoch": 1.61,
"learning_rate": 9.42863177737447e-05,
"loss": 0.2675,
"step": 1781
},
{
"epoch": 1.62,
"learning_rate": 9.385755633205073e-05,
"loss": 0.2732,
"step": 1782
},
{
"epoch": 1.62,
"learning_rate": 9.342967099599587e-05,
"loss": 0.2689,
"step": 1783
},
{
"epoch": 1.62,
"learning_rate": 9.30026626885882e-05,
"loss": 0.2677,
"step": 1784
},
{
"epoch": 1.62,
"learning_rate": 9.257653233094399e-05,
"loss": 0.2699,
"step": 1785
},
{
"epoch": 1.62,
"learning_rate": 9.215128084228563e-05,
"loss": 0.2581,
"step": 1786
},
{
"epoch": 1.62,
"learning_rate": 9.172690913993937e-05,
"loss": 0.2795,
"step": 1787
},
{
"epoch": 1.62,
"learning_rate": 9.130341813933407e-05,
"loss": 0.2469,
"step": 1788
},
{
"epoch": 1.62,
"learning_rate": 9.088080875399862e-05,
"loss": 0.2692,
"step": 1789
},
{
"epoch": 1.62,
"learning_rate": 9.045908189556007e-05,
"loss": 0.2626,
"step": 1790
},
{
"epoch": 1.62,
"learning_rate": 9.003823847374204e-05,
"loss": 0.2684,
"step": 1791
},
{
"epoch": 1.62,
"learning_rate": 8.961827939636197e-05,
"loss": 0.2521,
"step": 1792
},
{
"epoch": 1.63,
"learning_rate": 8.919920556932987e-05,
"loss": 0.2411,
"step": 1793
},
{
"epoch": 1.63,
"learning_rate": 8.878101789664622e-05,
"loss": 0.2687,
"step": 1794
},
{
"epoch": 1.63,
"learning_rate": 8.836371728039988e-05,
"loss": 0.2618,
"step": 1795
},
{
"epoch": 1.63,
"learning_rate": 8.794730462076633e-05,
"loss": 0.2781,
"step": 1796
},
{
"epoch": 1.63,
"learning_rate": 8.753178081600538e-05,
"loss": 0.2537,
"step": 1797
},
{
"epoch": 1.63,
"learning_rate": 8.711714676245974e-05,
"loss": 0.2601,
"step": 1798
},
{
"epoch": 1.63,
"learning_rate": 8.67034033545524e-05,
"loss": 0.2654,
"step": 1799
},
{
"epoch": 1.63,
"learning_rate": 8.629055148478543e-05,
"loss": 0.2471,
"step": 1800
},
{
"epoch": 1.63,
"learning_rate": 8.58785920437376e-05,
"loss": 0.2708,
"step": 1801
},
{
"epoch": 1.63,
"learning_rate": 8.546752592006268e-05,
"loss": 0.2644,
"step": 1802
},
{
"epoch": 1.63,
"learning_rate": 8.505735400048747e-05,
"loss": 0.2654,
"step": 1803
},
{
"epoch": 1.64,
"learning_rate": 8.46480771698096e-05,
"loss": 0.2709,
"step": 1804
},
{
"epoch": 1.64,
"learning_rate": 8.423969631089595e-05,
"loss": 0.2682,
"step": 1805
},
{
"epoch": 1.64,
"learning_rate": 8.38322123046808e-05,
"loss": 0.2476,
"step": 1806
},
{
"epoch": 1.64,
"learning_rate": 8.342562603016379e-05,
"loss": 0.256,
"step": 1807
},
{
"epoch": 1.64,
"learning_rate": 8.301993836440785e-05,
"loss": 0.2871,
"step": 1808
},
{
"epoch": 1.64,
"learning_rate": 8.261515018253773e-05,
"loss": 0.265,
"step": 1809
},
{
"epoch": 1.64,
"learning_rate": 8.221126235773779e-05,
"loss": 0.2542,
"step": 1810
},
{
"epoch": 1.64,
"learning_rate": 8.180827576124988e-05,
"loss": 0.2726,
"step": 1811
},
{
"epoch": 1.64,
"learning_rate": 8.140619126237226e-05,
"loss": 0.2627,
"step": 1812
},
{
"epoch": 1.64,
"learning_rate": 8.100500972845687e-05,
"loss": 0.2592,
"step": 1813
},
{
"epoch": 1.64,
"learning_rate": 8.060473202490809e-05,
"loss": 0.2485,
"step": 1814
},
{
"epoch": 1.65,
"learning_rate": 8.02053590151805e-05,
"loss": 0.2605,
"step": 1815
},
{
"epoch": 1.65,
"learning_rate": 7.980689156077719e-05,
"loss": 0.2765,
"step": 1816
},
{
"epoch": 1.65,
"learning_rate": 7.940933052124771e-05,
"loss": 0.2479,
"step": 1817
},
{
"epoch": 1.65,
"learning_rate": 7.901267675418627e-05,
"loss": 0.2775,
"step": 1818
},
{
"epoch": 1.65,
"learning_rate": 7.861693111523021e-05,
"loss": 0.2543,
"step": 1819
},
{
"epoch": 1.65,
"learning_rate": 7.822209445805795e-05,
"loss": 0.2783,
"step": 1820
},
{
"epoch": 1.65,
"learning_rate": 7.782816763438677e-05,
"loss": 0.2729,
"step": 1821
},
{
"epoch": 1.65,
"learning_rate": 7.743515149397185e-05,
"loss": 0.2649,
"step": 1822
},
{
"epoch": 1.65,
"learning_rate": 7.704304688460317e-05,
"loss": 0.2568,
"step": 1823
},
{
"epoch": 1.65,
"learning_rate": 7.6651854652105e-05,
"loss": 0.282,
"step": 1824
},
{
"epoch": 1.65,
"learning_rate": 7.626157564033331e-05,
"loss": 0.2745,
"step": 1825
},
{
"epoch": 1.66,
"learning_rate": 7.587221069117411e-05,
"loss": 0.2617,
"step": 1826
},
{
"epoch": 1.66,
"learning_rate": 7.548376064454165e-05,
"loss": 0.252,
"step": 1827
},
{
"epoch": 1.66,
"learning_rate": 7.509622633837671e-05,
"loss": 0.272,
"step": 1828
},
{
"epoch": 1.66,
"learning_rate": 7.470960860864446e-05,
"loss": 0.2769,
"step": 1829
},
{
"epoch": 1.66,
"learning_rate": 7.432390828933295e-05,
"loss": 0.2657,
"step": 1830
},
{
"epoch": 1.66,
"learning_rate": 7.393912621245142e-05,
"loss": 0.2574,
"step": 1831
},
{
"epoch": 1.66,
"learning_rate": 7.355526320802824e-05,
"loss": 0.2716,
"step": 1832
},
{
"epoch": 1.66,
"learning_rate": 7.317232010410918e-05,
"loss": 0.2748,
"step": 1833
},
{
"epoch": 1.66,
"learning_rate": 7.279029772675571e-05,
"loss": 0.2552,
"step": 1834
},
{
"epoch": 1.66,
"learning_rate": 7.240919690004322e-05,
"loss": 0.2469,
"step": 1835
},
{
"epoch": 1.66,
"learning_rate": 7.202901844605891e-05,
"loss": 0.2605,
"step": 1836
},
{
"epoch": 1.67,
"learning_rate": 7.164976318490058e-05,
"loss": 0.2539,
"step": 1837
},
{
"epoch": 1.67,
"learning_rate": 7.127143193467444e-05,
"loss": 0.2742,
"step": 1838
},
{
"epoch": 1.67,
"learning_rate": 7.089402551149365e-05,
"loss": 0.2426,
"step": 1839
},
{
"epoch": 1.67,
"learning_rate": 7.051754472947625e-05,
"loss": 0.2687,
"step": 1840
},
{
"epoch": 1.67,
"learning_rate": 7.014199040074353e-05,
"loss": 0.2604,
"step": 1841
},
{
"epoch": 1.67,
"learning_rate": 6.976736333541816e-05,
"loss": 0.2479,
"step": 1842
},
{
"epoch": 1.67,
"learning_rate": 6.939366434162287e-05,
"loss": 0.2546,
"step": 1843
},
{
"epoch": 1.67,
"learning_rate": 6.90208942254782e-05,
"loss": 0.2648,
"step": 1844
},
{
"epoch": 1.67,
"learning_rate": 6.864905379110114e-05,
"loss": 0.2526,
"step": 1845
},
{
"epoch": 1.67,
"learning_rate": 6.82781438406031e-05,
"loss": 0.2661,
"step": 1846
},
{
"epoch": 1.67,
"learning_rate": 6.790816517408849e-05,
"loss": 0.2332,
"step": 1847
},
{
"epoch": 1.68,
"learning_rate": 6.753911858965233e-05,
"loss": 0.2668,
"step": 1848
},
{
"epoch": 1.68,
"learning_rate": 6.717100488337952e-05,
"loss": 0.2399,
"step": 1849
},
{
"epoch": 1.68,
"learning_rate": 6.680382484934244e-05,
"loss": 0.2448,
"step": 1850
},
{
"epoch": 1.68,
"learning_rate": 6.643757927959943e-05,
"loss": 0.2665,
"step": 1851
},
{
"epoch": 1.68,
"learning_rate": 6.607226896419305e-05,
"loss": 0.2695,
"step": 1852
},
{
"epoch": 1.68,
"learning_rate": 6.570789469114823e-05,
"loss": 0.275,
"step": 1853
},
{
"epoch": 1.68,
"learning_rate": 6.534445724647109e-05,
"loss": 0.259,
"step": 1854
},
{
"epoch": 1.68,
"learning_rate": 6.498195741414637e-05,
"loss": 0.2615,
"step": 1855
},
{
"epoch": 1.68,
"learning_rate": 6.462039597613673e-05,
"loss": 0.2401,
"step": 1856
},
{
"epoch": 1.68,
"learning_rate": 6.425977371238029e-05,
"loss": 0.2681,
"step": 1857
},
{
"epoch": 1.68,
"learning_rate": 6.39000914007894e-05,
"loss": 0.2617,
"step": 1858
},
{
"epoch": 1.69,
"learning_rate": 6.35413498172489e-05,
"loss": 0.2614,
"step": 1859
},
{
"epoch": 1.69,
"learning_rate": 6.318354973561385e-05,
"loss": 0.2933,
"step": 1860
},
{
"epoch": 1.69,
"learning_rate": 6.282669192770896e-05,
"loss": 0.2438,
"step": 1861
},
{
"epoch": 1.69,
"learning_rate": 6.247077716332594e-05,
"loss": 0.2501,
"step": 1862
},
{
"epoch": 1.69,
"learning_rate": 6.211580621022245e-05,
"loss": 0.2535,
"step": 1863
},
{
"epoch": 1.69,
"learning_rate": 6.176177983412012e-05,
"loss": 0.2482,
"step": 1864
},
{
"epoch": 1.69,
"learning_rate": 6.140869879870287e-05,
"loss": 0.2621,
"step": 1865
},
{
"epoch": 1.69,
"learning_rate": 6.105656386561576e-05,
"loss": 0.2642,
"step": 1866
},
{
"epoch": 1.69,
"learning_rate": 6.070537579446239e-05,
"loss": 0.2737,
"step": 1867
},
{
"epoch": 1.69,
"learning_rate": 6.035513534280435e-05,
"loss": 0.2804,
"step": 1868
},
{
"epoch": 1.69,
"learning_rate": 6.000584326615899e-05,
"loss": 0.2653,
"step": 1869
},
{
"epoch": 1.7,
"learning_rate": 5.965750031799771e-05,
"loss": 0.243,
"step": 1870
},
{
"epoch": 1.7,
"learning_rate": 5.9310107249744784e-05,
"loss": 0.2622,
"step": 1871
},
{
"epoch": 1.7,
"learning_rate": 5.896366481077503e-05,
"loss": 0.2791,
"step": 1872
},
{
"epoch": 1.7,
"learning_rate": 5.8618173748413105e-05,
"loss": 0.2852,
"step": 1873
},
{
"epoch": 1.7,
"learning_rate": 5.827363480793108e-05,
"loss": 0.2686,
"step": 1874
},
{
"epoch": 1.7,
"learning_rate": 5.793004873254731e-05,
"loss": 0.2582,
"step": 1875
},
{
"epoch": 1.7,
"learning_rate": 5.758741626342479e-05,
"loss": 0.25,
"step": 1876
},
{
"epoch": 1.7,
"learning_rate": 5.72457381396691e-05,
"loss": 0.2716,
"step": 1877
},
{
"epoch": 1.7,
"learning_rate": 5.6905015098327575e-05,
"loss": 0.2635,
"step": 1878
},
{
"epoch": 1.7,
"learning_rate": 5.6565247874386996e-05,
"loss": 0.249,
"step": 1879
},
{
"epoch": 1.7,
"learning_rate": 5.622643720077247e-05,
"loss": 0.2531,
"step": 1880
},
{
"epoch": 1.71,
"learning_rate": 5.588858380834572e-05,
"loss": 0.2592,
"step": 1881
},
{
"epoch": 1.71,
"learning_rate": 5.55516884259033e-05,
"loss": 0.2518,
"step": 1882
},
{
"epoch": 1.71,
"learning_rate": 5.521575178017546e-05,
"loss": 0.2557,
"step": 1883
},
{
"epoch": 1.71,
"learning_rate": 5.4880774595824246e-05,
"loss": 0.2542,
"step": 1884
},
{
"epoch": 1.71,
"learning_rate": 5.454675759544175e-05,
"loss": 0.2831,
"step": 1885
},
{
"epoch": 1.71,
"learning_rate": 5.421370149954907e-05,
"loss": 0.2703,
"step": 1886
},
{
"epoch": 1.71,
"learning_rate": 5.388160702659445e-05,
"loss": 0.2579,
"step": 1887
},
{
"epoch": 1.71,
"learning_rate": 5.355047489295195e-05,
"loss": 0.2605,
"step": 1888
},
{
"epoch": 1.71,
"learning_rate": 5.3220305812919315e-05,
"loss": 0.255,
"step": 1889
},
{
"epoch": 1.71,
"learning_rate": 5.289110049871726e-05,
"loss": 0.2838,
"step": 1890
},
{
"epoch": 1.71,
"learning_rate": 5.256285966048718e-05,
"loss": 0.2606,
"step": 1891
},
{
"epoch": 1.72,
"learning_rate": 5.223558400629025e-05,
"loss": 0.2634,
"step": 1892
},
{
"epoch": 1.72,
"learning_rate": 5.190927424210545e-05,
"loss": 0.2692,
"step": 1893
},
{
"epoch": 1.72,
"learning_rate": 5.158393107182835e-05,
"loss": 0.259,
"step": 1894
},
{
"epoch": 1.72,
"learning_rate": 5.125955519726921e-05,
"loss": 0.2646,
"step": 1895
},
{
"epoch": 1.72,
"learning_rate": 5.0936147318151996e-05,
"loss": 0.2665,
"step": 1896
},
{
"epoch": 1.72,
"learning_rate": 5.061370813211219e-05,
"loss": 0.2594,
"step": 1897
},
{
"epoch": 1.72,
"learning_rate": 5.0292238334695986e-05,
"loss": 0.2698,
"step": 1898
},
{
"epoch": 1.72,
"learning_rate": 4.997173861935833e-05,
"loss": 0.2638,
"step": 1899
},
{
"epoch": 1.72,
"learning_rate": 4.965220967746181e-05,
"loss": 0.2625,
"step": 1900
},
{
"epoch": 1.72,
"learning_rate": 4.93336521982744e-05,
"loss": 0.2559,
"step": 1901
},
{
"epoch": 1.72,
"learning_rate": 4.901606686896898e-05,
"loss": 0.2714,
"step": 1902
},
{
"epoch": 1.73,
"learning_rate": 4.869945437462126e-05,
"loss": 0.2737,
"step": 1903
},
{
"epoch": 1.73,
"learning_rate": 4.838381539820818e-05,
"loss": 0.2446,
"step": 1904
},
{
"epoch": 1.73,
"learning_rate": 4.8069150620606906e-05,
"loss": 0.2604,
"step": 1905
},
{
"epoch": 1.73,
"learning_rate": 4.775546072059311e-05,
"loss": 0.2537,
"step": 1906
},
{
"epoch": 1.73,
"learning_rate": 4.7442746374839365e-05,
"loss": 0.2557,
"step": 1907
},
{
"epoch": 1.73,
"learning_rate": 4.713100825791411e-05,
"loss": 0.2701,
"step": 1908
},
{
"epoch": 1.73,
"learning_rate": 4.68202470422795e-05,
"loss": 0.2524,
"step": 1909
},
{
"epoch": 1.73,
"learning_rate": 4.651046339829079e-05,
"loss": 0.2614,
"step": 1910
},
{
"epoch": 1.73,
"learning_rate": 4.620165799419418e-05,
"loss": 0.2671,
"step": 1911
},
{
"epoch": 1.73,
"learning_rate": 4.5893831496126024e-05,
"loss": 0.2731,
"step": 1912
},
{
"epoch": 1.73,
"learning_rate": 4.558698456811056e-05,
"loss": 0.2664,
"step": 1913
},
{
"epoch": 1.74,
"learning_rate": 4.528111787205935e-05,
"loss": 0.2727,
"step": 1914
},
{
"epoch": 1.74,
"learning_rate": 4.497623206776935e-05,
"loss": 0.2561,
"step": 1915
},
{
"epoch": 1.74,
"learning_rate": 4.467232781292146e-05,
"loss": 0.2673,
"step": 1916
},
{
"epoch": 1.74,
"learning_rate": 4.436940576307941e-05,
"loss": 0.2612,
"step": 1917
},
{
"epoch": 1.74,
"learning_rate": 4.406746657168809e-05,
"loss": 0.2532,
"step": 1918
},
{
"epoch": 1.74,
"learning_rate": 4.376651089007227e-05,
"loss": 0.2438,
"step": 1919
},
{
"epoch": 1.74,
"learning_rate": 4.346653936743533e-05,
"loss": 0.2643,
"step": 1920
},
{
"epoch": 1.74,
"learning_rate": 4.316755265085715e-05,
"loss": 0.2732,
"step": 1921
},
{
"epoch": 1.74,
"learning_rate": 4.2869551385293807e-05,
"loss": 0.2748,
"step": 1922
},
{
"epoch": 1.74,
"learning_rate": 4.257253621357532e-05,
"loss": 0.2546,
"step": 1923
},
{
"epoch": 1.74,
"learning_rate": 4.227650777640474e-05,
"loss": 0.2767,
"step": 1924
},
{
"epoch": 1.75,
"learning_rate": 4.198146671235642e-05,
"loss": 0.2648,
"step": 1925
},
{
"epoch": 1.75,
"learning_rate": 4.168741365787487e-05,
"loss": 0.2544,
"step": 1926
},
{
"epoch": 1.75,
"learning_rate": 4.139434924727359e-05,
"loss": 0.2443,
"step": 1927
},
{
"epoch": 1.75,
"learning_rate": 4.110227411273287e-05,
"loss": 0.2471,
"step": 1928
},
{
"epoch": 1.75,
"learning_rate": 4.081118888429946e-05,
"loss": 0.2495,
"step": 1929
},
{
"epoch": 1.75,
"learning_rate": 4.05210941898847e-05,
"loss": 0.2664,
"step": 1930
},
{
"epoch": 1.75,
"learning_rate": 4.0231990655263004e-05,
"loss": 0.2793,
"step": 1931
},
{
"epoch": 1.75,
"learning_rate": 3.994387890407092e-05,
"loss": 0.2734,
"step": 1932
},
{
"epoch": 1.75,
"learning_rate": 3.965675955780551e-05,
"loss": 0.2432,
"step": 1933
},
{
"epoch": 1.75,
"learning_rate": 3.937063323582296e-05,
"loss": 0.2615,
"step": 1934
},
{
"epoch": 1.75,
"learning_rate": 3.908550055533755e-05,
"loss": 0.2699,
"step": 1935
},
{
"epoch": 1.76,
"learning_rate": 3.88013621314201e-05,
"loss": 0.2554,
"step": 1936
},
{
"epoch": 1.76,
"learning_rate": 3.85182185769965e-05,
"loss": 0.2708,
"step": 1937
},
{
"epoch": 1.76,
"learning_rate": 3.823607050284678e-05,
"loss": 0.2654,
"step": 1938
},
{
"epoch": 1.76,
"learning_rate": 3.7954918517603634e-05,
"loss": 0.2687,
"step": 1939
},
{
"epoch": 1.76,
"learning_rate": 3.7674763227750706e-05,
"loss": 0.2614,
"step": 1940
},
{
"epoch": 1.76,
"learning_rate": 3.739560523762192e-05,
"loss": 0.2335,
"step": 1941
},
{
"epoch": 1.76,
"learning_rate": 3.711744514939991e-05,
"loss": 0.2759,
"step": 1942
},
{
"epoch": 1.76,
"learning_rate": 3.684028356311453e-05,
"loss": 0.2495,
"step": 1943
},
{
"epoch": 1.76,
"learning_rate": 3.656412107664181e-05,
"loss": 0.2665,
"step": 1944
},
{
"epoch": 1.76,
"learning_rate": 3.6288958285702725e-05,
"loss": 0.2714,
"step": 1945
},
{
"epoch": 1.76,
"learning_rate": 3.601479578386135e-05,
"loss": 0.261,
"step": 1946
},
{
"epoch": 1.77,
"learning_rate": 3.57416341625244e-05,
"loss": 0.2546,
"step": 1947
},
{
"epoch": 1.77,
"learning_rate": 3.546947401093953e-05,
"loss": 0.2698,
"step": 1948
},
{
"epoch": 1.77,
"learning_rate": 3.519831591619377e-05,
"loss": 0.2695,
"step": 1949
},
{
"epoch": 1.77,
"learning_rate": 3.492816046321284e-05,
"loss": 0.2705,
"step": 1950
},
{
"epoch": 1.77,
"learning_rate": 3.465900823475959e-05,
"loss": 0.2319,
"step": 1951
},
{
"epoch": 1.77,
"learning_rate": 3.439085981143286e-05,
"loss": 0.2509,
"step": 1952
},
{
"epoch": 1.77,
"learning_rate": 3.4123715771665786e-05,
"loss": 0.2532,
"step": 1953
},
{
"epoch": 1.77,
"learning_rate": 3.3857576691725343e-05,
"loss": 0.2557,
"step": 1954
},
{
"epoch": 1.77,
"learning_rate": 3.359244314571036e-05,
"loss": 0.262,
"step": 1955
},
{
"epoch": 1.77,
"learning_rate": 3.332831570555073e-05,
"loss": 0.2908,
"step": 1956
},
{
"epoch": 1.77,
"learning_rate": 3.3065194941006184e-05,
"loss": 0.2666,
"step": 1957
},
{
"epoch": 1.78,
"learning_rate": 3.2803081419664485e-05,
"loss": 0.2566,
"step": 1958
},
{
"epoch": 1.78,
"learning_rate": 3.254197570694101e-05,
"loss": 0.2441,
"step": 1959
},
{
"epoch": 1.78,
"learning_rate": 3.228187836607704e-05,
"loss": 0.2679,
"step": 1960
},
{
"epoch": 1.78,
"learning_rate": 3.202278995813862e-05,
"loss": 0.2452,
"step": 1961
},
{
"epoch": 1.78,
"learning_rate": 3.176471104201545e-05,
"loss": 0.2731,
"step": 1962
},
{
"epoch": 1.78,
"learning_rate": 3.150764217441954e-05,
"loss": 0.2766,
"step": 1963
},
{
"epoch": 1.78,
"learning_rate": 3.125158390988425e-05,
"loss": 0.2651,
"step": 1964
},
{
"epoch": 1.78,
"learning_rate": 3.0996536800762586e-05,
"loss": 0.245,
"step": 1965
},
{
"epoch": 1.78,
"learning_rate": 3.074250139722678e-05,
"loss": 0.2833,
"step": 1966
},
{
"epoch": 1.78,
"learning_rate": 3.0489478247266332e-05,
"loss": 0.2583,
"step": 1967
},
{
"epoch": 1.78,
"learning_rate": 3.0237467896687353e-05,
"loss": 0.2684,
"step": 1968
},
{
"epoch": 1.79,
"learning_rate": 2.9986470889111274e-05,
"loss": 0.2681,
"step": 1969
},
{
"epoch": 1.79,
"learning_rate": 2.9736487765973263e-05,
"loss": 0.245,
"step": 1970
},
{
"epoch": 1.79,
"learning_rate": 2.948751906652175e-05,
"loss": 0.272,
"step": 1971
},
{
"epoch": 1.79,
"learning_rate": 2.9239565327816907e-05,
"loss": 0.2577,
"step": 1972
},
{
"epoch": 1.79,
"learning_rate": 2.8992627084729107e-05,
"loss": 0.2537,
"step": 1973
},
{
"epoch": 1.79,
"learning_rate": 2.874670486993858e-05,
"loss": 0.2789,
"step": 1974
},
{
"epoch": 1.79,
"learning_rate": 2.8501799213933644e-05,
"loss": 0.2629,
"step": 1975
},
{
"epoch": 1.79,
"learning_rate": 2.825791064500993e-05,
"loss": 0.269,
"step": 1976
},
{
"epoch": 1.79,
"learning_rate": 2.8015039689268773e-05,
"loss": 0.2701,
"step": 1977
},
{
"epoch": 1.79,
"learning_rate": 2.7773186870616586e-05,
"loss": 0.2501,
"step": 1978
},
{
"epoch": 1.79,
"learning_rate": 2.7532352710763486e-05,
"loss": 0.2546,
"step": 1979
},
{
"epoch": 1.8,
"learning_rate": 2.7292537729222243e-05,
"loss": 0.2581,
"step": 1980
},
{
"epoch": 1.8,
"learning_rate": 2.705374244330705e-05,
"loss": 0.2623,
"step": 1981
},
{
"epoch": 1.8,
"learning_rate": 2.6815967368132467e-05,
"loss": 0.2563,
"step": 1982
},
{
"epoch": 1.8,
"learning_rate": 2.6579213016612315e-05,
"loss": 0.2604,
"step": 1983
},
{
"epoch": 1.8,
"learning_rate": 2.6343479899458732e-05,
"loss": 0.2799,
"step": 1984
},
{
"epoch": 1.8,
"learning_rate": 2.6108768525180626e-05,
"loss": 0.2582,
"step": 1985
},
{
"epoch": 1.8,
"learning_rate": 2.5875079400083044e-05,
"loss": 0.2637,
"step": 1986
},
{
"epoch": 1.8,
"learning_rate": 2.5642413028265865e-05,
"loss": 0.2559,
"step": 1987
},
{
"epoch": 1.8,
"learning_rate": 2.5410769911622942e-05,
"loss": 0.2631,
"step": 1988
},
{
"epoch": 1.8,
"learning_rate": 2.518015054984041e-05,
"loss": 0.2657,
"step": 1989
},
{
"epoch": 1.8,
"learning_rate": 2.4950555440396317e-05,
"loss": 0.2413,
"step": 1990
},
{
"epoch": 1.81,
"learning_rate": 2.4721985078559216e-05,
"loss": 0.2418,
"step": 1991
},
{
"epoch": 1.81,
"learning_rate": 2.4494439957387094e-05,
"loss": 0.2662,
"step": 1992
},
{
"epoch": 1.81,
"learning_rate": 2.4267920567726364e-05,
"loss": 0.2581,
"step": 1993
},
{
"epoch": 1.81,
"learning_rate": 2.4042427398210943e-05,
"loss": 0.2482,
"step": 1994
},
{
"epoch": 1.81,
"learning_rate": 2.3817960935260673e-05,
"loss": 0.2552,
"step": 1995
},
{
"epoch": 1.81,
"learning_rate": 2.359452166308107e-05,
"loss": 0.2401,
"step": 1996
},
{
"epoch": 1.81,
"learning_rate": 2.337211006366147e-05,
"loss": 0.2574,
"step": 1997
},
{
"epoch": 1.81,
"learning_rate": 2.3150726616774697e-05,
"loss": 0.2626,
"step": 1998
},
{
"epoch": 1.81,
"learning_rate": 2.2930371799975592e-05,
"loss": 0.245,
"step": 1999
},
{
"epoch": 1.81,
"learning_rate": 2.2711046088600028e-05,
"loss": 0.2717,
"step": 2000
},
{
"epoch": 1.81,
"learning_rate": 2.2492749955764168e-05,
"loss": 0.2585,
"step": 2001
},
{
"epoch": 1.82,
"learning_rate": 2.2275483872362835e-05,
"loss": 0.2731,
"step": 2002
},
{
"epoch": 1.82,
"learning_rate": 2.205924830706929e-05,
"loss": 0.2592,
"step": 2003
},
{
"epoch": 1.82,
"learning_rate": 2.184404372633364e-05,
"loss": 0.2589,
"step": 2004
},
{
"epoch": 1.82,
"learning_rate": 2.162987059438204e-05,
"loss": 0.2706,
"step": 2005
},
{
"epoch": 1.82,
"learning_rate": 2.141672937321576e-05,
"loss": 0.261,
"step": 2006
},
{
"epoch": 1.82,
"learning_rate": 2.1204620522609853e-05,
"loss": 0.2531,
"step": 2007
},
{
"epoch": 1.82,
"learning_rate": 2.0993544500112706e-05,
"loss": 0.2604,
"step": 2008
},
{
"epoch": 1.82,
"learning_rate": 2.0783501761044434e-05,
"loss": 0.2628,
"step": 2009
},
{
"epoch": 1.82,
"learning_rate": 2.057449275849649e-05,
"loss": 0.2631,
"step": 2010
},
{
"epoch": 1.82,
"learning_rate": 2.0366517943330275e-05,
"loss": 0.2562,
"step": 2011
},
{
"epoch": 1.82,
"learning_rate": 2.0159577764176428e-05,
"loss": 0.2589,
"step": 2012
},
{
"epoch": 1.83,
"learning_rate": 1.9953672667433585e-05,
"loss": 0.276,
"step": 2013
},
{
"epoch": 1.83,
"learning_rate": 1.974880309726762e-05,
"loss": 0.2596,
"step": 2014
},
{
"epoch": 1.83,
"learning_rate": 1.954496949561063e-05,
"loss": 0.2561,
"step": 2015
},
{
"epoch": 1.83,
"learning_rate": 1.934217230216001e-05,
"loss": 0.2473,
"step": 2016
},
{
"epoch": 1.83,
"learning_rate": 1.9140411954377433e-05,
"loss": 0.2632,
"step": 2017
},
{
"epoch": 1.83,
"learning_rate": 1.893968888748804e-05,
"loss": 0.2509,
"step": 2018
},
{
"epoch": 1.83,
"learning_rate": 1.8740003534479357e-05,
"loss": 0.251,
"step": 2019
},
{
"epoch": 1.83,
"learning_rate": 1.8541356326100432e-05,
"loss": 0.2684,
"step": 2020
},
{
"epoch": 1.83,
"learning_rate": 1.8343747690860723e-05,
"loss": 0.2595,
"step": 2021
},
{
"epoch": 1.83,
"learning_rate": 1.8147178055029577e-05,
"loss": 0.2728,
"step": 2022
},
{
"epoch": 1.83,
"learning_rate": 1.795164784263503e-05,
"loss": 0.2798,
"step": 2023
},
{
"epoch": 1.83,
"learning_rate": 1.7757157475462916e-05,
"loss": 0.2689,
"step": 2024
},
{
"epoch": 1.84,
"learning_rate": 1.7563707373055904e-05,
"loss": 0.2552,
"step": 2025
},
{
"epoch": 1.84,
"learning_rate": 1.737129795271275e-05,
"loss": 0.2506,
"step": 2026
},
{
"epoch": 1.84,
"learning_rate": 1.7179929629487223e-05,
"loss": 0.2424,
"step": 2027
},
{
"epoch": 1.84,
"learning_rate": 1.6989602816187442e-05,
"loss": 0.249,
"step": 2028
},
{
"epoch": 1.84,
"learning_rate": 1.6800317923374718e-05,
"loss": 0.257,
"step": 2029
},
{
"epoch": 1.84,
"learning_rate": 1.6612075359362877e-05,
"loss": 0.272,
"step": 2030
},
{
"epoch": 1.84,
"learning_rate": 1.642487553021732e-05,
"loss": 0.2642,
"step": 2031
},
{
"epoch": 1.84,
"learning_rate": 1.6238718839753973e-05,
"loss": 0.2626,
"step": 2032
},
{
"epoch": 1.84,
"learning_rate": 1.6053605689538675e-05,
"loss": 0.2456,
"step": 2033
},
{
"epoch": 1.84,
"learning_rate": 1.5869536478886228e-05,
"loss": 0.2478,
"step": 2034
},
{
"epoch": 1.84,
"learning_rate": 1.5686511604859455e-05,
"loss": 0.2614,
"step": 2035
},
{
"epoch": 1.85,
"learning_rate": 1.550453146226838e-05,
"loss": 0.2722,
"step": 2036
},
{
"epoch": 1.85,
"learning_rate": 1.5323596443669484e-05,
"loss": 0.2648,
"step": 2037
},
{
"epoch": 1.85,
"learning_rate": 1.5143706939364843e-05,
"loss": 0.2722,
"step": 2038
},
{
"epoch": 1.85,
"learning_rate": 1.496486333740088e-05,
"loss": 0.2748,
"step": 2039
},
{
"epoch": 1.85,
"learning_rate": 1.4787066023568107e-05,
"loss": 0.2672,
"step": 2040
},
{
"epoch": 1.85,
"learning_rate": 1.4610315381400175e-05,
"loss": 0.2502,
"step": 2041
},
{
"epoch": 1.85,
"learning_rate": 1.443461179217259e-05,
"loss": 0.2582,
"step": 2042
},
{
"epoch": 1.85,
"learning_rate": 1.425995563490251e-05,
"loss": 0.2606,
"step": 2043
},
{
"epoch": 1.85,
"learning_rate": 1.40863472863475e-05,
"loss": 0.2386,
"step": 2044
},
{
"epoch": 1.85,
"learning_rate": 1.3913787121004718e-05,
"loss": 0.2667,
"step": 2045
},
{
"epoch": 1.85,
"learning_rate": 1.3742275511110569e-05,
"loss": 0.2526,
"step": 2046
},
{
"epoch": 1.86,
"learning_rate": 1.3571812826639329e-05,
"loss": 0.2655,
"step": 2047
},
{
"epoch": 1.86,
"learning_rate": 1.3402399435302693e-05,
"loss": 0.2675,
"step": 2048
},
{
"epoch": 1.86,
"learning_rate": 1.3234035702548886e-05,
"loss": 0.2507,
"step": 2049
},
{
"epoch": 1.86,
"learning_rate": 1.3066721991561892e-05,
"loss": 0.2573,
"step": 2050
},
{
"epoch": 1.86,
"learning_rate": 1.2900458663260506e-05,
"loss": 0.2601,
"step": 2051
},
{
"epoch": 1.86,
"learning_rate": 1.2735246076297835e-05,
"loss": 0.2488,
"step": 2052
},
{
"epoch": 1.86,
"learning_rate": 1.2571084587060467e-05,
"loss": 0.2576,
"step": 2053
},
{
"epoch": 1.86,
"learning_rate": 1.240797454966741e-05,
"loss": 0.2639,
"step": 2054
},
{
"epoch": 1.86,
"learning_rate": 1.2245916315969774e-05,
"loss": 0.2662,
"step": 2055
},
{
"epoch": 1.86,
"learning_rate": 1.2084910235549584e-05,
"loss": 0.2682,
"step": 2056
},
{
"epoch": 1.86,
"learning_rate": 1.1924956655719355e-05,
"loss": 0.2732,
"step": 2057
},
{
"epoch": 1.87,
"learning_rate": 1.176605592152108e-05,
"loss": 0.2502,
"step": 2058
},
{
"epoch": 1.87,
"learning_rate": 1.1608208375725793e-05,
"loss": 0.2673,
"step": 2059
},
{
"epoch": 1.87,
"learning_rate": 1.1451414358832513e-05,
"loss": 0.2557,
"step": 2060
},
{
"epoch": 1.87,
"learning_rate": 1.1295674209067742e-05,
"loss": 0.2522,
"step": 2061
},
{
"epoch": 1.87,
"learning_rate": 1.1140988262384633e-05,
"loss": 0.2485,
"step": 2062
},
{
"epoch": 1.87,
"learning_rate": 1.098735685246216e-05,
"loss": 0.2572,
"step": 2063
},
{
"epoch": 1.87,
"learning_rate": 1.0834780310704672e-05,
"loss": 0.2629,
"step": 2064
},
{
"epoch": 1.87,
"learning_rate": 1.0683258966240949e-05,
"loss": 0.2538,
"step": 2065
},
{
"epoch": 1.87,
"learning_rate": 1.0532793145923535e-05,
"loss": 0.2499,
"step": 2066
},
{
"epoch": 1.87,
"learning_rate": 1.0383383174328076e-05,
"loss": 0.2556,
"step": 2067
},
{
"epoch": 1.87,
"learning_rate": 1.0235029373752758e-05,
"loss": 0.2655,
"step": 2068
},
{
"epoch": 1.88,
"learning_rate": 1.0087732064217204e-05,
"loss": 0.2751,
"step": 2069
},
{
"epoch": 1.88,
"learning_rate": 9.94149156346219e-06,
"loss": 0.2561,
"step": 2070
},
{
"epoch": 1.88,
"learning_rate": 9.79630818694871e-06,
"loss": 0.25,
"step": 2071
},
{
"epoch": 1.88,
"learning_rate": 9.652182247857578e-06,
"loss": 0.2361,
"step": 2072
},
{
"epoch": 1.88,
"learning_rate": 9.509114057088374e-06,
"loss": 0.238,
"step": 2073
},
{
"epoch": 1.88,
"learning_rate": 9.367103923259123e-06,
"loss": 0.2672,
"step": 2074
},
{
"epoch": 1.88,
"learning_rate": 9.226152152705336e-06,
"loss": 0.2787,
"step": 2075
},
{
"epoch": 1.88,
"learning_rate": 9.086259049479573e-06,
"loss": 0.2554,
"step": 2076
},
{
"epoch": 1.88,
"learning_rate": 8.94742491535072e-06,
"loss": 0.2798,
"step": 2077
},
{
"epoch": 1.88,
"learning_rate": 8.80965004980322e-06,
"loss": 0.2341,
"step": 2078
},
{
"epoch": 1.88,
"learning_rate": 8.672934750036554e-06,
"loss": 0.2441,
"step": 2079
},
{
"epoch": 1.89,
"learning_rate": 8.537279310964763e-06,
"loss": 0.2561,
"step": 2080
},
{
"epoch": 1.89,
"learning_rate": 8.402684025215323e-06,
"loss": 0.2507,
"step": 2081
},
{
"epoch": 1.89,
"learning_rate": 8.269149183128988e-06,
"loss": 0.2722,
"step": 2082
},
{
"epoch": 1.89,
"learning_rate": 8.136675072758947e-06,
"loss": 0.2649,
"step": 2083
},
{
"epoch": 1.89,
"learning_rate": 8.005261979870227e-06,
"loss": 0.2511,
"step": 2084
},
{
"epoch": 1.89,
"learning_rate": 7.874910187939178e-06,
"loss": 0.2887,
"step": 2085
},
{
"epoch": 1.89,
"learning_rate": 7.745619978152651e-06,
"loss": 0.264,
"step": 2086
},
{
"epoch": 1.89,
"learning_rate": 7.617391629407611e-06,
"loss": 0.2502,
"step": 2087
},
{
"epoch": 1.89,
"learning_rate": 7.490225418310403e-06,
"loss": 0.2637,
"step": 2088
},
{
"epoch": 1.89,
"learning_rate": 7.364121619176212e-06,
"loss": 0.2633,
"step": 2089
},
{
"epoch": 1.89,
"learning_rate": 7.239080504028439e-06,
"loss": 0.2433,
"step": 2090
},
{
"epoch": 1.9,
"learning_rate": 7.115102342598101e-06,
"loss": 0.2615,
"step": 2091
},
{
"epoch": 1.9,
"learning_rate": 6.99218740232338e-06,
"loss": 0.2648,
"step": 2092
},
{
"epoch": 1.9,
"learning_rate": 6.870335948348793e-06,
"loss": 0.2766,
"step": 2093
},
{
"epoch": 1.9,
"learning_rate": 6.749548243524806e-06,
"loss": 0.2721,
"step": 2094
},
{
"epoch": 1.9,
"learning_rate": 6.6298245484073815e-06,
"loss": 0.2573,
"step": 2095
},
{
"epoch": 1.9,
"learning_rate": 6.511165121257045e-06,
"loss": 0.2545,
"step": 2096
},
{
"epoch": 1.9,
"learning_rate": 6.393570218038713e-06,
"loss": 0.2552,
"step": 2097
},
{
"epoch": 1.9,
"learning_rate": 6.277040092420916e-06,
"loss": 0.2545,
"step": 2098
},
{
"epoch": 1.9,
"learning_rate": 6.161574995775354e-06,
"loss": 0.2406,
"step": 2099
},
{
"epoch": 1.9,
"learning_rate": 6.047175177176178e-06,
"loss": 0.2479,
"step": 2100
},
{
"epoch": 1.9,
"learning_rate": 5.933840883399766e-06,
"loss": 0.2717,
"step": 2101
},
{
"epoch": 1.91,
"learning_rate": 5.821572358923832e-06,
"loss": 0.2767,
"step": 2102
},
{
"epoch": 1.91,
"learning_rate": 5.710369845927266e-06,
"loss": 0.2622,
"step": 2103
},
{
"epoch": 1.91,
"learning_rate": 5.600233584289294e-06,
"loss": 0.2589,
"step": 2104
},
{
"epoch": 1.91,
"learning_rate": 5.4911638115891505e-06,
"loss": 0.2518,
"step": 2105
},
{
"epoch": 1.91,
"learning_rate": 5.38316076310541e-06,
"loss": 0.2545,
"step": 2106
},
{
"epoch": 1.91,
"learning_rate": 5.276224671815654e-06,
"loss": 0.2495,
"step": 2107
},
{
"epoch": 1.91,
"learning_rate": 5.170355768395918e-06,
"loss": 0.2695,
"step": 2108
},
{
"epoch": 1.91,
"learning_rate": 5.065554281220075e-06,
"loss": 0.2616,
"step": 2109
},
{
"epoch": 1.91,
"learning_rate": 4.961820436359565e-06,
"loss": 0.2478,
"step": 2110
},
{
"epoch": 1.91,
"learning_rate": 4.859154457582671e-06,
"loss": 0.2629,
"step": 2111
},
{
"epoch": 1.91,
"learning_rate": 4.757556566354071e-06,
"loss": 0.2513,
"step": 2112
},
{
"epoch": 1.92,
"learning_rate": 4.657026981834622e-06,
"loss": 0.2603,
"step": 2113
},
{
"epoch": 1.92,
"learning_rate": 4.55756592088058e-06,
"loss": 0.2532,
"step": 2114
},
{
"epoch": 1.92,
"learning_rate": 4.4591735980432094e-06,
"loss": 0.2776,
"step": 2115
},
{
"epoch": 1.92,
"learning_rate": 4.361850225568453e-06,
"loss": 0.2512,
"step": 2116
},
{
"epoch": 1.92,
"learning_rate": 4.265596013396378e-06,
"loss": 0.2499,
"step": 2117
},
{
"epoch": 1.92,
"learning_rate": 4.170411169160615e-06,
"loss": 0.2448,
"step": 2118
},
{
"epoch": 1.92,
"learning_rate": 4.0762958981880875e-06,
"loss": 0.2504,
"step": 2119
},
{
"epoch": 1.92,
"learning_rate": 3.983250403498506e-06,
"loss": 0.2399,
"step": 2120
},
{
"epoch": 1.92,
"learning_rate": 3.891274885803986e-06,
"loss": 0.2413,
"step": 2121
},
{
"epoch": 1.92,
"learning_rate": 3.8003695435084305e-06,
"loss": 0.2576,
"step": 2122
},
{
"epoch": 1.92,
"learning_rate": 3.7105345727073135e-06,
"loss": 0.2649,
"step": 2123
},
{
"epoch": 1.93,
"learning_rate": 3.621770167187066e-06,
"loss": 0.2466,
"step": 2124
},
{
"epoch": 1.93,
"learning_rate": 3.534076518424967e-06,
"loss": 0.2529,
"step": 2125
},
{
"epoch": 1.93,
"learning_rate": 3.447453815588253e-06,
"loss": 0.2587,
"step": 2126
},
{
"epoch": 1.93,
"learning_rate": 3.361902245534176e-06,
"loss": 0.2505,
"step": 2127
},
{
"epoch": 1.93,
"learning_rate": 3.2774219928094483e-06,
"loss": 0.2378,
"step": 2128
},
{
"epoch": 1.93,
"learning_rate": 3.194013239649518e-06,
"loss": 0.2476,
"step": 2129
},
{
"epoch": 1.93,
"learning_rate": 3.111676165978794e-06,
"loss": 0.2804,
"step": 2130
},
{
"epoch": 1.93,
"learning_rate": 3.030410949409701e-06,
"loss": 0.2686,
"step": 2131
},
{
"epoch": 1.93,
"learning_rate": 2.9502177652426242e-06,
"loss": 0.2472,
"step": 2132
},
{
"epoch": 1.93,
"learning_rate": 2.87109678646541e-06,
"loss": 0.272,
"step": 2133
},
{
"epoch": 1.93,
"learning_rate": 2.793048183752922e-06,
"loss": 0.2496,
"step": 2134
},
{
"epoch": 1.94,
"learning_rate": 2.7160721254669283e-06,
"loss": 0.2408,
"step": 2135
},
{
"epoch": 1.94,
"learning_rate": 2.6401687776553827e-06,
"loss": 0.256,
"step": 2136
},
{
"epoch": 1.94,
"learning_rate": 2.5653383040524227e-06,
"loss": 0.2632,
"step": 2137
},
{
"epoch": 1.94,
"learning_rate": 2.4915808660777583e-06,
"loss": 0.2634,
"step": 2138
},
{
"epoch": 1.94,
"learning_rate": 2.4188966228363418e-06,
"loss": 0.2766,
"step": 2139
},
{
"epoch": 1.94,
"learning_rate": 2.3472857311183093e-06,
"loss": 0.2542,
"step": 2140
},
{
"epoch": 1.94,
"learning_rate": 2.2767483453981496e-06,
"loss": 0.2343,
"step": 2141
},
{
"epoch": 1.94,
"learning_rate": 2.207284617834926e-06,
"loss": 0.2603,
"step": 2142
},
{
"epoch": 1.94,
"learning_rate": 2.1388946982714986e-06,
"loss": 0.2502,
"step": 2143
},
{
"epoch": 1.94,
"learning_rate": 2.0715787342343583e-06,
"loss": 0.261,
"step": 2144
},
{
"epoch": 1.94,
"learning_rate": 2.0053368709334606e-06,
"loss": 0.2471,
"step": 2145
},
{
"epoch": 1.95,
"learning_rate": 1.940169251261725e-06,
"loss": 0.2655,
"step": 2146
},
{
"epoch": 1.95,
"learning_rate": 1.8760760157946476e-06,
"loss": 0.2573,
"step": 2147
},
{
"epoch": 1.95,
"learning_rate": 1.8130573027902997e-06,
"loss": 0.2679,
"step": 2148
},
{
"epoch": 1.95,
"learning_rate": 1.7511132481888293e-06,
"loss": 0.2561,
"step": 2149
},
{
"epoch": 1.95,
"learning_rate": 1.690243985612072e-06,
"loss": 0.2637,
"step": 2150
},
{
"epoch": 1.95,
"learning_rate": 1.630449646363552e-06,
"loss": 0.2462,
"step": 2151
},
{
"epoch": 1.95,
"learning_rate": 1.5717303594279808e-06,
"loss": 0.2509,
"step": 2152
},
{
"epoch": 1.95,
"learning_rate": 1.5140862514709254e-06,
"loss": 0.233,
"step": 2153
},
{
"epoch": 1.95,
"learning_rate": 1.4575174468388074e-06,
"loss": 0.2714,
"step": 2154
},
{
"epoch": 1.95,
"learning_rate": 1.4020240675583495e-06,
"loss": 0.2474,
"step": 2155
},
{
"epoch": 1.95,
"learning_rate": 1.3476062333364625e-06,
"loss": 0.2405,
"step": 2156
},
{
"epoch": 1.96,
"learning_rate": 1.2942640615600243e-06,
"loss": 0.2727,
"step": 2157
},
{
"epoch": 1.96,
"learning_rate": 1.2419976672954358e-06,
"loss": 0.2673,
"step": 2158
},
{
"epoch": 1.96,
"learning_rate": 1.1908071632886764e-06,
"loss": 0.2539,
"step": 2159
},
{
"epoch": 1.96,
"learning_rate": 1.1406926599646372e-06,
"loss": 0.2423,
"step": 2160
},
{
"epoch": 1.96,
"learning_rate": 1.0916542654273443e-06,
"loss": 0.2275,
"step": 2161
},
{
"epoch": 1.96,
"learning_rate": 1.0436920854594023e-06,
"loss": 0.2725,
"step": 2162
},
{
"epoch": 1.96,
"learning_rate": 9.968062235218845e-07,
"loss": 0.2634,
"step": 2163
},
{
"epoch": 1.96,
"learning_rate": 9.509967807541098e-07,
"loss": 0.2643,
"step": 2164
},
{
"epoch": 1.96,
"learning_rate": 9.062638559734216e-07,
"loss": 0.2404,
"step": 2165
},
{
"epoch": 1.96,
"learning_rate": 8.626075456749093e-07,
"loss": 0.2308,
"step": 2166
},
{
"epoch": 1.96,
"learning_rate": 8.20027944031354e-07,
"loss": 0.2584,
"step": 2167
},
{
"epoch": 1.97,
"learning_rate": 7.785251428928386e-07,
"loss": 0.2578,
"step": 2168
},
{
"epoch": 1.97,
"learning_rate": 7.380992317866375e-07,
"loss": 0.2734,
"step": 2169
},
{
"epoch": 1.97,
"learning_rate": 6.987502979170502e-07,
"loss": 0.2362,
"step": 2170
},
{
"epoch": 1.97,
"learning_rate": 6.604784261652896e-07,
"loss": 0.2445,
"step": 2171
},
{
"epoch": 1.97,
"learning_rate": 6.232836990889834e-07,
"loss": 0.2428,
"step": 2172
},
{
"epoch": 1.97,
"learning_rate": 5.871661969223951e-07,
"loss": 0.2667,
"step": 2173
},
{
"epoch": 1.97,
"learning_rate": 5.521259975759252e-07,
"loss": 0.257,
"step": 2174
},
{
"epoch": 1.97,
"learning_rate": 5.181631766362216e-07,
"loss": 0.2518,
"step": 2175
},
{
"epoch": 1.97,
"learning_rate": 4.85277807365736e-07,
"loss": 0.2335,
"step": 2176
},
{
"epoch": 1.97,
"learning_rate": 4.5346996070283476e-07,
"loss": 0.2578,
"step": 2177
},
{
"epoch": 1.97,
"learning_rate": 4.2273970526141017e-07,
"loss": 0.2716,
"step": 2178
},
{
"epoch": 1.98,
"learning_rate": 3.930871073309361e-07,
"loss": 0.2678,
"step": 2179
},
{
"epoch": 1.98,
"learning_rate": 3.645122308761906e-07,
"loss": 0.2529,
"step": 2180
},
{
"epoch": 1.98,
"learning_rate": 3.3701513753703337e-07,
"loss": 0.2493,
"step": 2181
},
{
"epoch": 1.98,
"learning_rate": 3.10595886628684e-07,
"loss": 0.2817,
"step": 2182
},
{
"epoch": 1.98,
"learning_rate": 2.852545351409996e-07,
"loss": 0.278,
"step": 2183
},
{
"epoch": 1.98,
"learning_rate": 2.6099113773880856e-07,
"loss": 0.2672,
"step": 2184
},
{
"epoch": 1.98,
"learning_rate": 2.3780574676174338e-07,
"loss": 0.2762,
"step": 2185
},
{
"epoch": 1.98,
"learning_rate": 2.1569841222374155e-07,
"loss": 0.2532,
"step": 2186
},
{
"epoch": 1.98,
"learning_rate": 1.9466918181354487e-07,
"loss": 0.2771,
"step": 2187
},
{
"epoch": 1.98,
"learning_rate": 1.7471810089403352e-07,
"loss": 0.2596,
"step": 2188
},
{
"epoch": 1.98,
"learning_rate": 1.5584521250250339e-07,
"loss": 0.2656,
"step": 2189
},
{
"epoch": 1.99,
"learning_rate": 1.3805055735038874e-07,
"loss": 0.2614,
"step": 2190
},
{
"epoch": 1.99,
"learning_rate": 1.2133417382320656e-07,
"loss": 0.2355,
"step": 2191
},
{
"epoch": 1.99,
"learning_rate": 1.0569609798050106e-07,
"loss": 0.2566,
"step": 2192
},
{
"epoch": 1.99,
"learning_rate": 9.113636355578825e-08,
"loss": 0.2584,
"step": 2193
},
{
"epoch": 1.99,
"learning_rate": 7.765500195650033e-08,
"loss": 0.2661,
"step": 2194
},
{
"epoch": 1.99,
"learning_rate": 6.525204226370817e-08,
"loss": 0.2511,
"step": 2195
},
{
"epoch": 1.99,
"learning_rate": 5.392751123239892e-08,
"loss": 0.261,
"step": 2196
},
{
"epoch": 1.99,
"learning_rate": 4.368143329114283e-08,
"loss": 0.2828,
"step": 2197
},
{
"epoch": 1.99,
"learning_rate": 3.4513830542093337e-08,
"loss": 0.2676,
"step": 2198
},
{
"epoch": 1.99,
"learning_rate": 2.6424722761098087e-08,
"loss": 0.2799,
"step": 2199
},
{
"epoch": 1.99,
"learning_rate": 1.9414127397476833e-08,
"loss": 0.2661,
"step": 2200
},
{
"epoch": 2.0,
"learning_rate": 1.3482059574021488e-08,
"loss": 0.2688,
"step": 2201
},
{
"epoch": 2.0,
"learning_rate": 8.628532087107122e-09,
"loss": 0.2728,
"step": 2202
},
{
"epoch": 2.0,
"learning_rate": 4.8535554063589e-09,
"loss": 0.241,
"step": 2203
},
{
"epoch": 2.0,
"learning_rate": 2.1571376750406658e-09,
"loss": 0.2667,
"step": 2204
},
{
"epoch": 2.0,
"learning_rate": 5.392847095553321e-10,
"loss": 0.2637,
"step": 2205
},
{
"epoch": 2.0,
"learning_rate": 0.0,
"loss": 0.2899,
"step": 2206
},
{
"epoch": 2.0,
"step": 2206,
"total_flos": 246370907652096.0,
"train_loss": 0.36736955409253175,
"train_runtime": 29993.253,
"train_samples_per_second": 18.828,
"train_steps_per_second": 0.074
}
],
"max_steps": 2206,
"num_train_epochs": 2,
"total_flos": 246370907652096.0,
"trial_name": null,
"trial_params": null
}