{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 200,
"global_step": 170,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011764705882352941,
"grad_norm": 0.035323630468616266,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.1079,
"step": 1
},
{
"epoch": 0.023529411764705882,
"grad_norm": 0.03136221824048882,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.1141,
"step": 2
},
{
"epoch": 0.03529411764705882,
"grad_norm": 0.03804619743095702,
"learning_rate": 5e-05,
"loss": 0.1204,
"step": 3
},
{
"epoch": 0.047058823529411764,
"grad_norm": 0.04070206833240694,
"learning_rate": 6.666666666666667e-05,
"loss": 0.1276,
"step": 4
},
{
"epoch": 0.058823529411764705,
"grad_norm": 0.07589226019496102,
"learning_rate": 8.333333333333334e-05,
"loss": 0.1689,
"step": 5
},
{
"epoch": 0.07058823529411765,
"grad_norm": 0.07319090807548947,
"learning_rate": 0.0001,
"loss": 0.1345,
"step": 6
},
{
"epoch": 0.08235294117647059,
"grad_norm": 0.06996995505114396,
"learning_rate": 9.999082642158973e-05,
"loss": 0.1202,
"step": 7
},
{
"epoch": 0.09411764705882353,
"grad_norm": 0.05248182376644038,
"learning_rate": 9.99633090525405e-05,
"loss": 0.0983,
"step": 8
},
{
"epoch": 0.10588235294117647,
"grad_norm": 0.06219777780481942,
"learning_rate": 9.991745799016206e-05,
"loss": 0.0954,
"step": 9
},
{
"epoch": 0.11764705882352941,
"grad_norm": 0.06595531802798588,
"learning_rate": 9.985329005918702e-05,
"loss": 0.0977,
"step": 10
},
{
"epoch": 0.12941176470588237,
"grad_norm": 0.03606112500648923,
"learning_rate": 9.977082880559725e-05,
"loss": 0.0626,
"step": 11
},
{
"epoch": 0.1411764705882353,
"grad_norm": 0.0722550944651642,
"learning_rate": 9.967010448798375e-05,
"loss": 0.0929,
"step": 12
},
{
"epoch": 0.15294117647058825,
"grad_norm": 0.054771186088142604,
"learning_rate": 9.955115406644356e-05,
"loss": 0.0845,
"step": 13
},
{
"epoch": 0.16470588235294117,
"grad_norm": 0.051628745573165394,
"learning_rate": 9.941402118901744e-05,
"loss": 0.0659,
"step": 14
},
{
"epoch": 0.17647058823529413,
"grad_norm": 0.0486190850226073,
"learning_rate": 9.92587561756735e-05,
"loss": 0.0531,
"step": 15
},
{
"epoch": 0.18823529411764706,
"grad_norm": 0.05588842841049585,
"learning_rate": 9.908541599984276e-05,
"loss": 0.0642,
"step": 16
},
{
"epoch": 0.2,
"grad_norm": 0.05641046123379798,
"learning_rate": 9.889406426751296e-05,
"loss": 0.0588,
"step": 17
},
{
"epoch": 0.21176470588235294,
"grad_norm": 0.054058284278693136,
"learning_rate": 9.868477119388896e-05,
"loss": 0.0649,
"step": 18
},
{
"epoch": 0.2235294117647059,
"grad_norm": 0.0531804463754218,
"learning_rate": 9.84576135776276e-05,
"loss": 0.0677,
"step": 19
},
{
"epoch": 0.23529411764705882,
"grad_norm": 0.058590111826598894,
"learning_rate": 9.821267477265705e-05,
"loss": 0.0606,
"step": 20
},
{
"epoch": 0.24705882352941178,
"grad_norm": 0.04785785363882365,
"learning_rate": 9.795004465759065e-05,
"loss": 0.0609,
"step": 21
},
{
"epoch": 0.25882352941176473,
"grad_norm": 0.05206751702161926,
"learning_rate": 9.766981960274653e-05,
"loss": 0.0475,
"step": 22
},
{
"epoch": 0.27058823529411763,
"grad_norm": 0.037054145575494114,
"learning_rate": 9.737210243478521e-05,
"loss": 0.0415,
"step": 23
},
{
"epoch": 0.2823529411764706,
"grad_norm": 0.04044144505681365,
"learning_rate": 9.705700239897809e-05,
"loss": 0.0474,
"step": 24
},
{
"epoch": 0.29411764705882354,
"grad_norm": 0.030749676244442067,
"learning_rate": 9.672463511912055e-05,
"loss": 0.0422,
"step": 25
},
{
"epoch": 0.3058823529411765,
"grad_norm": 0.035338570129669074,
"learning_rate": 9.637512255510475e-05,
"loss": 0.044,
"step": 26
},
{
"epoch": 0.3176470588235294,
"grad_norm": 0.03379157572576983,
"learning_rate": 9.600859295816708e-05,
"loss": 0.0398,
"step": 27
},
{
"epoch": 0.32941176470588235,
"grad_norm": 0.04025818097716925,
"learning_rate": 9.56251808238275e-05,
"loss": 0.0386,
"step": 28
},
{
"epoch": 0.3411764705882353,
"grad_norm": 0.04406959077203071,
"learning_rate": 9.522502684253709e-05,
"loss": 0.0432,
"step": 29
},
{
"epoch": 0.35294117647058826,
"grad_norm": 0.03978101147616123,
"learning_rate": 9.480827784805278e-05,
"loss": 0.0437,
"step": 30
},
{
"epoch": 0.36470588235294116,
"grad_norm": 0.06465311190984376,
"learning_rate": 9.437508676355773e-05,
"loss": 0.0361,
"step": 31
},
{
"epoch": 0.3764705882352941,
"grad_norm": 0.040734739666672064,
"learning_rate": 9.392561254554713e-05,
"loss": 0.0418,
"step": 32
},
{
"epoch": 0.38823529411764707,
"grad_norm": 0.049975058896066196,
"learning_rate": 9.346002012550027e-05,
"loss": 0.0392,
"step": 33
},
{
"epoch": 0.4,
"grad_norm": 0.035517771488692076,
"learning_rate": 9.297848034936006e-05,
"loss": 0.0339,
"step": 34
},
{
"epoch": 0.4117647058823529,
"grad_norm": 0.03716302242601897,
"learning_rate": 9.248116991484229e-05,
"loss": 0.0342,
"step": 35
},
{
"epoch": 0.4235294117647059,
"grad_norm": 0.025634143517022603,
"learning_rate": 9.19682713065975e-05,
"loss": 0.0288,
"step": 36
},
{
"epoch": 0.43529411764705883,
"grad_norm": 0.03813921333883497,
"learning_rate": 9.143997272924973e-05,
"loss": 0.0369,
"step": 37
},
{
"epoch": 0.4470588235294118,
"grad_norm": 0.035523177764954665,
"learning_rate": 9.089646803833589e-05,
"loss": 0.0342,
"step": 38
},
{
"epoch": 0.4588235294117647,
"grad_norm": 0.04123934232531618,
"learning_rate": 9.033795666917191e-05,
"loss": 0.0388,
"step": 39
},
{
"epoch": 0.47058823529411764,
"grad_norm": 0.03480415683019527,
"learning_rate": 8.976464356367134e-05,
"loss": 0.0345,
"step": 40
},
{
"epoch": 0.4823529411764706,
"grad_norm": 0.04301396030731473,
"learning_rate": 8.917673909514322e-05,
"loss": 0.0408,
"step": 41
},
{
"epoch": 0.49411764705882355,
"grad_norm": 0.04119245023494202,
"learning_rate": 8.857445899109715e-05,
"loss": 0.0319,
"step": 42
},
{
"epoch": 0.5058823529411764,
"grad_norm": 0.03800838266272386,
"learning_rate": 8.795802425408352e-05,
"loss": 0.0285,
"step": 43
},
{
"epoch": 0.5176470588235295,
"grad_norm": 0.026205323264901936,
"learning_rate": 8.732766108059813e-05,
"loss": 0.0322,
"step": 44
},
{
"epoch": 0.5294117647058824,
"grad_norm": 0.03931353228016249,
"learning_rate": 8.668360077808093e-05,
"loss": 0.0346,
"step": 45
},
{
"epoch": 0.5411764705882353,
"grad_norm": 0.047847025199635816,
"learning_rate": 8.602607968003935e-05,
"loss": 0.0402,
"step": 46
},
{
"epoch": 0.5529411764705883,
"grad_norm": 0.03579784853738504,
"learning_rate": 8.535533905932738e-05,
"loss": 0.0262,
"step": 47
},
{
"epoch": 0.5647058823529412,
"grad_norm": 0.06521605078365518,
"learning_rate": 8.467162503961208e-05,
"loss": 0.0274,
"step": 48
},
{
"epoch": 0.5764705882352941,
"grad_norm": 0.027318223641263305,
"learning_rate": 8.397518850506028e-05,
"loss": 0.0321,
"step": 49
},
{
"epoch": 0.5882352941176471,
"grad_norm": 0.03667920290649668,
"learning_rate": 8.326628500827826e-05,
"loss": 0.0348,
"step": 50
},
{
"epoch": 0.6,
"grad_norm": 0.03841473526092613,
"learning_rate": 8.254517467653858e-05,
"loss": 0.0286,
"step": 51
},
{
"epoch": 0.611764705882353,
"grad_norm": 0.03512510354697874,
"learning_rate": 8.181212211632799e-05,
"loss": 0.0335,
"step": 52
},
{
"epoch": 0.6235294117647059,
"grad_norm": 0.03758958960229201,
"learning_rate": 8.106739631625217e-05,
"loss": 0.0351,
"step": 53
},
{
"epoch": 0.6352941176470588,
"grad_norm": 0.03141384942404683,
"learning_rate": 8.03112705483319e-05,
"loss": 0.0292,
"step": 54
},
{
"epoch": 0.6470588235294118,
"grad_norm": 0.05123314473482919,
"learning_rate": 7.954402226772804e-05,
"loss": 0.0315,
"step": 55
},
{
"epoch": 0.6588235294117647,
"grad_norm": 0.05526844683394433,
"learning_rate": 7.876593301093104e-05,
"loss": 0.0376,
"step": 56
},
{
"epoch": 0.6705882352941176,
"grad_norm": 0.03442418193147859,
"learning_rate": 7.797728829245321e-05,
"loss": 0.0298,
"step": 57
},
{
"epoch": 0.6823529411764706,
"grad_norm": 0.05381333860636251,
"learning_rate": 7.717837750006106e-05,
"loss": 0.0285,
"step": 58
},
{
"epoch": 0.6941176470588235,
"grad_norm": 0.04595131495742476,
"learning_rate": 7.636949378858646e-05,
"loss": 0.0323,
"step": 59
},
{
"epoch": 0.7058823529411765,
"grad_norm": 0.04547237722119376,
"learning_rate": 7.555093397235552e-05,
"loss": 0.0374,
"step": 60
},
{
"epoch": 0.7176470588235294,
"grad_norm": 0.029277787154415737,
"learning_rate": 7.472299841627451e-05,
"loss": 0.0305,
"step": 61
},
{
"epoch": 0.7294117647058823,
"grad_norm": 0.054416629180822844,
"learning_rate": 7.388599092561315e-05,
"loss": 0.0305,
"step": 62
},
{
"epoch": 0.7411764705882353,
"grad_norm": 0.07521136626264392,
"learning_rate": 7.304021863452524e-05,
"loss": 0.0337,
"step": 63
},
{
"epoch": 0.7529411764705882,
"grad_norm": 0.027594110933489315,
"learning_rate": 7.218599189334799e-05,
"loss": 0.0268,
"step": 64
},
{
"epoch": 0.7647058823529411,
"grad_norm": 0.044043131009126026,
"learning_rate": 7.1323624154721e-05,
"loss": 0.0333,
"step": 65
},
{
"epoch": 0.7764705882352941,
"grad_norm": 0.088281551033726,
"learning_rate": 7.045343185856701e-05,
"loss": 0.0367,
"step": 66
},
{
"epoch": 0.788235294117647,
"grad_norm": 0.04328862177911276,
"learning_rate": 6.957573431597646e-05,
"loss": 0.0327,
"step": 67
},
{
"epoch": 0.8,
"grad_norm": 0.03272017374355755,
"learning_rate": 6.869085359203844e-05,
"loss": 0.0309,
"step": 68
},
{
"epoch": 0.8117647058823529,
"grad_norm": 0.06280711357209813,
"learning_rate": 6.779911438766116e-05,
"loss": 0.0327,
"step": 69
},
{
"epoch": 0.8235294117647058,
"grad_norm": 0.052927974729157626,
"learning_rate": 6.690084392042513e-05,
"loss": 0.0312,
"step": 70
},
{
"epoch": 0.8352941176470589,
"grad_norm": 0.05429533945696401,
"learning_rate": 6.599637180451294e-05,
"loss": 0.0348,
"step": 71
},
{
"epoch": 0.8470588235294118,
"grad_norm": 0.048917527979116436,
"learning_rate": 6.508602992975963e-05,
"loss": 0.0317,
"step": 72
},
{
"epoch": 0.8588235294117647,
"grad_norm": 0.031886426687786254,
"learning_rate": 6.417015233986786e-05,
"loss": 0.0311,
"step": 73
},
{
"epoch": 0.8705882352941177,
"grad_norm": 0.03376820411697015,
"learning_rate": 6.32490751098331e-05,
"loss": 0.0263,
"step": 74
},
{
"epoch": 0.8823529411764706,
"grad_norm": 0.03767361887537691,
"learning_rate": 6.232313622262296e-05,
"loss": 0.0343,
"step": 75
},
{
"epoch": 0.8941176470588236,
"grad_norm": 0.04332615343931221,
"learning_rate": 6.139267544515689e-05,
"loss": 0.0304,
"step": 76
},
{
"epoch": 0.9058823529411765,
"grad_norm": 0.04146445958416543,
"learning_rate": 6.045803420363084e-05,
"loss": 0.0318,
"step": 77
},
{
"epoch": 0.9176470588235294,
"grad_norm": 0.03675850801075872,
"learning_rate": 5.951955545823342e-05,
"loss": 0.0286,
"step": 78
},
{
"epoch": 0.9294117647058824,
"grad_norm": 0.03951469611584691,
"learning_rate": 5.8577583577298924e-05,
"loss": 0.0297,
"step": 79
},
{
"epoch": 0.9411764705882353,
"grad_norm": 0.03613565309891407,
"learning_rate": 5.7632464210943726e-05,
"loss": 0.0261,
"step": 80
},
{
"epoch": 0.9529411764705882,
"grad_norm": 0.031008899018911143,
"learning_rate": 5.668454416423242e-05,
"loss": 0.026,
"step": 81
},
{
"epoch": 0.9647058823529412,
"grad_norm": 0.0692628338321925,
"learning_rate": 5.573417126992003e-05,
"loss": 0.0266,
"step": 82
},
{
"epoch": 0.9764705882352941,
"grad_norm": 0.05852763315877547,
"learning_rate": 5.478169426081712e-05,
"loss": 0.0295,
"step": 83
},
{
"epoch": 0.9882352941176471,
"grad_norm": 0.03199303327911728,
"learning_rate": 5.38274626418248e-05,
"loss": 0.0292,
"step": 84
},
{
"epoch": 1.0,
"grad_norm": 0.03733696573812923,
"learning_rate": 5.287182656168618e-05,
"loss": 0.0287,
"step": 85
},
{
"epoch": 1.011764705882353,
"grad_norm": 0.04222433319800323,
"learning_rate": 5.191513668450178e-05,
"loss": 0.0275,
"step": 86
},
{
"epoch": 1.0235294117647058,
"grad_norm": 0.033149244513485374,
"learning_rate": 5.095774406105571e-05,
"loss": 0.0245,
"step": 87
},
{
"epoch": 1.035294117647059,
"grad_norm": 0.030866296785570155,
"learning_rate": 5e-05,
"loss": 0.0227,
"step": 88
},
{
"epoch": 1.0470588235294118,
"grad_norm": 0.029363896734282895,
"learning_rate": 4.9042255938944296e-05,
"loss": 0.0284,
"step": 89
},
{
"epoch": 1.0588235294117647,
"grad_norm": 0.03458859192281693,
"learning_rate": 4.8084863315498234e-05,
"loss": 0.0256,
"step": 90
},
{
"epoch": 1.0705882352941176,
"grad_norm": 0.033499852353591555,
"learning_rate": 4.712817343831384e-05,
"loss": 0.028,
"step": 91
},
{
"epoch": 1.0823529411764705,
"grad_norm": 0.03348874233244485,
"learning_rate": 4.6172537358175214e-05,
"loss": 0.0269,
"step": 92
},
{
"epoch": 1.0941176470588236,
"grad_norm": 0.03268399220561992,
"learning_rate": 4.521830573918289e-05,
"loss": 0.0268,
"step": 93
},
{
"epoch": 1.1058823529411765,
"grad_norm": 0.03535497654299178,
"learning_rate": 4.4265828730079987e-05,
"loss": 0.0287,
"step": 94
},
{
"epoch": 1.1176470588235294,
"grad_norm": 0.035249786662349535,
"learning_rate": 4.331545583576758e-05,
"loss": 0.0254,
"step": 95
},
{
"epoch": 1.1294117647058823,
"grad_norm": 0.042058533392068366,
"learning_rate": 4.236753578905627e-05,
"loss": 0.0257,
"step": 96
},
{
"epoch": 1.1411764705882352,
"grad_norm": 0.035662240284239075,
"learning_rate": 4.142241642270108e-05,
"loss": 0.023,
"step": 97
},
{
"epoch": 1.1529411764705881,
"grad_norm": 0.03205942374398337,
"learning_rate": 4.0480444541766576e-05,
"loss": 0.0227,
"step": 98
},
{
"epoch": 1.1647058823529413,
"grad_norm": 0.03410677741547236,
"learning_rate": 3.954196579636918e-05,
"loss": 0.025,
"step": 99
},
{
"epoch": 1.1764705882352942,
"grad_norm": 0.037977473156353136,
"learning_rate": 3.8607324554843136e-05,
"loss": 0.0245,
"step": 100
},
{
"epoch": 1.188235294117647,
"grad_norm": 0.034304594099498986,
"learning_rate": 3.7676863777377054e-05,
"loss": 0.0251,
"step": 101
},
{
"epoch": 1.2,
"grad_norm": 0.04024140735442772,
"learning_rate": 3.675092489016693e-05,
"loss": 0.0258,
"step": 102
},
{
"epoch": 1.2117647058823529,
"grad_norm": 0.03640815220830249,
"learning_rate": 3.582984766013215e-05,
"loss": 0.0254,
"step": 103
},
{
"epoch": 1.223529411764706,
"grad_norm": 0.05308696935770744,
"learning_rate": 3.4913970070240386e-05,
"loss": 0.0317,
"step": 104
},
{
"epoch": 1.2352941176470589,
"grad_norm": 0.03532605452768607,
"learning_rate": 3.4003628195487057e-05,
"loss": 0.0272,
"step": 105
},
{
"epoch": 1.2470588235294118,
"grad_norm": 0.03399447551991043,
"learning_rate": 3.309915607957487e-05,
"loss": 0.0256,
"step": 106
},
{
"epoch": 1.2588235294117647,
"grad_norm": 0.03154244903849424,
"learning_rate": 3.2200885612338845e-05,
"loss": 0.0207,
"step": 107
},
{
"epoch": 1.2705882352941176,
"grad_norm": 0.04861329315126559,
"learning_rate": 3.130914640796157e-05,
"loss": 0.0297,
"step": 108
},
{
"epoch": 1.2823529411764705,
"grad_norm": 0.04026826659177659,
"learning_rate": 3.0424265684023558e-05,
"loss": 0.0238,
"step": 109
},
{
"epoch": 1.2941176470588236,
"grad_norm": 0.03529433347937337,
"learning_rate": 2.9546568141433006e-05,
"loss": 0.0247,
"step": 110
},
{
"epoch": 1.3058823529411765,
"grad_norm": 0.035218945260167514,
"learning_rate": 2.8676375845279013e-05,
"loss": 0.0271,
"step": 111
},
{
"epoch": 1.3176470588235294,
"grad_norm": 0.03409897136114451,
"learning_rate": 2.7814008106652012e-05,
"loss": 0.0253,
"step": 112
},
{
"epoch": 1.3294117647058823,
"grad_norm": 0.03345892031561033,
"learning_rate": 2.6959781365474758e-05,
"loss": 0.0196,
"step": 113
},
{
"epoch": 1.3411764705882354,
"grad_norm": 0.041711810588722247,
"learning_rate": 2.6114009074386846e-05,
"loss": 0.025,
"step": 114
},
{
"epoch": 1.3529411764705883,
"grad_norm": 0.03404740591421045,
"learning_rate": 2.527700158372548e-05,
"loss": 0.0245,
"step": 115
},
{
"epoch": 1.3647058823529412,
"grad_norm": 0.03806934179838023,
"learning_rate": 2.4449066027644475e-05,
"loss": 0.0202,
"step": 116
},
{
"epoch": 1.3764705882352941,
"grad_norm": 0.037833218608223404,
"learning_rate": 2.363050621141354e-05,
"loss": 0.0269,
"step": 117
},
{
"epoch": 1.388235294117647,
"grad_norm": 0.040060220347060395,
"learning_rate": 2.282162249993895e-05,
"loss": 0.0259,
"step": 118
},
{
"epoch": 1.4,
"grad_norm": 0.039492305883863446,
"learning_rate": 2.20227117075468e-05,
"loss": 0.0263,
"step": 119
},
{
"epoch": 1.4117647058823528,
"grad_norm": 0.04146681928718736,
"learning_rate": 2.1234066989068972e-05,
"loss": 0.0237,
"step": 120
},
{
"epoch": 1.423529411764706,
"grad_norm": 0.044440473554041314,
"learning_rate": 2.0455977732271993e-05,
"loss": 0.0221,
"step": 121
},
{
"epoch": 1.4352941176470588,
"grad_norm": 0.033828391729890175,
"learning_rate": 1.9688729451668114e-05,
"loss": 0.0203,
"step": 122
},
{
"epoch": 1.4470588235294117,
"grad_norm": 0.038308578853440155,
"learning_rate": 1.893260368374786e-05,
"loss": 0.0271,
"step": 123
},
{
"epoch": 1.4588235294117646,
"grad_norm": 0.035696891554767116,
"learning_rate": 1.818787788367202e-05,
"loss": 0.0241,
"step": 124
},
{
"epoch": 1.4705882352941178,
"grad_norm": 0.03450606035436337,
"learning_rate": 1.7454825323461448e-05,
"loss": 0.0227,
"step": 125
},
{
"epoch": 1.4823529411764707,
"grad_norm": 0.04502948564553977,
"learning_rate": 1.673371499172174e-05,
"loss": 0.0282,
"step": 126
},
{
"epoch": 1.4941176470588236,
"grad_norm": 0.03775360455129196,
"learning_rate": 1.6024811494939724e-05,
"loss": 0.022,
"step": 127
},
{
"epoch": 1.5058823529411764,
"grad_norm": 0.035858486799161725,
"learning_rate": 1.532837496038792e-05,
"loss": 0.0247,
"step": 128
},
{
"epoch": 1.5176470588235293,
"grad_norm": 0.03737088577494754,
"learning_rate": 1.4644660940672627e-05,
"loss": 0.0257,
"step": 129
},
{
"epoch": 1.5294117647058822,
"grad_norm": 0.03461325321460445,
"learning_rate": 1.3973920319960655e-05,
"loss": 0.0217,
"step": 130
},
{
"epoch": 1.5411764705882351,
"grad_norm": 0.034501790257859405,
"learning_rate": 1.3316399221919074e-05,
"loss": 0.0232,
"step": 131
},
{
"epoch": 1.5529411764705883,
"grad_norm": 0.03782875214918853,
"learning_rate": 1.2672338919401866e-05,
"loss": 0.0225,
"step": 132
},
{
"epoch": 1.5647058823529412,
"grad_norm": 0.04306989405034864,
"learning_rate": 1.2041975745916472e-05,
"loss": 0.0269,
"step": 133
},
{
"epoch": 1.576470588235294,
"grad_norm": 0.037700226280294416,
"learning_rate": 1.1425541008902851e-05,
"loss": 0.0249,
"step": 134
},
{
"epoch": 1.5882352941176472,
"grad_norm": 0.03890317937719569,
"learning_rate": 1.082326090485679e-05,
"loss": 0.0255,
"step": 135
},
{
"epoch": 1.6,
"grad_norm": 0.04219733674070769,
"learning_rate": 1.0235356436328675e-05,
"loss": 0.0286,
"step": 136
},
{
"epoch": 1.611764705882353,
"grad_norm": 0.03273981321791409,
"learning_rate": 9.662043330828085e-06,
"loss": 0.0201,
"step": 137
},
{
"epoch": 1.6235294117647059,
"grad_norm": 0.03302849889769006,
"learning_rate": 9.103531961664118e-06,
"loss": 0.0212,
"step": 138
},
{
"epoch": 1.6352941176470588,
"grad_norm": 0.04021301444135266,
"learning_rate": 8.560027270750277e-06,
"loss": 0.0244,
"step": 139
},
{
"epoch": 1.6470588235294117,
"grad_norm": 0.034002953931695744,
"learning_rate": 8.031728693402502e-06,
"loss": 0.0193,
"step": 140
},
{
"epoch": 1.6588235294117646,
"grad_norm": 0.03494170057614315,
"learning_rate": 7.518830085157735e-06,
"loss": 0.0232,
"step": 141
},
{
"epoch": 1.6705882352941175,
"grad_norm": 0.05067951397515134,
"learning_rate": 7.0215196506399515e-06,
"loss": 0.0217,
"step": 142
},
{
"epoch": 1.6823529411764706,
"grad_norm": 0.035476236926014315,
"learning_rate": 6.539979874499747e-06,
"loss": 0.0213,
"step": 143
},
{
"epoch": 1.6941176470588235,
"grad_norm": 0.036655504933770414,
"learning_rate": 6.07438745445289e-06,
"loss": 0.0234,
"step": 144
},
{
"epoch": 1.7058823529411766,
"grad_norm": 0.041657363570965146,
"learning_rate": 5.624913236442286e-06,
"loss": 0.0294,
"step": 145
},
{
"epoch": 1.7176470588235295,
"grad_norm": 0.05150076449892236,
"learning_rate": 5.191722151947226e-06,
"loss": 0.0258,
"step": 146
},
{
"epoch": 1.7294117647058824,
"grad_norm": 0.0404588183080739,
"learning_rate": 4.7749731574629196e-06,
"loss": 0.0224,
"step": 147
},
{
"epoch": 1.7411764705882353,
"grad_norm": 0.033224356486454734,
"learning_rate": 4.374819176172501e-06,
"loss": 0.021,
"step": 148
},
{
"epoch": 1.7529411764705882,
"grad_norm": 0.03795419337831155,
"learning_rate": 3.991407041832912e-06,
"loss": 0.022,
"step": 149
},
{
"epoch": 1.7647058823529411,
"grad_norm": 0.04200165421199087,
"learning_rate": 3.6248774448952695e-06,
"loss": 0.0265,
"step": 150
},
{
"epoch": 1.776470588235294,
"grad_norm": 0.03845197891559611,
"learning_rate": 3.2753648808794503e-06,
"loss": 0.024,
"step": 151
},
{
"epoch": 1.788235294117647,
"grad_norm": 0.037815699401078935,
"learning_rate": 2.942997601021924e-06,
"loss": 0.0257,
"step": 152
},
{
"epoch": 1.8,
"grad_norm": 0.03519274514771082,
"learning_rate": 2.6278975652147875e-06,
"loss": 0.0232,
"step": 153
},
{
"epoch": 1.811764705882353,
"grad_norm": 0.03706158210535721,
"learning_rate": 2.330180397253473e-06,
"loss": 0.0232,
"step": 154
},
{
"epoch": 1.8235294117647058,
"grad_norm": 0.04248155649655448,
"learning_rate": 2.049955342409349e-06,
"loss": 0.0227,
"step": 155
},
{
"epoch": 1.835294117647059,
"grad_norm": 0.0368432870786149,
"learning_rate": 1.7873252273429509e-06,
"loss": 0.0224,
"step": 156
},
{
"epoch": 1.8470588235294119,
"grad_norm": 0.03332568698884207,
"learning_rate": 1.542386422372405e-06,
"loss": 0.0208,
"step": 157
},
{
"epoch": 1.8588235294117648,
"grad_norm": 0.036213210721090155,
"learning_rate": 1.3152288061110518e-06,
"loss": 0.0237,
"step": 158
},
{
"epoch": 1.8705882352941177,
"grad_norm": 0.035507819488375524,
"learning_rate": 1.1059357324870455e-06,
"loss": 0.0212,
"step": 159
},
{
"epoch": 1.8823529411764706,
"grad_norm": 0.032773052246892385,
"learning_rate": 9.145840001572537e-07,
"loss": 0.0209,
"step": 160
},
{
"epoch": 1.8941176470588235,
"grad_norm": 0.03720092396666804,
"learning_rate": 7.41243824326504e-07,
"loss": 0.0237,
"step": 161
},
{
"epoch": 1.9058823529411764,
"grad_norm": 0.061643042263861664,
"learning_rate": 5.859788109825793e-07,
"loss": 0.0263,
"step": 162
},
{
"epoch": 1.9176470588235293,
"grad_norm": 0.037441015710064,
"learning_rate": 4.48845933556441e-07,
"loss": 0.025,
"step": 163
},
{
"epoch": 1.9294117647058824,
"grad_norm": 0.03459803638637435,
"learning_rate": 3.2989551201624835e-07,
"loss": 0.0215,
"step": 164
},
{
"epoch": 1.9411764705882353,
"grad_norm": 0.04948419428076397,
"learning_rate": 2.2917119440275524e-07,
"loss": 0.0273,
"step": 165
},
{
"epoch": 1.9529411764705882,
"grad_norm": 0.040939656098896986,
"learning_rate": 1.4670994081297795e-07,
"loss": 0.0251,
"step": 166
},
{
"epoch": 1.9647058823529413,
"grad_norm": 0.03941121437437399,
"learning_rate": 8.254200983794369e-08,
"loss": 0.0273,
"step": 167
},
{
"epoch": 1.9764705882352942,
"grad_norm": 0.04847566902018068,
"learning_rate": 3.669094745950008e-08,
"loss": 0.0239,
"step": 168
},
{
"epoch": 1.988235294117647,
"grad_norm": 0.04650396689287853,
"learning_rate": 9.17357841028199e-09,
"loss": 0.0207,
"step": 169
},
{
"epoch": 2.0,
"grad_norm": 0.04099329304128327,
"learning_rate": 0.0,
"loss": 0.0232,
"step": 170
},
{
"epoch": 2.0,
"step": 170,
"total_flos": 861011422740480.0,
"train_loss": 0.03621660071041654,
"train_runtime": 2036.7134,
"train_samples_per_second": 0.668,
"train_steps_per_second": 0.083
}
],
"logging_steps": 1,
"max_steps": 170,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 50000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 861011422740480.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}