VisionSelector-Qwen2.5-VL-7B / trainer_state.json
JulietChoo's picture
Upload folder using huggingface_hub
cc11b39 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9982300884955753,
"eval_steps": 500,
"global_step": 282,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0035398230088495575,
"grad_norm": 0.014968921455505688,
"learning_rate": 5.555555555555556e-06,
"loss": 2.1572,
"step": 1
},
{
"epoch": 0.007079646017699115,
"grad_norm": 0.011437004760788206,
"learning_rate": 1.1111111111111112e-05,
"loss": 2.107,
"step": 2
},
{
"epoch": 0.010619469026548672,
"grad_norm": 0.037465559679398804,
"learning_rate": 1.6666666666666667e-05,
"loss": 2.2589,
"step": 3
},
{
"epoch": 0.01415929203539823,
"grad_norm": 0.05454537578994139,
"learning_rate": 2.2222222222222223e-05,
"loss": 2.034,
"step": 4
},
{
"epoch": 0.017699115044247787,
"grad_norm": 0.1571730702304739,
"learning_rate": 2.777777777777778e-05,
"loss": 2.2764,
"step": 5
},
{
"epoch": 0.021238938053097345,
"grad_norm": 0.23702733026323008,
"learning_rate": 3.3333333333333335e-05,
"loss": 2.2573,
"step": 6
},
{
"epoch": 0.024778761061946902,
"grad_norm": 0.4231145053916906,
"learning_rate": 3.888888888888889e-05,
"loss": 2.218,
"step": 7
},
{
"epoch": 0.02831858407079646,
"grad_norm": 0.61941148194414,
"learning_rate": 4.4444444444444447e-05,
"loss": 2.1493,
"step": 8
},
{
"epoch": 0.03185840707964602,
"grad_norm": 0.8161556182954559,
"learning_rate": 5e-05,
"loss": 2.0541,
"step": 9
},
{
"epoch": 0.035398230088495575,
"grad_norm": 1.104704245155024,
"learning_rate": 4.9998344688731027e-05,
"loss": 2.0539,
"step": 10
},
{
"epoch": 0.03893805309734513,
"grad_norm": 0.9892480165180763,
"learning_rate": 4.999337897412852e-05,
"loss": 2.0266,
"step": 11
},
{
"epoch": 0.04247787610619469,
"grad_norm": 0.48679604721665365,
"learning_rate": 4.9985103513776764e-05,
"loss": 1.7714,
"step": 12
},
{
"epoch": 0.04601769911504425,
"grad_norm": 0.6641453909307959,
"learning_rate": 4.997351940355277e-05,
"loss": 1.8569,
"step": 13
},
{
"epoch": 0.049557522123893805,
"grad_norm": 0.4596566997606365,
"learning_rate": 4.9958628177481195e-05,
"loss": 1.7574,
"step": 14
},
{
"epoch": 0.05309734513274336,
"grad_norm": 0.3912601442043214,
"learning_rate": 4.99404318075312e-05,
"loss": 1.6832,
"step": 15
},
{
"epoch": 0.05663716814159292,
"grad_norm": 0.4400958018245371,
"learning_rate": 4.9918932703355256e-05,
"loss": 1.6784,
"step": 16
},
{
"epoch": 0.06017699115044248,
"grad_norm": 0.35169505721314703,
"learning_rate": 4.989413371197013e-05,
"loss": 1.5881,
"step": 17
},
{
"epoch": 0.06371681415929203,
"grad_norm": 0.3506392072576239,
"learning_rate": 4.9866038117379824e-05,
"loss": 1.6368,
"step": 18
},
{
"epoch": 0.06725663716814159,
"grad_norm": 0.29289238684825075,
"learning_rate": 4.9834649640140664e-05,
"loss": 1.5951,
"step": 19
},
{
"epoch": 0.07079646017699115,
"grad_norm": 0.2811111005250868,
"learning_rate": 4.979997243686868e-05,
"loss": 1.5501,
"step": 20
},
{
"epoch": 0.0743362831858407,
"grad_norm": 0.22287349650796306,
"learning_rate": 4.976201109968908e-05,
"loss": 1.5089,
"step": 21
},
{
"epoch": 0.07787610619469026,
"grad_norm": 0.2411382508079467,
"learning_rate": 4.972077065562821e-05,
"loss": 1.4887,
"step": 22
},
{
"epoch": 0.08141592920353982,
"grad_norm": 0.23405299969321355,
"learning_rate": 4.967625656594782e-05,
"loss": 1.5078,
"step": 23
},
{
"epoch": 0.08495575221238938,
"grad_norm": 0.24898400278289592,
"learning_rate": 4.962847472542185e-05,
"loss": 1.5222,
"step": 24
},
{
"epoch": 0.08849557522123894,
"grad_norm": 0.24700246382161703,
"learning_rate": 4.957743146155581e-05,
"loss": 1.5576,
"step": 25
},
{
"epoch": 0.0920353982300885,
"grad_norm": 0.2587819540119162,
"learning_rate": 4.952313353374891e-05,
"loss": 1.524,
"step": 26
},
{
"epoch": 0.09557522123893805,
"grad_norm": 0.24867732021555267,
"learning_rate": 4.946558813239888e-05,
"loss": 1.4653,
"step": 27
},
{
"epoch": 0.09911504424778761,
"grad_norm": 0.3081887492909938,
"learning_rate": 4.9404802877949843e-05,
"loss": 1.5227,
"step": 28
},
{
"epoch": 0.10265486725663717,
"grad_norm": 0.38210912317420137,
"learning_rate": 4.934078581988311e-05,
"loss": 1.5163,
"step": 29
},
{
"epoch": 0.10619469026548672,
"grad_norm": 0.7013464407809188,
"learning_rate": 4.92735454356513e-05,
"loss": 1.5138,
"step": 30
},
{
"epoch": 0.10973451327433628,
"grad_norm": 0.47112796801369383,
"learning_rate": 4.920309062955568e-05,
"loss": 1.5108,
"step": 31
},
{
"epoch": 0.11327433628318584,
"grad_norm": 0.28457319565968886,
"learning_rate": 4.912943073156701e-05,
"loss": 1.4629,
"step": 32
},
{
"epoch": 0.1168141592920354,
"grad_norm": 0.47049870670775934,
"learning_rate": 4.9052575496090016e-05,
"loss": 1.4611,
"step": 33
},
{
"epoch": 0.12035398230088495,
"grad_norm": 0.33000778146504633,
"learning_rate": 4.897253510067169e-05,
"loss": 1.4329,
"step": 34
},
{
"epoch": 0.12389380530973451,
"grad_norm": 0.4386960770505876,
"learning_rate": 4.888932014465352e-05,
"loss": 1.4591,
"step": 35
},
{
"epoch": 0.12743362831858407,
"grad_norm": 0.3628284067349331,
"learning_rate": 4.8802941647767856e-05,
"loss": 1.4484,
"step": 36
},
{
"epoch": 0.13097345132743363,
"grad_norm": 0.46785673204717787,
"learning_rate": 4.8713411048678635e-05,
"loss": 1.4166,
"step": 37
},
{
"epoch": 0.13451327433628318,
"grad_norm": 0.3555068973478974,
"learning_rate": 4.862074020346664e-05,
"loss": 1.3474,
"step": 38
},
{
"epoch": 0.13805309734513274,
"grad_norm": 0.32448258276292713,
"learning_rate": 4.8524941384059415e-05,
"loss": 1.4098,
"step": 39
},
{
"epoch": 0.1415929203539823,
"grad_norm": 0.22422182292818518,
"learning_rate": 4.842602727660618e-05,
"loss": 1.4523,
"step": 40
},
{
"epoch": 0.14513274336283186,
"grad_norm": 0.26143288039726004,
"learning_rate": 4.8324010979797875e-05,
"loss": 1.4037,
"step": 41
},
{
"epoch": 0.1486725663716814,
"grad_norm": 0.2930995625884043,
"learning_rate": 4.8218906003132555e-05,
"loss": 1.4117,
"step": 42
},
{
"epoch": 0.15221238938053097,
"grad_norm": 0.44012678716478415,
"learning_rate": 4.811072626512642e-05,
"loss": 1.4183,
"step": 43
},
{
"epoch": 0.15575221238938053,
"grad_norm": 0.40282521531923454,
"learning_rate": 4.799948609147061e-05,
"loss": 1.4343,
"step": 44
},
{
"epoch": 0.1592920353982301,
"grad_norm": 0.3548537305066757,
"learning_rate": 4.7885200213134164e-05,
"loss": 1.4123,
"step": 45
},
{
"epoch": 0.16283185840707964,
"grad_norm": 0.18896366313769444,
"learning_rate": 4.7767883764413266e-05,
"loss": 1.4341,
"step": 46
},
{
"epoch": 0.1663716814159292,
"grad_norm": 0.18341171851007929,
"learning_rate": 4.7647552280927086e-05,
"loss": 1.4068,
"step": 47
},
{
"epoch": 0.16991150442477876,
"grad_norm": 0.21351492088740873,
"learning_rate": 4.752422169756048e-05,
"loss": 1.454,
"step": 48
},
{
"epoch": 0.17345132743362832,
"grad_norm": 0.29096575232736366,
"learning_rate": 4.7397908346353796e-05,
"loss": 1.4382,
"step": 49
},
{
"epoch": 0.17699115044247787,
"grad_norm": 0.3475137808261636,
"learning_rate": 4.7268628954340136e-05,
"loss": 1.4064,
"step": 50
},
{
"epoch": 0.18053097345132743,
"grad_norm": 0.35130460927670765,
"learning_rate": 4.713640064133025e-05,
"loss": 1.4386,
"step": 51
},
{
"epoch": 0.184070796460177,
"grad_norm": 0.3056244311350184,
"learning_rate": 4.7001240917645465e-05,
"loss": 1.3835,
"step": 52
},
{
"epoch": 0.18761061946902655,
"grad_norm": 0.21680753741736908,
"learning_rate": 4.686316768179889e-05,
"loss": 1.4115,
"step": 53
},
{
"epoch": 0.1911504424778761,
"grad_norm": 0.24305442966955867,
"learning_rate": 4.672219921812517e-05,
"loss": 1.3966,
"step": 54
},
{
"epoch": 0.19469026548672566,
"grad_norm": 0.16091322568175684,
"learning_rate": 4.6578354194359227e-05,
"loss": 1.3585,
"step": 55
},
{
"epoch": 0.19823008849557522,
"grad_norm": 0.17723290216839938,
"learning_rate": 4.6431651659164174e-05,
"loss": 1.4289,
"step": 56
},
{
"epoch": 0.20176991150442478,
"grad_norm": 0.26160374576544143,
"learning_rate": 4.6282111039608784e-05,
"loss": 1.3914,
"step": 57
},
{
"epoch": 0.20530973451327433,
"grad_norm": 0.22226248632808485,
"learning_rate": 4.6129752138594874e-05,
"loss": 1.3962,
"step": 58
},
{
"epoch": 0.2088495575221239,
"grad_norm": 0.20009113160493155,
"learning_rate": 4.59745951322349e-05,
"loss": 1.4087,
"step": 59
},
{
"epoch": 0.21238938053097345,
"grad_norm": 0.19780714745729258,
"learning_rate": 4.581666056718016e-05,
"loss": 1.3653,
"step": 60
},
{
"epoch": 0.215929203539823,
"grad_norm": 0.14022953988134976,
"learning_rate": 4.5655969357899874e-05,
"loss": 1.3999,
"step": 61
},
{
"epoch": 0.21946902654867256,
"grad_norm": 0.164745362208849,
"learning_rate": 4.54925427839116e-05,
"loss": 1.328,
"step": 62
},
{
"epoch": 0.22300884955752212,
"grad_norm": 0.11773879110072756,
"learning_rate": 4.532640248696331e-05,
"loss": 1.3576,
"step": 63
},
{
"epoch": 0.22654867256637168,
"grad_norm": 0.14713521584311004,
"learning_rate": 4.5157570468167464e-05,
"loss": 1.3734,
"step": 64
},
{
"epoch": 0.23008849557522124,
"grad_norm": 0.12150757743176828,
"learning_rate": 4.498606908508754e-05,
"loss": 1.3901,
"step": 65
},
{
"epoch": 0.2336283185840708,
"grad_norm": 0.17335782705846536,
"learning_rate": 4.481192104877726e-05,
"loss": 1.3379,
"step": 66
},
{
"epoch": 0.23716814159292035,
"grad_norm": 0.15751237594808964,
"learning_rate": 4.463514942077323e-05,
"loss": 1.3856,
"step": 67
},
{
"epoch": 0.2407079646017699,
"grad_norm": 0.3214390865300867,
"learning_rate": 4.4455777610040846e-05,
"loss": 1.3402,
"step": 68
},
{
"epoch": 0.24424778761061947,
"grad_norm": 0.1505432033259721,
"learning_rate": 4.427382936987449e-05,
"loss": 1.4107,
"step": 69
},
{
"epoch": 0.24778761061946902,
"grad_norm": 0.1246996380600049,
"learning_rate": 4.4089328794751954e-05,
"loss": 1.3601,
"step": 70
},
{
"epoch": 0.2513274336283186,
"grad_norm": 0.11727561562909336,
"learning_rate": 4.3902300317143726e-05,
"loss": 1.4374,
"step": 71
},
{
"epoch": 0.25486725663716814,
"grad_norm": 0.12074273017325926,
"learning_rate": 4.371276870427753e-05,
"loss": 1.3879,
"step": 72
},
{
"epoch": 0.2584070796460177,
"grad_norm": 0.19796910383444757,
"learning_rate": 4.352075905485854e-05,
"loss": 1.3909,
"step": 73
},
{
"epoch": 0.26194690265486725,
"grad_norm": 0.17913857322680019,
"learning_rate": 4.332629679574566e-05,
"loss": 1.3591,
"step": 74
},
{
"epoch": 0.26548672566371684,
"grad_norm": 0.21612189066738186,
"learning_rate": 4.312940767858441e-05,
"loss": 1.3981,
"step": 75
},
{
"epoch": 0.26902654867256637,
"grad_norm": 0.15021608593853492,
"learning_rate": 4.293011777639675e-05,
"loss": 1.4217,
"step": 76
},
{
"epoch": 0.27256637168141595,
"grad_norm": 0.13863221146764798,
"learning_rate": 4.272845348012833e-05,
"loss": 1.3392,
"step": 77
},
{
"epoch": 0.2761061946902655,
"grad_norm": 0.15019927813117484,
"learning_rate": 4.252444149515374e-05,
"loss": 1.3865,
"step": 78
},
{
"epoch": 0.27964601769911507,
"grad_norm": 0.12191330805249051,
"learning_rate": 4.231810883773999e-05,
"loss": 1.3545,
"step": 79
},
{
"epoch": 0.2831858407079646,
"grad_norm": 0.2446045853389079,
"learning_rate": 4.210948283146892e-05,
"loss": 1.3849,
"step": 80
},
{
"epoch": 0.2867256637168142,
"grad_norm": 0.15363154813349278,
"learning_rate": 4.189859110361886e-05,
"loss": 1.3954,
"step": 81
},
{
"epoch": 0.2902654867256637,
"grad_norm": 0.1395549815462078,
"learning_rate": 4.1685461581506115e-05,
"loss": 1.3324,
"step": 82
},
{
"epoch": 0.2938053097345133,
"grad_norm": 0.17551930372267743,
"learning_rate": 4.1470122488786645e-05,
"loss": 1.4272,
"step": 83
},
{
"epoch": 0.2973451327433628,
"grad_norm": 0.13713537229983466,
"learning_rate": 4.125260234171861e-05,
"loss": 1.3876,
"step": 84
},
{
"epoch": 0.3008849557522124,
"grad_norm": 0.1295928704936419,
"learning_rate": 4.103292994538605e-05,
"loss": 1.4066,
"step": 85
},
{
"epoch": 0.30442477876106194,
"grad_norm": 0.1202033500035325,
"learning_rate": 4.0811134389884433e-05,
"loss": 1.4204,
"step": 86
},
{
"epoch": 0.30796460176991153,
"grad_norm": 0.1713978920098714,
"learning_rate": 4.058724504646834e-05,
"loss": 1.3673,
"step": 87
},
{
"epoch": 0.31150442477876106,
"grad_norm": 0.1631957644367236,
"learning_rate": 4.036129156366203e-05,
"loss": 1.3936,
"step": 88
},
{
"epoch": 0.31504424778761064,
"grad_norm": 0.1269834739898777,
"learning_rate": 4.013330386333321e-05,
"loss": 1.3609,
"step": 89
},
{
"epoch": 0.3185840707964602,
"grad_norm": 0.29003377036786065,
"learning_rate": 3.9903312136730634e-05,
"loss": 1.4405,
"step": 90
},
{
"epoch": 0.32212389380530976,
"grad_norm": 0.1550597273050285,
"learning_rate": 3.967134684048607e-05,
"loss": 1.4192,
"step": 91
},
{
"epoch": 0.3256637168141593,
"grad_norm": 0.14307429007534203,
"learning_rate": 3.9437438692581e-05,
"loss": 1.3483,
"step": 92
},
{
"epoch": 0.3292035398230089,
"grad_norm": 0.11390632629452387,
"learning_rate": 3.920161866827889e-05,
"loss": 1.3705,
"step": 93
},
{
"epoch": 0.3327433628318584,
"grad_norm": 0.28577123310852803,
"learning_rate": 3.8963917996023245e-05,
"loss": 1.3899,
"step": 94
},
{
"epoch": 0.336283185840708,
"grad_norm": 0.24148914707858088,
"learning_rate": 3.8724368153302166e-05,
"loss": 1.3827,
"step": 95
},
{
"epoch": 0.3398230088495575,
"grad_norm": 0.12888492035814383,
"learning_rate": 3.8483000862479986e-05,
"loss": 1.3774,
"step": 96
},
{
"epoch": 0.3433628318584071,
"grad_norm": 0.27468578419570394,
"learning_rate": 3.823984808659641e-05,
"loss": 1.3675,
"step": 97
},
{
"epoch": 0.34690265486725663,
"grad_norm": 0.2522995801673515,
"learning_rate": 3.799494202513386e-05,
"loss": 1.3994,
"step": 98
},
{
"epoch": 0.3504424778761062,
"grad_norm": 0.19369450862361592,
"learning_rate": 3.77483151097534e-05,
"loss": 1.3676,
"step": 99
},
{
"epoch": 0.35398230088495575,
"grad_norm": 0.1364981691071651,
"learning_rate": 3.7500000000000003e-05,
"loss": 1.351,
"step": 100
},
{
"epoch": 0.35752212389380533,
"grad_norm": 0.19824556305622026,
"learning_rate": 3.7250029578977625e-05,
"loss": 1.3983,
"step": 101
},
{
"epoch": 0.36106194690265486,
"grad_norm": 0.258166693738195,
"learning_rate": 3.699843694899467e-05,
"loss": 1.3097,
"step": 102
},
{
"epoch": 0.36460176991150445,
"grad_norm": 0.15288998693054637,
"learning_rate": 3.674525542718035e-05,
"loss": 1.3191,
"step": 103
},
{
"epoch": 0.368141592920354,
"grad_norm": 0.1356079709541582,
"learning_rate": 3.64905185410728e-05,
"loss": 1.3446,
"step": 104
},
{
"epoch": 0.37168141592920356,
"grad_norm": 0.14784692543277705,
"learning_rate": 3.6234260024179033e-05,
"loss": 1.3885,
"step": 105
},
{
"epoch": 0.3752212389380531,
"grad_norm": 0.659660827895679,
"learning_rate": 3.597651381150795e-05,
"loss": 1.3731,
"step": 106
},
{
"epoch": 0.3787610619469027,
"grad_norm": 0.1340122038319879,
"learning_rate": 3.5717314035076355e-05,
"loss": 1.37,
"step": 107
},
{
"epoch": 0.3823008849557522,
"grad_norm": 0.17555880073923438,
"learning_rate": 3.545669501938913e-05,
"loss": 1.3771,
"step": 108
},
{
"epoch": 0.3858407079646018,
"grad_norm": 0.2749432492658443,
"learning_rate": 3.5194691276893755e-05,
"loss": 1.4188,
"step": 109
},
{
"epoch": 0.3893805309734513,
"grad_norm": 0.16422845586243,
"learning_rate": 3.4931337503410034e-05,
"loss": 1.3907,
"step": 110
},
{
"epoch": 0.3929203539823009,
"grad_norm": 0.29765257271852646,
"learning_rate": 3.466666857353547e-05,
"loss": 1.3313,
"step": 111
},
{
"epoch": 0.39646017699115044,
"grad_norm": 0.14967484015655694,
"learning_rate": 3.4400719536027056e-05,
"loss": 1.3716,
"step": 112
},
{
"epoch": 0.4,
"grad_norm": 0.19336396632327799,
"learning_rate": 3.413352560915988e-05,
"loss": 1.3418,
"step": 113
},
{
"epoch": 0.40353982300884955,
"grad_norm": 0.14974407429618758,
"learning_rate": 3.386512217606339e-05,
"loss": 1.3987,
"step": 114
},
{
"epoch": 0.40707964601769914,
"grad_norm": 0.13138145553916636,
"learning_rate": 3.359554478003579e-05,
"loss": 1.372,
"step": 115
},
{
"epoch": 0.41061946902654867,
"grad_norm": 0.15087834260387423,
"learning_rate": 3.332482911983721e-05,
"loss": 1.3418,
"step": 116
},
{
"epoch": 0.41415929203539825,
"grad_norm": 0.15396500859534987,
"learning_rate": 3.305301104496227e-05,
"loss": 1.339,
"step": 117
},
{
"epoch": 0.4176991150442478,
"grad_norm": 0.21912037148617267,
"learning_rate": 3.278012655089277e-05,
"loss": 1.3877,
"step": 118
},
{
"epoch": 0.42123893805309737,
"grad_norm": 0.16910612610739376,
"learning_rate": 3.250621177433097e-05,
"loss": 1.4116,
"step": 119
},
{
"epoch": 0.4247787610619469,
"grad_norm": 0.41976499689222374,
"learning_rate": 3.2231302988414194e-05,
"loss": 1.4068,
"step": 120
},
{
"epoch": 0.4283185840707965,
"grad_norm": 0.16685047210835632,
"learning_rate": 3.195543659791132e-05,
"loss": 1.3695,
"step": 121
},
{
"epoch": 0.431858407079646,
"grad_norm": 0.12533128029061685,
"learning_rate": 3.167864913440195e-05,
"loss": 1.3561,
"step": 122
},
{
"epoch": 0.4353982300884956,
"grad_norm": 0.12335769952527838,
"learning_rate": 3.140097725143868e-05,
"loss": 1.3248,
"step": 123
},
{
"epoch": 0.4389380530973451,
"grad_norm": 0.16480478639945306,
"learning_rate": 3.112245771969327e-05,
"loss": 1.3889,
"step": 124
},
{
"epoch": 0.4424778761061947,
"grad_norm": 0.12236451730713493,
"learning_rate": 3.084312742208728e-05,
"loss": 1.3578,
"step": 125
},
{
"epoch": 0.44601769911504424,
"grad_norm": 0.17612171732594184,
"learning_rate": 3.056302334890786e-05,
"loss": 1.4233,
"step": 126
},
{
"epoch": 0.4495575221238938,
"grad_norm": 0.1857045286019139,
"learning_rate": 3.028218259290932e-05,
"loss": 1.3877,
"step": 127
},
{
"epoch": 0.45309734513274336,
"grad_norm": 0.23845826617765498,
"learning_rate": 3.0000642344401113e-05,
"loss": 1.3951,
"step": 128
},
{
"epoch": 0.45663716814159294,
"grad_norm": 0.20335694786532152,
"learning_rate": 2.971843988632292e-05,
"loss": 1.3895,
"step": 129
},
{
"epoch": 0.46017699115044247,
"grad_norm": 0.16324247793893487,
"learning_rate": 2.9435612589307458e-05,
"loss": 1.3704,
"step": 130
},
{
"epoch": 0.46371681415929206,
"grad_norm": 0.22068197102315823,
"learning_rate": 2.9152197906731687e-05,
"loss": 1.3763,
"step": 131
},
{
"epoch": 0.4672566371681416,
"grad_norm": 0.13328424989654036,
"learning_rate": 2.886823336975703e-05,
"loss": 1.4059,
"step": 132
},
{
"epoch": 0.47079646017699117,
"grad_norm": 0.1853269220432702,
"learning_rate": 2.8583756582359338e-05,
"loss": 1.3553,
"step": 133
},
{
"epoch": 0.4743362831858407,
"grad_norm": 0.17477870642537813,
"learning_rate": 2.8298805216349167e-05,
"loss": 1.3538,
"step": 134
},
{
"epoch": 0.4778761061946903,
"grad_norm": 0.22830537461041098,
"learning_rate": 2.8013417006383076e-05,
"loss": 1.3057,
"step": 135
},
{
"epoch": 0.4814159292035398,
"grad_norm": 0.19882386738471552,
"learning_rate": 2.7727629744966695e-05,
"loss": 1.369,
"step": 136
},
{
"epoch": 0.4849557522123894,
"grad_norm": 0.18011895559282673,
"learning_rate": 2.7441481277449954e-05,
"loss": 1.394,
"step": 137
},
{
"epoch": 0.48849557522123893,
"grad_norm": 0.2320794287860154,
"learning_rate": 2.715500949701549e-05,
"loss": 1.3963,
"step": 138
},
{
"epoch": 0.4920353982300885,
"grad_norm": 0.2136574838846041,
"learning_rate": 2.686825233966061e-05,
"loss": 1.3882,
"step": 139
},
{
"epoch": 0.49557522123893805,
"grad_norm": 0.14317691765167115,
"learning_rate": 2.6581247779173635e-05,
"loss": 1.3702,
"step": 140
},
{
"epoch": 0.49911504424778763,
"grad_norm": 0.3110139016551138,
"learning_rate": 2.629403382210524e-05,
"loss": 1.362,
"step": 141
},
{
"epoch": 0.5026548672566372,
"grad_norm": 0.2905968311582129,
"learning_rate": 2.600664850273538e-05,
"loss": 1.377,
"step": 142
},
{
"epoch": 0.5061946902654867,
"grad_norm": 0.23781505531603292,
"learning_rate": 2.5719129878036686e-05,
"loss": 1.3922,
"step": 143
},
{
"epoch": 0.5097345132743363,
"grad_norm": 0.17830734890592495,
"learning_rate": 2.5431516022634715e-05,
"loss": 1.3797,
"step": 144
},
{
"epoch": 0.5132743362831859,
"grad_norm": 0.1830712952884457,
"learning_rate": 2.5143845023765943e-05,
"loss": 1.3441,
"step": 145
},
{
"epoch": 0.5168141592920354,
"grad_norm": 0.2816874187150034,
"learning_rate": 2.4856154976234063e-05,
"loss": 1.3733,
"step": 146
},
{
"epoch": 0.5203539823008849,
"grad_norm": 0.17322438937284276,
"learning_rate": 2.456848397736529e-05,
"loss": 1.4381,
"step": 147
},
{
"epoch": 0.5238938053097345,
"grad_norm": 0.19984978304386464,
"learning_rate": 2.4280870121963323e-05,
"loss": 1.385,
"step": 148
},
{
"epoch": 0.5274336283185841,
"grad_norm": 0.24003529120628253,
"learning_rate": 2.399335149726463e-05,
"loss": 1.3378,
"step": 149
},
{
"epoch": 0.5309734513274337,
"grad_norm": 0.1863666969548861,
"learning_rate": 2.370596617789476e-05,
"loss": 1.3711,
"step": 150
},
{
"epoch": 0.5345132743362832,
"grad_norm": 0.15186994434245493,
"learning_rate": 2.3418752220826364e-05,
"loss": 1.3917,
"step": 151
},
{
"epoch": 0.5380530973451327,
"grad_norm": 0.18488533185567366,
"learning_rate": 2.3131747660339394e-05,
"loss": 1.3352,
"step": 152
},
{
"epoch": 0.5415929203539823,
"grad_norm": 0.14709637692023,
"learning_rate": 2.2844990502984513e-05,
"loss": 1.3606,
"step": 153
},
{
"epoch": 0.5451327433628319,
"grad_norm": 0.1929509569633954,
"learning_rate": 2.2558518722550048e-05,
"loss": 1.3675,
"step": 154
},
{
"epoch": 0.5486725663716814,
"grad_norm": 0.21437689825380304,
"learning_rate": 2.2272370255033314e-05,
"loss": 1.3784,
"step": 155
},
{
"epoch": 0.552212389380531,
"grad_norm": 0.23426391248511405,
"learning_rate": 2.1986582993616926e-05,
"loss": 1.393,
"step": 156
},
{
"epoch": 0.5557522123893806,
"grad_norm": 0.15338867979069432,
"learning_rate": 2.1701194783650846e-05,
"loss": 1.4128,
"step": 157
},
{
"epoch": 0.5592920353982301,
"grad_norm": 0.19084022015663935,
"learning_rate": 2.1416243417640668e-05,
"loss": 1.349,
"step": 158
},
{
"epoch": 0.5628318584070796,
"grad_norm": 0.18723374415049818,
"learning_rate": 2.1131766630242966e-05,
"loss": 1.3617,
"step": 159
},
{
"epoch": 0.5663716814159292,
"grad_norm": 0.1843740823402695,
"learning_rate": 2.084780209326831e-05,
"loss": 1.3863,
"step": 160
},
{
"epoch": 0.5699115044247788,
"grad_norm": 0.13776831606484746,
"learning_rate": 2.0564387410692544e-05,
"loss": 1.3505,
"step": 161
},
{
"epoch": 0.5734513274336284,
"grad_norm": 0.2185855169811869,
"learning_rate": 2.0281560113677086e-05,
"loss": 1.357,
"step": 162
},
{
"epoch": 0.5769911504424778,
"grad_norm": 0.1479496112029382,
"learning_rate": 1.9999357655598893e-05,
"loss": 1.3996,
"step": 163
},
{
"epoch": 0.5805309734513274,
"grad_norm": 0.14230152435027588,
"learning_rate": 1.971781740709068e-05,
"loss": 1.4579,
"step": 164
},
{
"epoch": 0.584070796460177,
"grad_norm": 0.24415411312146668,
"learning_rate": 1.9436976651092144e-05,
"loss": 1.3951,
"step": 165
},
{
"epoch": 0.5876106194690266,
"grad_norm": 0.1531379017839431,
"learning_rate": 1.915687257791273e-05,
"loss": 1.4066,
"step": 166
},
{
"epoch": 0.5911504424778761,
"grad_norm": 0.12572602144186015,
"learning_rate": 1.8877542280306728e-05,
"loss": 1.4049,
"step": 167
},
{
"epoch": 0.5946902654867257,
"grad_norm": 0.28651665368702545,
"learning_rate": 1.8599022748561325e-05,
"loss": 1.3875,
"step": 168
},
{
"epoch": 0.5982300884955752,
"grad_norm": 0.2853818788671804,
"learning_rate": 1.8321350865598057e-05,
"loss": 1.3547,
"step": 169
},
{
"epoch": 0.6017699115044248,
"grad_norm": 0.20397260857195473,
"learning_rate": 1.8044563402088684e-05,
"loss": 1.362,
"step": 170
},
{
"epoch": 0.6053097345132743,
"grad_norm": 0.22013691384673548,
"learning_rate": 1.776869701158581e-05,
"loss": 1.3619,
"step": 171
},
{
"epoch": 0.6088495575221239,
"grad_norm": 0.208809460722862,
"learning_rate": 1.7493788225669027e-05,
"loss": 1.3896,
"step": 172
},
{
"epoch": 0.6123893805309735,
"grad_norm": 0.4200222837373971,
"learning_rate": 1.7219873449107233e-05,
"loss": 1.3647,
"step": 173
},
{
"epoch": 0.6159292035398231,
"grad_norm": 0.25501118855440547,
"learning_rate": 1.694698895503774e-05,
"loss": 1.4591,
"step": 174
},
{
"epoch": 0.6194690265486725,
"grad_norm": 0.1600911540410192,
"learning_rate": 1.66751708801628e-05,
"loss": 1.3436,
"step": 175
},
{
"epoch": 0.6230088495575221,
"grad_norm": 0.16733007446645096,
"learning_rate": 1.6404455219964203e-05,
"loss": 1.3455,
"step": 176
},
{
"epoch": 0.6265486725663717,
"grad_norm": 0.1899434164816737,
"learning_rate": 1.613487782393661e-05,
"loss": 1.3823,
"step": 177
},
{
"epoch": 0.6300884955752213,
"grad_norm": 0.20941491888387903,
"learning_rate": 1.5866474390840125e-05,
"loss": 1.3896,
"step": 178
},
{
"epoch": 0.6336283185840708,
"grad_norm": 0.14977102391347238,
"learning_rate": 1.5599280463972953e-05,
"loss": 1.3498,
"step": 179
},
{
"epoch": 0.6371681415929203,
"grad_norm": 0.14005156844353428,
"learning_rate": 1.533333142646453e-05,
"loss": 1.3439,
"step": 180
},
{
"epoch": 0.6407079646017699,
"grad_norm": 0.24243476683329468,
"learning_rate": 1.5068662496589975e-05,
"loss": 1.4248,
"step": 181
},
{
"epoch": 0.6442477876106195,
"grad_norm": 0.13257433719785397,
"learning_rate": 1.4805308723106248e-05,
"loss": 1.3372,
"step": 182
},
{
"epoch": 0.647787610619469,
"grad_norm": 0.1360543958451605,
"learning_rate": 1.4543304980610878e-05,
"loss": 1.3502,
"step": 183
},
{
"epoch": 0.6513274336283186,
"grad_norm": 0.171896129628506,
"learning_rate": 1.4282685964923642e-05,
"loss": 1.3754,
"step": 184
},
{
"epoch": 0.6548672566371682,
"grad_norm": 0.14433841954600432,
"learning_rate": 1.4023486188492052e-05,
"loss": 1.3855,
"step": 185
},
{
"epoch": 0.6584070796460177,
"grad_norm": 0.27499627341386756,
"learning_rate": 1.3765739975820962e-05,
"loss": 1.3843,
"step": 186
},
{
"epoch": 0.6619469026548672,
"grad_norm": 0.1832518338825052,
"learning_rate": 1.3509481458927209e-05,
"loss": 1.4067,
"step": 187
},
{
"epoch": 0.6654867256637168,
"grad_norm": 0.17627269284963698,
"learning_rate": 1.3254744572819658e-05,
"loss": 1.358,
"step": 188
},
{
"epoch": 0.6690265486725664,
"grad_norm": 0.12972103232452611,
"learning_rate": 1.3001563051005347e-05,
"loss": 1.3372,
"step": 189
},
{
"epoch": 0.672566371681416,
"grad_norm": 0.17778610418067095,
"learning_rate": 1.2749970421022381e-05,
"loss": 1.4093,
"step": 190
},
{
"epoch": 0.6761061946902654,
"grad_norm": 0.17366530190012722,
"learning_rate": 1.2500000000000006e-05,
"loss": 1.4115,
"step": 191
},
{
"epoch": 0.679646017699115,
"grad_norm": 0.2338226961364743,
"learning_rate": 1.225168489024661e-05,
"loss": 1.3919,
"step": 192
},
{
"epoch": 0.6831858407079646,
"grad_norm": 0.12473946880268197,
"learning_rate": 1.2005057974866135e-05,
"loss": 1.3683,
"step": 193
},
{
"epoch": 0.6867256637168142,
"grad_norm": 0.29788214242861744,
"learning_rate": 1.1760151913403583e-05,
"loss": 1.3742,
"step": 194
},
{
"epoch": 0.6902654867256637,
"grad_norm": 0.25161388399332285,
"learning_rate": 1.1516999137520023e-05,
"loss": 1.3436,
"step": 195
},
{
"epoch": 0.6938053097345133,
"grad_norm": 0.20275718493470796,
"learning_rate": 1.127563184669784e-05,
"loss": 1.3785,
"step": 196
},
{
"epoch": 0.6973451327433628,
"grad_norm": 0.2369309742864934,
"learning_rate": 1.1036082003976759e-05,
"loss": 1.413,
"step": 197
},
{
"epoch": 0.7008849557522124,
"grad_norm": 0.1403641760235718,
"learning_rate": 1.0798381331721109e-05,
"loss": 1.406,
"step": 198
},
{
"epoch": 0.7044247787610619,
"grad_norm": 0.27046989828036216,
"learning_rate": 1.0562561307419005e-05,
"loss": 1.3781,
"step": 199
},
{
"epoch": 0.7079646017699115,
"grad_norm": 0.16492162334449362,
"learning_rate": 1.032865315951394e-05,
"loss": 1.3735,
"step": 200
},
{
"epoch": 0.7115044247787611,
"grad_norm": 0.2832750784253182,
"learning_rate": 1.0096687863269368e-05,
"loss": 1.4337,
"step": 201
},
{
"epoch": 0.7150442477876107,
"grad_norm": 0.17170795646186768,
"learning_rate": 9.866696136666798e-06,
"loss": 1.4072,
"step": 202
},
{
"epoch": 0.7185840707964601,
"grad_norm": 0.13847113237321784,
"learning_rate": 9.638708436337976e-06,
"loss": 1.4178,
"step": 203
},
{
"epoch": 0.7221238938053097,
"grad_norm": 0.17526884575066398,
"learning_rate": 9.412754953531663e-06,
"loss": 1.3727,
"step": 204
},
{
"epoch": 0.7256637168141593,
"grad_norm": 0.11417158087114691,
"learning_rate": 9.18886561011557e-06,
"loss": 1.3738,
"step": 205
},
{
"epoch": 0.7292035398230089,
"grad_norm": 0.16272670657433552,
"learning_rate": 8.967070054613949e-06,
"loss": 1.3713,
"step": 206
},
{
"epoch": 0.7327433628318584,
"grad_norm": 0.3092931883528065,
"learning_rate": 8.747397658281395e-06,
"loss": 1.3996,
"step": 207
},
{
"epoch": 0.736283185840708,
"grad_norm": 0.1935926337883175,
"learning_rate": 8.529877511213357e-06,
"loss": 1.3924,
"step": 208
},
{
"epoch": 0.7398230088495575,
"grad_norm": 0.2888163198022463,
"learning_rate": 8.314538418493892e-06,
"loss": 1.3962,
"step": 209
},
{
"epoch": 0.7433628318584071,
"grad_norm": 0.2037500223476182,
"learning_rate": 8.101408896381141e-06,
"loss": 1.4393,
"step": 210
},
{
"epoch": 0.7469026548672566,
"grad_norm": 0.20365280760954532,
"learning_rate": 7.890517168531086e-06,
"loss": 1.432,
"step": 211
},
{
"epoch": 0.7504424778761062,
"grad_norm": 0.245973715435359,
"learning_rate": 7.681891162260015e-06,
"loss": 1.4125,
"step": 212
},
{
"epoch": 0.7539823008849558,
"grad_norm": 0.18396574884320455,
"learning_rate": 7.475558504846264e-06,
"loss": 1.393,
"step": 213
},
{
"epoch": 0.7575221238938054,
"grad_norm": 0.14343988163912846,
"learning_rate": 7.271546519871672e-06,
"loss": 1.3735,
"step": 214
},
{
"epoch": 0.7610619469026548,
"grad_norm": 0.19050518039826148,
"learning_rate": 7.0698822236032554e-06,
"loss": 1.3552,
"step": 215
},
{
"epoch": 0.7646017699115044,
"grad_norm": 0.16749967589709347,
"learning_rate": 6.8705923214155945e-06,
"loss": 1.3955,
"step": 216
},
{
"epoch": 0.768141592920354,
"grad_norm": 0.20340220156851388,
"learning_rate": 6.673703204254347e-06,
"loss": 1.4241,
"step": 217
},
{
"epoch": 0.7716814159292036,
"grad_norm": 0.2774385020407223,
"learning_rate": 6.4792409451414735e-06,
"loss": 1.3569,
"step": 218
},
{
"epoch": 0.7752212389380531,
"grad_norm": 0.16257195802961893,
"learning_rate": 6.28723129572247e-06,
"loss": 1.4044,
"step": 219
},
{
"epoch": 0.7787610619469026,
"grad_norm": 0.19178231846523627,
"learning_rate": 6.097699682856275e-06,
"loss": 1.4115,
"step": 220
},
{
"epoch": 0.7823008849557522,
"grad_norm": 0.31772086840796093,
"learning_rate": 5.910671205248045e-06,
"loss": 1.3743,
"step": 221
},
{
"epoch": 0.7858407079646018,
"grad_norm": 0.3344955593081661,
"learning_rate": 5.72617063012551e-06,
"loss": 1.4144,
"step": 222
},
{
"epoch": 0.7893805309734513,
"grad_norm": 0.41331884709239125,
"learning_rate": 5.544222389959164e-06,
"loss": 1.4096,
"step": 223
},
{
"epoch": 0.7929203539823009,
"grad_norm": 0.31601068127978066,
"learning_rate": 5.3648505792267825e-06,
"loss": 1.3797,
"step": 224
},
{
"epoch": 0.7964601769911505,
"grad_norm": 0.12615271990952526,
"learning_rate": 5.188078951222744e-06,
"loss": 1.4181,
"step": 225
},
{
"epoch": 0.8,
"grad_norm": 0.23681972723294573,
"learning_rate": 5.013930914912476e-06,
"loss": 1.3862,
"step": 226
},
{
"epoch": 0.8035398230088495,
"grad_norm": 0.23506910056480054,
"learning_rate": 4.842429531832529e-06,
"loss": 1.406,
"step": 227
},
{
"epoch": 0.8070796460176991,
"grad_norm": 0.1603225379263149,
"learning_rate": 4.673597513036684e-06,
"loss": 1.3943,
"step": 228
},
{
"epoch": 0.8106194690265487,
"grad_norm": 0.1750016849687585,
"learning_rate": 4.507457216088396e-06,
"loss": 1.3752,
"step": 229
},
{
"epoch": 0.8141592920353983,
"grad_norm": 0.33015856465351145,
"learning_rate": 4.344030642100133e-06,
"loss": 1.4204,
"step": 230
},
{
"epoch": 0.8176991150442477,
"grad_norm": 0.17198765892059484,
"learning_rate": 4.183339432819844e-06,
"loss": 1.3832,
"step": 231
},
{
"epoch": 0.8212389380530973,
"grad_norm": 0.2494399367301955,
"learning_rate": 4.025404867765103e-06,
"loss": 1.3654,
"step": 232
},
{
"epoch": 0.8247787610619469,
"grad_norm": 0.15522718151820233,
"learning_rate": 3.8702478614051355e-06,
"loss": 1.3921,
"step": 233
},
{
"epoch": 0.8283185840707965,
"grad_norm": 0.13980066687088336,
"learning_rate": 3.717888960391222e-06,
"loss": 1.3558,
"step": 234
},
{
"epoch": 0.831858407079646,
"grad_norm": 0.15487363597370576,
"learning_rate": 3.5683483408358307e-06,
"loss": 1.4594,
"step": 235
},
{
"epoch": 0.8353982300884956,
"grad_norm": 0.1602659742055085,
"learning_rate": 3.4216458056407775e-06,
"loss": 1.4266,
"step": 236
},
{
"epoch": 0.8389380530973451,
"grad_norm": 0.19010269637116808,
"learning_rate": 3.2778007818748376e-06,
"loss": 1.4116,
"step": 237
},
{
"epoch": 0.8424778761061947,
"grad_norm": 0.13278314739658872,
"learning_rate": 3.136832318201119e-06,
"loss": 1.4198,
"step": 238
},
{
"epoch": 0.8460176991150442,
"grad_norm": 0.175606952118794,
"learning_rate": 2.998759082354538e-06,
"loss": 1.3803,
"step": 239
},
{
"epoch": 0.8495575221238938,
"grad_norm": 0.2481514536808128,
"learning_rate": 2.8635993586697553e-06,
"loss": 1.4044,
"step": 240
},
{
"epoch": 0.8530973451327434,
"grad_norm": 0.282431156272049,
"learning_rate": 2.7313710456598667e-06,
"loss": 1.4294,
"step": 241
},
{
"epoch": 0.856637168141593,
"grad_norm": 0.2698384131452036,
"learning_rate": 2.602091653646205e-06,
"loss": 1.4682,
"step": 242
},
{
"epoch": 0.8601769911504424,
"grad_norm": 0.177268179533375,
"learning_rate": 2.475778302439524e-06,
"loss": 1.4015,
"step": 243
},
{
"epoch": 0.863716814159292,
"grad_norm": 0.14802302054659544,
"learning_rate": 2.3524477190729144e-06,
"loss": 1.4131,
"step": 244
},
{
"epoch": 0.8672566371681416,
"grad_norm": 0.2718087485947451,
"learning_rate": 2.232116235586737e-06,
"loss": 1.3715,
"step": 245
},
{
"epoch": 0.8707964601769912,
"grad_norm": 0.28440166381934995,
"learning_rate": 2.1147997868658425e-06,
"loss": 1.4285,
"step": 246
},
{
"epoch": 0.8743362831858407,
"grad_norm": 0.16098873943678169,
"learning_rate": 2.0005139085293945e-06,
"loss": 1.3959,
"step": 247
},
{
"epoch": 0.8778761061946903,
"grad_norm": 0.14588882919671864,
"learning_rate": 1.8892737348735812e-06,
"loss": 1.4136,
"step": 248
},
{
"epoch": 0.8814159292035398,
"grad_norm": 0.16837051864441668,
"learning_rate": 1.7810939968674418e-06,
"loss": 1.3834,
"step": 249
},
{
"epoch": 0.8849557522123894,
"grad_norm": 0.13634066486185534,
"learning_rate": 1.6759890202021289e-06,
"loss": 1.4102,
"step": 250
},
{
"epoch": 0.8884955752212389,
"grad_norm": 0.19189176615372466,
"learning_rate": 1.5739727233938239e-06,
"loss": 1.3727,
"step": 251
},
{
"epoch": 0.8920353982300885,
"grad_norm": 0.2130365343341551,
"learning_rate": 1.4750586159405915e-06,
"loss": 1.3547,
"step": 252
},
{
"epoch": 0.8955752212389381,
"grad_norm": 0.14014293619438611,
"learning_rate": 1.3792597965333581e-06,
"loss": 1.4127,
"step": 253
},
{
"epoch": 0.8991150442477877,
"grad_norm": 0.19706946980035872,
"learning_rate": 1.286588951321363e-06,
"loss": 1.4366,
"step": 254
},
{
"epoch": 0.9026548672566371,
"grad_norm": 0.16141347931533398,
"learning_rate": 1.1970583522321472e-06,
"loss": 1.4402,
"step": 255
},
{
"epoch": 0.9061946902654867,
"grad_norm": 0.16886776303202308,
"learning_rate": 1.1106798553464804e-06,
"loss": 1.4466,
"step": 256
},
{
"epoch": 0.9097345132743363,
"grad_norm": 0.1289021524060537,
"learning_rate": 1.0274648993283093e-06,
"loss": 1.4022,
"step": 257
},
{
"epoch": 0.9132743362831859,
"grad_norm": 0.2166369005456705,
"learning_rate": 9.474245039099882e-07,
"loss": 1.4273,
"step": 258
},
{
"epoch": 0.9168141592920354,
"grad_norm": 0.2055233570255993,
"learning_rate": 8.705692684329969e-07,
"loss": 1.4447,
"step": 259
},
{
"epoch": 0.9203539823008849,
"grad_norm": 0.24003258637066974,
"learning_rate": 7.969093704443209e-07,
"loss": 1.395,
"step": 260
},
{
"epoch": 0.9238938053097345,
"grad_norm": 0.24643248474581458,
"learning_rate": 7.264545643486997e-07,
"loss": 1.4292,
"step": 261
},
{
"epoch": 0.9274336283185841,
"grad_norm": 0.14588590756720407,
"learning_rate": 6.592141801168933e-07,
"loss": 1.4206,
"step": 262
},
{
"epoch": 0.9309734513274336,
"grad_norm": 0.2098810341079189,
"learning_rate": 5.951971220501645e-07,
"loss": 1.4003,
"step": 263
},
{
"epoch": 0.9345132743362832,
"grad_norm": 0.2134185708217152,
"learning_rate": 5.344118676011172e-07,
"loss": 1.4529,
"step": 264
},
{
"epoch": 0.9380530973451328,
"grad_norm": 0.1901396806615015,
"learning_rate": 4.768664662510941e-07,
"loss": 1.4181,
"step": 265
},
{
"epoch": 0.9415929203539823,
"grad_norm": 0.1740201436765075,
"learning_rate": 4.225685384441902e-07,
"loss": 1.4086,
"step": 266
},
{
"epoch": 0.9451327433628318,
"grad_norm": 0.14602401119980518,
"learning_rate": 3.71525274578155e-07,
"loss": 1.4262,
"step": 267
},
{
"epoch": 0.9486725663716814,
"grad_norm": 0.2295789342688302,
"learning_rate": 3.237434340521789e-07,
"loss": 1.4068,
"step": 268
},
{
"epoch": 0.952212389380531,
"grad_norm": 0.14106911620373858,
"learning_rate": 2.7922934437178695e-07,
"loss": 1.447,
"step": 269
},
{
"epoch": 0.9557522123893806,
"grad_norm": 0.1862366502924384,
"learning_rate": 2.3798890031092037e-07,
"loss": 1.4125,
"step": 270
},
{
"epoch": 0.95929203539823,
"grad_norm": 0.17289316045885814,
"learning_rate": 2.0002756313132475e-07,
"loss": 1.4104,
"step": 271
},
{
"epoch": 0.9628318584070796,
"grad_norm": 0.17621766011633694,
"learning_rate": 1.65350359859337e-07,
"loss": 1.3574,
"step": 272
},
{
"epoch": 0.9663716814159292,
"grad_norm": 0.14324064947353912,
"learning_rate": 1.3396188262018438e-07,
"loss": 1.4327,
"step": 273
},
{
"epoch": 0.9699115044247788,
"grad_norm": 0.21936984291113176,
"learning_rate": 1.0586628802987108e-07,
"loss": 1.4428,
"step": 274
},
{
"epoch": 0.9734513274336283,
"grad_norm": 0.17815388829010623,
"learning_rate": 8.106729664475176e-08,
"loss": 1.43,
"step": 275
},
{
"epoch": 0.9769911504424779,
"grad_norm": 0.19414185063188497,
"learning_rate": 5.956819246881185e-08,
"loss": 1.4066,
"step": 276
},
{
"epoch": 0.9805309734513274,
"grad_norm": 0.20422480602927082,
"learning_rate": 4.1371822518804224e-08,
"loss": 1.4236,
"step": 277
},
{
"epoch": 0.984070796460177,
"grad_norm": 0.1757997100349613,
"learning_rate": 2.648059644723144e-08,
"loss": 1.448,
"step": 278
},
{
"epoch": 0.9876106194690265,
"grad_norm": 0.22895459311601085,
"learning_rate": 1.4896486223239802e-08,
"loss": 1.4495,
"step": 279
},
{
"epoch": 0.9911504424778761,
"grad_norm": 0.40857682028655,
"learning_rate": 6.621025871481057e-09,
"loss": 1.4162,
"step": 280
},
{
"epoch": 0.9946902654867257,
"grad_norm": 0.1575487199309586,
"learning_rate": 1.6553112689776662e-09,
"loss": 1.4067,
"step": 281
},
{
"epoch": 0.9982300884955753,
"grad_norm": 0.1926481633107631,
"learning_rate": 0.0,
"loss": 1.4527,
"step": 282
},
{
"epoch": 0.9982300884955753,
"step": 282,
"total_flos": 1677476333879296.0,
"train_loss": 0.0,
"train_runtime": 0.0106,
"train_samples_per_second": 13689138.49,
"train_steps_per_second": 26706.264
}
],
"logging_steps": 1.0,
"max_steps": 282,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1677476333879296.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}