9b-45-1 / trainer_state.json
furproxy's picture
Upload folder using huggingface_hub
433e136 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.954887218045113,
"eval_steps": 500,
"global_step": 260,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.015037593984962405,
"grad_norm": 0.4758685231208801,
"learning_rate": 7.407407407407407e-07,
"loss": 1.9719613790512085,
"step": 2
},
{
"epoch": 0.03007518796992481,
"grad_norm": 1.2873609066009521,
"learning_rate": 2.222222222222222e-06,
"loss": 2.2840397357940674,
"step": 4
},
{
"epoch": 0.045112781954887216,
"grad_norm": 0.4226410984992981,
"learning_rate": 3.7037037037037037e-06,
"loss": 2.0337564945220947,
"step": 6
},
{
"epoch": 0.06015037593984962,
"grad_norm": 0.20389820635318756,
"learning_rate": 5.185185185185185e-06,
"loss": 1.938320517539978,
"step": 8
},
{
"epoch": 0.07518796992481203,
"grad_norm": 1.1121097803115845,
"learning_rate": 6.666666666666667e-06,
"loss": 1.9521509408950806,
"step": 10
},
{
"epoch": 0.09022556390977443,
"grad_norm": 1.0452852249145508,
"learning_rate": 8.148148148148148e-06,
"loss": 2.3038036823272705,
"step": 12
},
{
"epoch": 0.10526315789473684,
"grad_norm": 0.17127464711666107,
"learning_rate": 9.62962962962963e-06,
"loss": 1.9031141996383667,
"step": 14
},
{
"epoch": 0.12030075187969924,
"grad_norm": 1.0378482341766357,
"learning_rate": 1.1111111111111113e-05,
"loss": 2.545740842819214,
"step": 16
},
{
"epoch": 0.13533834586466165,
"grad_norm": 1.3770403861999512,
"learning_rate": 1.2592592592592593e-05,
"loss": 3.2497336864471436,
"step": 18
},
{
"epoch": 0.15037593984962405,
"grad_norm": 0.6125630140304565,
"learning_rate": 1.4074074074074075e-05,
"loss": 1.7776570320129395,
"step": 20
},
{
"epoch": 0.16541353383458646,
"grad_norm": 0.6343645453453064,
"learning_rate": 1.555555555555556e-05,
"loss": 1.9716706275939941,
"step": 22
},
{
"epoch": 0.18045112781954886,
"grad_norm": 0.6397086977958679,
"learning_rate": 1.7037037037037038e-05,
"loss": 1.9381436109542847,
"step": 24
},
{
"epoch": 0.19548872180451127,
"grad_norm": 1.0100289583206177,
"learning_rate": 1.851851851851852e-05,
"loss": 1.9333921670913696,
"step": 26
},
{
"epoch": 0.21052631578947367,
"grad_norm": 0.6244422793388367,
"learning_rate": 2e-05,
"loss": 1.7134058475494385,
"step": 28
},
{
"epoch": 0.22556390977443608,
"grad_norm": 1.4532567262649536,
"learning_rate": 1.9994079505294254e-05,
"loss": 2.0251219272613525,
"step": 30
},
{
"epoch": 0.24060150375939848,
"grad_norm": 0.5191428661346436,
"learning_rate": 1.9976326268767035e-05,
"loss": 1.578896164894104,
"step": 32
},
{
"epoch": 0.2556390977443609,
"grad_norm": 0.2822546064853668,
"learning_rate": 1.994676502169901e-05,
"loss": 1.5274699926376343,
"step": 34
},
{
"epoch": 0.2706766917293233,
"grad_norm": 0.26378244161605835,
"learning_rate": 1.9905436944609424e-05,
"loss": 1.510546326637268,
"step": 36
},
{
"epoch": 0.2857142857142857,
"grad_norm": 2.3758437633514404,
"learning_rate": 1.9852399609889242e-05,
"loss": 2.110567331314087,
"step": 38
},
{
"epoch": 0.3007518796992481,
"grad_norm": 0.419758677482605,
"learning_rate": 1.9787726901599502e-05,
"loss": 1.1862285137176514,
"step": 40
},
{
"epoch": 0.3157894736842105,
"grad_norm": 0.24496647715568542,
"learning_rate": 1.9711508912546566e-05,
"loss": 1.440342903137207,
"step": 42
},
{
"epoch": 0.3308270676691729,
"grad_norm": 0.3002743721008301,
"learning_rate": 1.9623851818777652e-05,
"loss": 1.0962785482406616,
"step": 44
},
{
"epoch": 0.3458646616541353,
"grad_norm": 0.11880263686180115,
"learning_rate": 1.9524877731671482e-05,
"loss": 1.493391513824463,
"step": 46
},
{
"epoch": 0.3609022556390977,
"grad_norm": 1.7871476411819458,
"learning_rate": 1.941472452783011e-05,
"loss": 1.2433573007583618,
"step": 48
},
{
"epoch": 0.37593984962406013,
"grad_norm": 0.2921432852745056,
"learning_rate": 1.9293545657008865e-05,
"loss": 1.1390293836593628,
"step": 50
},
{
"epoch": 0.39097744360902253,
"grad_norm": 0.12673601508140564,
"learning_rate": 1.9161509928352017e-05,
"loss": 1.0903499126434326,
"step": 52
},
{
"epoch": 0.40601503759398494,
"grad_norm": 0.1648157387971878,
"learning_rate": 1.901880127523192e-05,
"loss": 1.1544872522354126,
"step": 54
},
{
"epoch": 0.42105263157894735,
"grad_norm": 0.08962542563676834,
"learning_rate": 1.886561849901922e-05,
"loss": 1.0822100639343262,
"step": 56
},
{
"epoch": 0.43609022556390975,
"grad_norm": 0.724937379360199,
"learning_rate": 1.870217499214111e-05,
"loss": 1.0416043996810913,
"step": 58
},
{
"epoch": 0.45112781954887216,
"grad_norm": 0.19841401278972626,
"learning_rate": 1.8528698440813397e-05,
"loss": 1.1815505027770996,
"step": 60
},
{
"epoch": 0.46616541353383456,
"grad_norm": 0.2616511285305023,
"learning_rate": 1.8345430507860478e-05,
"loss": 0.9047210812568665,
"step": 62
},
{
"epoch": 0.48120300751879697,
"grad_norm": 0.22353343665599823,
"learning_rate": 1.8152626496065128e-05,
"loss": 1.201892614364624,
"step": 64
},
{
"epoch": 0.49624060150375937,
"grad_norm": 0.6634237766265869,
"learning_rate": 1.7950554992517014e-05,
"loss": 1.1795772314071655,
"step": 66
},
{
"epoch": 0.5112781954887218,
"grad_norm": 0.13962750136852264,
"learning_rate": 1.7739497494455412e-05,
"loss": 1.3534270524978638,
"step": 68
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.665572464466095,
"learning_rate": 1.7519748017127354e-05,
"loss": 1.125345230102539,
"step": 70
},
{
"epoch": 0.5413533834586466,
"grad_norm": 0.179820716381073,
"learning_rate": 1.729161268420746e-05,
"loss": 0.8090606331825256,
"step": 72
},
{
"epoch": 0.556390977443609,
"grad_norm": 0.11726067215204239,
"learning_rate": 1.7055409301350013e-05,
"loss": 0.9213717579841614,
"step": 74
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.1627010852098465,
"learning_rate": 1.681146691346742e-05,
"loss": 1.0821505784988403,
"step": 76
},
{
"epoch": 0.5864661654135338,
"grad_norm": 0.1170893982052803,
"learning_rate": 1.6560125346351663e-05,
"loss": 1.300316333770752,
"step": 78
},
{
"epoch": 0.6015037593984962,
"grad_norm": 0.5334263443946838,
"learning_rate": 1.6301734733277442e-05,
"loss": 0.6484270691871643,
"step": 80
},
{
"epoch": 0.6165413533834586,
"grad_norm": 0.101778045296669,
"learning_rate": 1.603665502724633e-05,
"loss": 1.2577356100082397,
"step": 82
},
{
"epoch": 0.631578947368421,
"grad_norm": 0.1670590043067932,
"learning_rate": 1.576525549955156e-05,
"loss": 1.3543009757995605,
"step": 84
},
{
"epoch": 0.6466165413533834,
"grad_norm": 0.20087628066539764,
"learning_rate": 1.548791422536178e-05,
"loss": 0.9327285885810852,
"step": 86
},
{
"epoch": 0.6616541353383458,
"grad_norm": 0.23011328279972076,
"learning_rate": 1.5205017557040656e-05,
"loss": 1.1237722635269165,
"step": 88
},
{
"epoch": 0.6766917293233082,
"grad_norm": 0.159201517701149,
"learning_rate": 1.4916959585935732e-05,
"loss": 1.1964070796966553,
"step": 90
},
{
"epoch": 0.6917293233082706,
"grad_norm": 0.44444212317466736,
"learning_rate": 1.4624141593386507e-05,
"loss": 1.0308165550231934,
"step": 92
},
{
"epoch": 0.706766917293233,
"grad_norm": 0.22338902950286865,
"learning_rate": 1.4326971491716427e-05,
"loss": 0.9982426762580872,
"step": 94
},
{
"epoch": 0.7218045112781954,
"grad_norm": 0.2096806913614273,
"learning_rate": 1.402586325598752e-05,
"loss": 1.3940727710723877,
"step": 96
},
{
"epoch": 0.7368421052631579,
"grad_norm": 0.21466873586177826,
"learning_rate": 1.3721236347309314e-05,
"loss": 1.1801196336746216,
"step": 98
},
{
"epoch": 0.7518796992481203,
"grad_norm": 0.30590999126434326,
"learning_rate": 1.3413515128505363e-05,
"loss": 0.6430416703224182,
"step": 100
},
{
"epoch": 0.7669172932330827,
"grad_norm": 0.13401509821414948,
"learning_rate": 1.3103128272951363e-05,
"loss": 1.3783133029937744,
"step": 102
},
{
"epoch": 0.7819548872180451,
"grad_norm": 0.12506185472011566,
"learning_rate": 1.2790508167408509e-05,
"loss": 0.9889219403266907,
"step": 104
},
{
"epoch": 0.7969924812030075,
"grad_norm": 0.22044184803962708,
"learning_rate": 1.2476090309683804e-05,
"loss": 0.6871194243431091,
"step": 106
},
{
"epoch": 0.8120300751879699,
"grad_norm": 0.18642327189445496,
"learning_rate": 1.2160312701956553e-05,
"loss": 1.0068978071212769,
"step": 108
},
{
"epoch": 0.8270676691729323,
"grad_norm": 0.5397107601165771,
"learning_rate": 1.1843615240616111e-05,
"loss": 0.8988245725631714,
"step": 110
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.23860491812229156,
"learning_rate": 1.1526439103460874e-05,
"loss": 0.7688661813735962,
"step": 112
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.1502920240163803,
"learning_rate": 1.120922613511221e-05,
"loss": 1.0024021863937378,
"step": 114
},
{
"epoch": 0.8721804511278195,
"grad_norm": 0.16608496010303497,
"learning_rate": 1.0892418231499461e-05,
"loss": 1.4191375970840454,
"step": 116
},
{
"epoch": 0.8872180451127819,
"grad_norm": 0.17546993494033813,
"learning_rate": 1.057645672427347e-05,
"loss": 1.073761224746704,
"step": 118
},
{
"epoch": 0.9022556390977443,
"grad_norm": 5.0469841957092285,
"learning_rate": 1.0261781766006174e-05,
"loss": 0.9056495428085327,
"step": 120
},
{
"epoch": 0.9172932330827067,
"grad_norm": 0.36213457584381104,
"learning_rate": 9.948831717032738e-06,
"loss": 0.8672894835472107,
"step": 122
},
{
"epoch": 0.9323308270676691,
"grad_norm": 0.4218904972076416,
"learning_rate": 9.638042534790373e-06,
"loss": 0.827739417552948,
"step": 124
},
{
"epoch": 0.9473684210526315,
"grad_norm": 0.0983240008354187,
"learning_rate": 9.329847166504497e-06,
"loss": 0.7799423933029175,
"step": 126
},
{
"epoch": 0.9624060150375939,
"grad_norm": 0.14850644767284393,
"learning_rate": 9.024674946068357e-06,
"loss": 1.0653791427612305,
"step": 128
},
{
"epoch": 0.9774436090225563,
"grad_norm": 0.27504855394363403,
"learning_rate": 8.722950995956172e-06,
"loss": 0.9135006666183472,
"step": 130
},
{
"epoch": 0.9924812030075187,
"grad_norm": 0.27222368121147156,
"learning_rate": 8.425095635003053e-06,
"loss": 1.0402815341949463,
"step": 132
},
{
"epoch": 1.0075187969924813,
"grad_norm": 0.1408149003982544,
"learning_rate": 8.13152379287667e-06,
"loss": 0.7629735469818115,
"step": 134
},
{
"epoch": 1.0225563909774436,
"grad_norm": 0.15156933665275574,
"learning_rate": 7.842644432056336e-06,
"loss": 0.9513287544250488,
"step": 136
},
{
"epoch": 1.037593984962406,
"grad_norm": 0.9048015475273132,
"learning_rate": 7.55885997812472e-06,
"loss": 0.6946667432785034,
"step": 138
},
{
"epoch": 1.0526315789473684,
"grad_norm": 0.24624674022197723,
"learning_rate": 7.280565759165833e-06,
"loss": 0.7876754403114319,
"step": 140
},
{
"epoch": 1.0676691729323309,
"grad_norm": 0.09362401068210602,
"learning_rate": 7.008149455050264e-06,
"loss": 0.8406305909156799,
"step": 142
},
{
"epoch": 1.0827067669172932,
"grad_norm": 0.17641493678092957,
"learning_rate": 6.741990557374784e-06,
"loss": 1.0424906015396118,
"step": 144
},
{
"epoch": 1.0977443609022557,
"grad_norm": 0.12022742629051208,
"learning_rate": 6.4824598408087015e-06,
"loss": 1.0722497701644897,
"step": 146
},
{
"epoch": 1.112781954887218,
"grad_norm": 0.08780567348003387,
"learning_rate": 6.229918846583414e-06,
"loss": 1.0312786102294922,
"step": 148
},
{
"epoch": 1.1278195488721805,
"grad_norm": 0.3304075598716736,
"learning_rate": 5.984719378844628e-06,
"loss": 1.0075746774673462,
"step": 150
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.10900267213582993,
"learning_rate": 5.7472030145689604e-06,
"loss": 1.0427347421646118,
"step": 152
},
{
"epoch": 1.1578947368421053,
"grad_norm": 0.09693319350481033,
"learning_rate": 5.51770062772752e-06,
"loss": 1.0927042961120605,
"step": 154
},
{
"epoch": 1.1729323308270676,
"grad_norm": 0.12883000075817108,
"learning_rate": 5.296531928359431e-06,
"loss": 0.9705473780632019,
"step": 156
},
{
"epoch": 1.1879699248120301,
"grad_norm": 0.11416517943143845,
"learning_rate": 5.084005017197318e-06,
"loss": 1.0172467231750488,
"step": 158
},
{
"epoch": 1.2030075187969924,
"grad_norm": 0.10797861963510513,
"learning_rate": 4.8804159564652665e-06,
"loss": 0.5409541726112366,
"step": 160
},
{
"epoch": 1.218045112781955,
"grad_norm": 0.24861538410186768,
"learning_rate": 4.686048357447095e-06,
"loss": 0.9430153965950012,
"step": 162
},
{
"epoch": 1.2330827067669172,
"grad_norm": 0.14524255692958832,
"learning_rate": 4.501172985399498e-06,
"loss": 1.178081750869751,
"step": 164
},
{
"epoch": 1.2481203007518797,
"grad_norm": 0.2940176725387573,
"learning_rate": 4.326047382360457e-06,
"loss": 0.8844167590141296,
"step": 166
},
{
"epoch": 1.263157894736842,
"grad_norm": 0.1306038349866867,
"learning_rate": 4.160915508378359e-06,
"loss": 0.7063813209533691,
"step": 168
},
{
"epoch": 1.2781954887218046,
"grad_norm": 0.15677547454833984,
"learning_rate": 4.006007401661596e-06,
"loss": 0.7762787938117981,
"step": 170
},
{
"epoch": 1.2932330827067668,
"grad_norm": 0.14480236172676086,
"learning_rate": 3.861538858122092e-06,
"loss": 0.937335193157196,
"step": 172
},
{
"epoch": 1.3082706766917294,
"grad_norm": 0.15142542123794556,
"learning_rate": 3.727711130759182e-06,
"loss": 0.9747655987739563,
"step": 174
},
{
"epoch": 1.3233082706766917,
"grad_norm": 0.18726477026939392,
"learning_rate": 3.6047106493025923e-06,
"loss": 0.7746855020523071,
"step": 176
},
{
"epoch": 1.3383458646616542,
"grad_norm": 2.189209461212158,
"learning_rate": 3.492708760505093e-06,
"loss": 0.6926825642585754,
"step": 178
},
{
"epoch": 1.3533834586466165,
"grad_norm": 0.12323262542486191,
"learning_rate": 3.3918614894466045e-06,
"loss": 1.2502151727676392,
"step": 180
},
{
"epoch": 1.368421052631579,
"grad_norm": 0.1458672434091568,
"learning_rate": 3.3023093221822746e-06,
"loss": 0.9960780143737793,
"step": 182
},
{
"epoch": 1.3834586466165413,
"grad_norm": 0.1072113886475563,
"learning_rate": 3.224177010037323e-06,
"loss": 0.8326720595359802,
"step": 184
},
{
"epoch": 1.3984962406015038,
"grad_norm": 0.09054487943649292,
"learning_rate": 3.1575733958212563e-06,
"loss": 1.1920455694198608,
"step": 186
},
{
"epoch": 1.413533834586466,
"grad_norm": 0.13850678503513336,
"learning_rate": 3.1025912622035687e-06,
"loss": 0.5979896783828735,
"step": 188
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.2317809760570526,
"learning_rate": 3.0593072024621396e-06,
"loss": 0.969947099685669,
"step": 190
},
{
"epoch": 1.443609022556391,
"grad_norm": 0.29049012064933777,
"learning_rate": 3.0277815137843917e-06,
"loss": 0.7010709643363953,
"step": 192
},
{
"epoch": 1.4586466165413534,
"grad_norm": 0.17280922830104828,
"learning_rate": 3.008058113269836e-06,
"loss": 0.7660905718803406,
"step": 194
},
{
"epoch": 1.4736842105263157,
"grad_norm": 0.12446308135986328,
"learning_rate": 3.0001644767510154e-06,
"loss": 0.9958880543708801,
"step": 196
},
{
"epoch": 1.4887218045112782,
"grad_norm": 0.1736506223678589,
"learning_rate": 3.0041116005181016e-06,
"loss": 0.9368724226951599,
"step": 198
},
{
"epoch": 1.5037593984962405,
"grad_norm": 0.15870751440525055,
"learning_rate": 3.0198939860004202e-06,
"loss": 0.9826479554176331,
"step": 200
},
{
"epoch": 1.518796992481203,
"grad_norm": 0.15492001175880432,
"learning_rate": 3.0474896474262772e-06,
"loss": 1.3954254388809204,
"step": 202
},
{
"epoch": 1.5338345864661656,
"grad_norm": 0.1334741860628128,
"learning_rate": 3.08686014245041e-06,
"loss": 1.0462982654571533,
"step": 204
},
{
"epoch": 1.5488721804511278,
"grad_norm": 0.26873454451560974,
"learning_rate": 3.1379506257063825e-06,
"loss": 0.8277729153633118,
"step": 206
},
{
"epoch": 1.5639097744360901,
"grad_norm": 0.16168057918548584,
"learning_rate": 3.20068992520934e-06,
"loss": 0.985795259475708,
"step": 208
},
{
"epoch": 1.5789473684210527,
"grad_norm": 0.1431337147951126,
"learning_rate": 3.274990641502683e-06,
"loss": 0.908364474773407,
"step": 210
},
{
"epoch": 1.5939849624060152,
"grad_norm": 0.2742830514907837,
"learning_rate": 3.3607492694105405e-06,
"loss": 0.6370347142219543,
"step": 212
},
{
"epoch": 1.6090225563909775,
"grad_norm": 0.14003214240074158,
"learning_rate": 3.457846342226442e-06,
"loss": 0.9584491848945618,
"step": 214
},
{
"epoch": 1.6240601503759398,
"grad_norm": 0.29483214020729065,
"learning_rate": 3.5661465981373183e-06,
"loss": 1.0774601697921753,
"step": 216
},
{
"epoch": 1.6390977443609023,
"grad_norm": 0.18227490782737732,
"learning_rate": 3.6854991686509906e-06,
"loss": 0.7987947463989258,
"step": 218
},
{
"epoch": 1.6541353383458648,
"grad_norm": 0.20908379554748535,
"learning_rate": 3.815737788764674e-06,
"loss": 0.8292507529258728,
"step": 220
},
{
"epoch": 1.669172932330827,
"grad_norm": 0.23883360624313354,
"learning_rate": 3.956681028581693e-06,
"loss": 0.9586336612701416,
"step": 222
},
{
"epoch": 1.6842105263157894,
"grad_norm": 0.3358386158943176,
"learning_rate": 4.108132546053779e-06,
"loss": 0.8150299191474915,
"step": 224
},
{
"epoch": 1.699248120300752,
"grad_norm": 0.298909068107605,
"learning_rate": 4.269881360496842e-06,
"loss": 1.0946331024169922,
"step": 226
},
{
"epoch": 1.7142857142857144,
"grad_norm": 0.24479779601097107,
"learning_rate": 4.441702146499222e-06,
"loss": 0.9061790108680725,
"step": 228
},
{
"epoch": 1.7293233082706767,
"grad_norm": 0.2154252976179123,
"learning_rate": 4.623355547812946e-06,
"loss": 1.0011441707611084,
"step": 230
},
{
"epoch": 1.744360902255639,
"grad_norm": 0.1481187492609024,
"learning_rate": 4.814588510790782e-06,
"loss": 1.1533119678497314,
"step": 232
},
{
"epoch": 1.7593984962406015,
"grad_norm": 0.10200405865907669,
"learning_rate": 5.01513463690452e-06,
"loss": 1.1073795557022095,
"step": 234
},
{
"epoch": 1.774436090225564,
"grad_norm": 0.19221612811088562,
"learning_rate": 5.224714553853478e-06,
"loss": 1.148139476776123,
"step": 236
},
{
"epoch": 1.7894736842105263,
"grad_norm": 0.1456657350063324,
"learning_rate": 5.443036304746191e-06,
"loss": 0.6271846294403076,
"step": 238
},
{
"epoch": 1.8045112781954886,
"grad_norm": 0.3602541983127594,
"learning_rate": 5.66979575481317e-06,
"loss": 1.0237879753112793,
"step": 240
},
{
"epoch": 1.8195488721804511,
"grad_norm": 0.14290253818035126,
"learning_rate": 5.904677015084159e-06,
"loss": 0.7585715055465698,
"step": 242
},
{
"epoch": 1.8345864661654137,
"grad_norm": 0.15334200859069824,
"learning_rate": 6.147352882439652e-06,
"loss": 1.1932705640792847,
"step": 244
},
{
"epoch": 1.849624060150376,
"grad_norm": 0.09177304059267044,
"learning_rate": 6.397485295423669e-06,
"loss": 1.1568275690078735,
"step": 246
},
{
"epoch": 1.8646616541353382,
"grad_norm": 0.08284302055835724,
"learning_rate": 6.6547258051828426e-06,
"loss": 0.9782670736312866,
"step": 248
},
{
"epoch": 1.8796992481203008,
"grad_norm": 0.12089983373880386,
"learning_rate": 6.918716060875743e-06,
"loss": 1.2261128425598145,
"step": 250
},
{
"epoch": 1.8947368421052633,
"grad_norm": 0.1200575977563858,
"learning_rate": 7.1890883088761885e-06,
"loss": 1.0725328922271729,
"step": 252
},
{
"epoch": 1.9097744360902256,
"grad_norm": 0.2572805881500244,
"learning_rate": 7.4654659050752845e-06,
"loss": 1.2271314859390259,
"step": 254
},
{
"epoch": 1.9248120300751879,
"grad_norm": 0.2713569104671478,
"learning_rate": 7.747463839568292e-06,
"loss": 0.8813698291778564,
"step": 256
},
{
"epoch": 1.9398496240601504,
"grad_norm": 0.5769055485725403,
"learning_rate": 8.034689272995649e-06,
"loss": 0.5630529522895813,
"step": 258
},
{
"epoch": 1.954887218045113,
"grad_norm": 0.13255445659160614,
"learning_rate": 8.32674208379076e-06,
"loss": 1.0115076303482056,
"step": 260
}
],
"logging_steps": 2,
"max_steps": 532,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 260,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.8566094440628224e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}