{
"best_global_step": 150,
"best_metric": 0.8430656790733337,
"best_model_checkpoint": "/root/autodl-tmp/model/lora-textui/stage2_cap_func_trans/checkpoint-150",
"epoch": 1.2732123799359658,
"eval_steps": 25,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008537886872998933,
"grad_norm": 3.569350242614746,
"learning_rate": 0.0,
"loss": 1.5618,
"step": 1
},
{
"epoch": 0.017075773745997867,
"grad_norm": 3.5417115688323975,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.5366,
"step": 2
},
{
"epoch": 0.025613660618996798,
"grad_norm": 3.087338924407959,
"learning_rate": 6.666666666666667e-06,
"loss": 1.5274,
"step": 3
},
{
"epoch": 0.03415154749199573,
"grad_norm": 2.873836040496826,
"learning_rate": 1e-05,
"loss": 1.3764,
"step": 4
},
{
"epoch": 0.042689434364994665,
"grad_norm": 2.4693570137023926,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.3908,
"step": 5
},
{
"epoch": 0.051227321237993596,
"grad_norm": 1.9929084777832031,
"learning_rate": 1.6666666666666667e-05,
"loss": 1.3127,
"step": 6
},
{
"epoch": 0.05976520811099253,
"grad_norm": 1.4752813577651978,
"learning_rate": 2e-05,
"loss": 1.2693,
"step": 7
},
{
"epoch": 0.06830309498399147,
"grad_norm": 1.0920029878616333,
"learning_rate": 2.3333333333333336e-05,
"loss": 1.3037,
"step": 8
},
{
"epoch": 0.0768409818569904,
"grad_norm": 0.8210710287094116,
"learning_rate": 2.6666666666666667e-05,
"loss": 1.1871,
"step": 9
},
{
"epoch": 0.08537886872998933,
"grad_norm": 0.7052881717681885,
"learning_rate": 3e-05,
"loss": 1.15,
"step": 10
},
{
"epoch": 0.09391675560298826,
"grad_norm": 0.6850194334983826,
"learning_rate": 3.3333333333333335e-05,
"loss": 1.2154,
"step": 11
},
{
"epoch": 0.10245464247598719,
"grad_norm": 0.6714601516723633,
"learning_rate": 3.6666666666666666e-05,
"loss": 1.1687,
"step": 12
},
{
"epoch": 0.11099252934898612,
"grad_norm": 0.6564128994941711,
"learning_rate": 4e-05,
"loss": 1.2469,
"step": 13
},
{
"epoch": 0.11953041622198506,
"grad_norm": 0.6847338080406189,
"learning_rate": 4.3333333333333334e-05,
"loss": 1.0827,
"step": 14
},
{
"epoch": 0.128068303094984,
"grad_norm": 0.5931199789047241,
"learning_rate": 4.666666666666667e-05,
"loss": 1.188,
"step": 15
},
{
"epoch": 0.13660618996798293,
"grad_norm": 0.5378791689872742,
"learning_rate": 5e-05,
"loss": 1.1076,
"step": 16
},
{
"epoch": 0.14514407684098185,
"grad_norm": 0.503239631652832,
"learning_rate": 5.333333333333333e-05,
"loss": 1.047,
"step": 17
},
{
"epoch": 0.1536819637139808,
"grad_norm": 0.4387917220592499,
"learning_rate": 5.666666666666667e-05,
"loss": 1.0498,
"step": 18
},
{
"epoch": 0.1622198505869797,
"grad_norm": 0.4416464865207672,
"learning_rate": 6e-05,
"loss": 1.0226,
"step": 19
},
{
"epoch": 0.17075773745997866,
"grad_norm": 0.44988617300987244,
"learning_rate": 6.333333333333333e-05,
"loss": 1.0192,
"step": 20
},
{
"epoch": 0.17929562433297758,
"grad_norm": 0.42736634612083435,
"learning_rate": 6.666666666666667e-05,
"loss": 1.0255,
"step": 21
},
{
"epoch": 0.18783351120597652,
"grad_norm": 0.43649032711982727,
"learning_rate": 7e-05,
"loss": 1.0127,
"step": 22
},
{
"epoch": 0.19637139807897544,
"grad_norm": 0.4321700632572174,
"learning_rate": 7.333333333333333e-05,
"loss": 1.0648,
"step": 23
},
{
"epoch": 0.20490928495197439,
"grad_norm": 0.4347881078720093,
"learning_rate": 7.666666666666667e-05,
"loss": 0.9899,
"step": 24
},
{
"epoch": 0.21344717182497333,
"grad_norm": 0.3858855962753296,
"learning_rate": 8e-05,
"loss": 0.9888,
"step": 25
},
{
"epoch": 0.21344717182497333,
"eval_loss": 0.9922655820846558,
"eval_runtime": 126.3706,
"eval_samples_per_second": 7.739,
"eval_steps_per_second": 0.491,
"step": 25
},
{
"epoch": 0.22198505869797225,
"grad_norm": 0.36364927887916565,
"learning_rate": 8.333333333333334e-05,
"loss": 0.9876,
"step": 26
},
{
"epoch": 0.2305229455709712,
"grad_norm": 0.3517715334892273,
"learning_rate": 8.666666666666667e-05,
"loss": 0.9778,
"step": 27
},
{
"epoch": 0.2390608324439701,
"grad_norm": 0.31786659359931946,
"learning_rate": 9e-05,
"loss": 0.9561,
"step": 28
},
{
"epoch": 0.24759871931696906,
"grad_norm": 0.34954020380973816,
"learning_rate": 9.333333333333334e-05,
"loss": 0.9958,
"step": 29
},
{
"epoch": 0.256136606189968,
"grad_norm": 0.3508739769458771,
"learning_rate": 9.666666666666667e-05,
"loss": 1.0282,
"step": 30
},
{
"epoch": 0.2646744930629669,
"grad_norm": 0.33114153146743774,
"learning_rate": 0.0001,
"loss": 0.9626,
"step": 31
},
{
"epoch": 0.27321237993596587,
"grad_norm": 0.3469662666320801,
"learning_rate": 9.999661540018812e-05,
"loss": 1.0061,
"step": 32
},
{
"epoch": 0.28175026680896476,
"grad_norm": 0.34702596068382263,
"learning_rate": 9.998646205897309e-05,
"loss": 0.9061,
"step": 33
},
{
"epoch": 0.2902881536819637,
"grad_norm": 0.30591893196105957,
"learning_rate": 9.99695413509548e-05,
"loss": 0.9743,
"step": 34
},
{
"epoch": 0.29882604055496265,
"grad_norm": 0.30477747321128845,
"learning_rate": 9.994585556692624e-05,
"loss": 0.9536,
"step": 35
},
{
"epoch": 0.3073639274279616,
"grad_norm": 0.31681594252586365,
"learning_rate": 9.991540791356342e-05,
"loss": 0.998,
"step": 36
},
{
"epoch": 0.31590181430096054,
"grad_norm": 0.30779823660850525,
"learning_rate": 9.987820251299122e-05,
"loss": 1.0049,
"step": 37
},
{
"epoch": 0.3244397011739594,
"grad_norm": 0.30715224146842957,
"learning_rate": 9.983424440222531e-05,
"loss": 0.9955,
"step": 38
},
{
"epoch": 0.3329775880469584,
"grad_norm": 0.30985817313194275,
"learning_rate": 9.978353953249022e-05,
"loss": 0.9684,
"step": 39
},
{
"epoch": 0.3415154749199573,
"grad_norm": 0.31904837489128113,
"learning_rate": 9.972609476841367e-05,
"loss": 1.0183,
"step": 40
},
{
"epoch": 0.35005336179295626,
"grad_norm": 0.29088112711906433,
"learning_rate": 9.966191788709716e-05,
"loss": 0.9516,
"step": 41
},
{
"epoch": 0.35859124866595515,
"grad_norm": 0.3119213283061981,
"learning_rate": 9.959101757706308e-05,
"loss": 0.9607,
"step": 42
},
{
"epoch": 0.3671291355389541,
"grad_norm": 0.2980335056781769,
"learning_rate": 9.951340343707852e-05,
"loss": 0.9116,
"step": 43
},
{
"epoch": 0.37566702241195304,
"grad_norm": 0.31713929772377014,
"learning_rate": 9.942908597485558e-05,
"loss": 1.0001,
"step": 44
},
{
"epoch": 0.384204909284952,
"grad_norm": 0.30609187483787537,
"learning_rate": 9.933807660562898e-05,
"loss": 0.9237,
"step": 45
},
{
"epoch": 0.3927427961579509,
"grad_norm": 0.30118709802627563,
"learning_rate": 9.924038765061042e-05,
"loss": 0.9682,
"step": 46
},
{
"epoch": 0.4012806830309498,
"grad_norm": 0.3028307855129242,
"learning_rate": 9.913603233532067e-05,
"loss": 0.9255,
"step": 47
},
{
"epoch": 0.40981856990394877,
"grad_norm": 0.31861209869384766,
"learning_rate": 9.902502478779896e-05,
"loss": 0.8936,
"step": 48
},
{
"epoch": 0.4183564567769477,
"grad_norm": 0.2734357416629791,
"learning_rate": 9.890738003669029e-05,
"loss": 0.953,
"step": 49
},
{
"epoch": 0.42689434364994666,
"grad_norm": 0.29771876335144043,
"learning_rate": 9.878311400921072e-05,
"loss": 0.9401,
"step": 50
},
{
"epoch": 0.42689434364994666,
"eval_loss": 0.9200984835624695,
"eval_runtime": 126.2659,
"eval_samples_per_second": 7.746,
"eval_steps_per_second": 0.491,
"step": 50
},
{
"epoch": 0.43543223052294555,
"grad_norm": 0.3155674338340759,
"learning_rate": 9.865224352899119e-05,
"loss": 0.8947,
"step": 51
},
{
"epoch": 0.4439701173959445,
"grad_norm": 0.30623170733451843,
"learning_rate": 9.851478631379982e-05,
"loss": 0.9038,
"step": 52
},
{
"epoch": 0.45250800426894344,
"grad_norm": 0.30273666977882385,
"learning_rate": 9.837076097314319e-05,
"loss": 0.9348,
"step": 53
},
{
"epoch": 0.4610458911419424,
"grad_norm": 0.3075931966304779,
"learning_rate": 9.822018700574695e-05,
"loss": 0.9275,
"step": 54
},
{
"epoch": 0.4695837780149413,
"grad_norm": 0.2986295521259308,
"learning_rate": 9.806308479691595e-05,
"loss": 0.928,
"step": 55
},
{
"epoch": 0.4781216648879402,
"grad_norm": 0.3007640540599823,
"learning_rate": 9.789947561577445e-05,
"loss": 0.9702,
"step": 56
},
{
"epoch": 0.48665955176093917,
"grad_norm": 0.30233699083328247,
"learning_rate": 9.77293816123866e-05,
"loss": 0.9534,
"step": 57
},
{
"epoch": 0.4951974386339381,
"grad_norm": 0.30345094203948975,
"learning_rate": 9.755282581475769e-05,
"loss": 0.9474,
"step": 58
},
{
"epoch": 0.503735325506937,
"grad_norm": 0.3086802065372467,
"learning_rate": 9.736983212571646e-05,
"loss": 0.8856,
"step": 59
},
{
"epoch": 0.512273212379936,
"grad_norm": 0.28710058331489563,
"learning_rate": 9.718042531967918e-05,
"loss": 0.946,
"step": 60
},
{
"epoch": 0.5208110992529349,
"grad_norm": 0.3032543361186981,
"learning_rate": 9.698463103929542e-05,
"loss": 0.9204,
"step": 61
},
{
"epoch": 0.5293489861259338,
"grad_norm": 0.33324557542800903,
"learning_rate": 9.678247579197657e-05,
"loss": 0.9127,
"step": 62
},
{
"epoch": 0.5378868729989328,
"grad_norm": 0.31016331911087036,
"learning_rate": 9.657398694630712e-05,
"loss": 0.8976,
"step": 63
},
{
"epoch": 0.5464247598719317,
"grad_norm": 0.3162042498588562,
"learning_rate": 9.635919272833938e-05,
"loss": 0.8394,
"step": 64
},
{
"epoch": 0.5549626467449307,
"grad_norm": 0.3050558865070343,
"learning_rate": 9.613812221777212e-05,
"loss": 0.9153,
"step": 65
},
{
"epoch": 0.5635005336179295,
"grad_norm": 0.3125409483909607,
"learning_rate": 9.591080534401371e-05,
"loss": 0.9224,
"step": 66
},
{
"epoch": 0.5720384204909285,
"grad_norm": 0.3080565333366394,
"learning_rate": 9.567727288213005e-05,
"loss": 0.9281,
"step": 67
},
{
"epoch": 0.5805763073639274,
"grad_norm": 0.31865012645721436,
"learning_rate": 9.543755644867822e-05,
"loss": 0.8459,
"step": 68
},
{
"epoch": 0.5891141942369263,
"grad_norm": 0.3252212107181549,
"learning_rate": 9.519168849742604e-05,
"loss": 0.9076,
"step": 69
},
{
"epoch": 0.5976520811099253,
"grad_norm": 0.31885048747062683,
"learning_rate": 9.493970231495835e-05,
"loss": 0.9292,
"step": 70
},
{
"epoch": 0.6061899679829242,
"grad_norm": 0.3276972770690918,
"learning_rate": 9.468163201617062e-05,
"loss": 0.7814,
"step": 71
},
{
"epoch": 0.6147278548559232,
"grad_norm": 0.3108748495578766,
"learning_rate": 9.441751253965021e-05,
"loss": 0.8878,
"step": 72
},
{
"epoch": 0.6232657417289221,
"grad_norm": 0.3175681531429291,
"learning_rate": 9.414737964294636e-05,
"loss": 0.9064,
"step": 73
},
{
"epoch": 0.6318036286019211,
"grad_norm": 0.3110436499118805,
"learning_rate": 9.38712698977291e-05,
"loss": 0.9185,
"step": 74
},
{
"epoch": 0.6403415154749199,
"grad_norm": 0.31339648365974426,
"learning_rate": 9.358922068483812e-05,
"loss": 0.9435,
"step": 75
},
{
"epoch": 0.6403415154749199,
"eval_loss": 0.8910433650016785,
"eval_runtime": 126.3701,
"eval_samples_per_second": 7.739,
"eval_steps_per_second": 0.491,
"step": 75
},
{
"epoch": 0.6488794023479189,
"grad_norm": 0.32126352190971375,
"learning_rate": 9.330127018922194e-05,
"loss": 0.91,
"step": 76
},
{
"epoch": 0.6574172892209178,
"grad_norm": 0.31005603075027466,
"learning_rate": 9.300745739476829e-05,
"loss": 0.9656,
"step": 77
},
{
"epoch": 0.6659551760939167,
"grad_norm": 0.3075394630432129,
"learning_rate": 9.270782207902629e-05,
"loss": 0.9734,
"step": 78
},
{
"epoch": 0.6744930629669157,
"grad_norm": 0.3072083294391632,
"learning_rate": 9.24024048078213e-05,
"loss": 0.9102,
"step": 79
},
{
"epoch": 0.6830309498399146,
"grad_norm": 0.3321980834007263,
"learning_rate": 9.209124692976287e-05,
"loss": 0.9166,
"step": 80
},
{
"epoch": 0.6915688367129136,
"grad_norm": 0.3266827464103699,
"learning_rate": 9.177439057064683e-05,
"loss": 0.8672,
"step": 81
},
{
"epoch": 0.7001067235859125,
"grad_norm": 0.33046409487724304,
"learning_rate": 9.145187862775209e-05,
"loss": 0.8953,
"step": 82
},
{
"epoch": 0.7086446104589115,
"grad_norm": 0.33084535598754883,
"learning_rate": 9.112375476403312e-05,
"loss": 0.914,
"step": 83
},
{
"epoch": 0.7171824973319103,
"grad_norm": 0.3533576726913452,
"learning_rate": 9.079006340220862e-05,
"loss": 0.8936,
"step": 84
},
{
"epoch": 0.7257203842049093,
"grad_norm": 0.3123546242713928,
"learning_rate": 9.045084971874738e-05,
"loss": 0.9096,
"step": 85
},
{
"epoch": 0.7342582710779082,
"grad_norm": 0.3312874138355255,
"learning_rate": 9.01061596377522e-05,
"loss": 0.957,
"step": 86
},
{
"epoch": 0.7427961579509071,
"grad_norm": 0.3258456885814667,
"learning_rate": 8.97560398247424e-05,
"loss": 0.8853,
"step": 87
},
{
"epoch": 0.7513340448239061,
"grad_norm": 0.35378894209861755,
"learning_rate": 8.940053768033609e-05,
"loss": 0.907,
"step": 88
},
{
"epoch": 0.759871931696905,
"grad_norm": 0.30806443095207214,
"learning_rate": 8.903970133383297e-05,
"loss": 0.8719,
"step": 89
},
{
"epoch": 0.768409818569904,
"grad_norm": 0.33278632164001465,
"learning_rate": 8.86735796366982e-05,
"loss": 0.9782,
"step": 90
},
{
"epoch": 0.7769477054429029,
"grad_norm": 0.31797873973846436,
"learning_rate": 8.83022221559489e-05,
"loss": 0.9225,
"step": 91
},
{
"epoch": 0.7854855923159018,
"grad_norm": 0.3217017352581024,
"learning_rate": 8.792567916744346e-05,
"loss": 0.9045,
"step": 92
},
{
"epoch": 0.7940234791889007,
"grad_norm": 0.37561750411987305,
"learning_rate": 8.754400164907497e-05,
"loss": 0.9335,
"step": 93
},
{
"epoch": 0.8025613660618997,
"grad_norm": 0.3165503144264221,
"learning_rate": 8.715724127386972e-05,
"loss": 0.9086,
"step": 94
},
{
"epoch": 0.8110992529348986,
"grad_norm": 0.3458029329776764,
"learning_rate": 8.676545040299145e-05,
"loss": 0.9079,
"step": 95
},
{
"epoch": 0.8196371398078975,
"grad_norm": 0.336024671792984,
"learning_rate": 8.636868207865244e-05,
"loss": 0.9059,
"step": 96
},
{
"epoch": 0.8281750266808965,
"grad_norm": 0.3598126471042633,
"learning_rate": 8.596699001693255e-05,
"loss": 0.847,
"step": 97
},
{
"epoch": 0.8367129135538954,
"grad_norm": 0.33313530683517456,
"learning_rate": 8.556042860050687e-05,
"loss": 0.911,
"step": 98
},
{
"epoch": 0.8452508004268944,
"grad_norm": 0.3099718689918518,
"learning_rate": 8.51490528712831e-05,
"loss": 0.8864,
"step": 99
},
{
"epoch": 0.8537886872998933,
"grad_norm": 0.3349950611591339,
"learning_rate": 8.473291852294987e-05,
"loss": 0.8479,
"step": 100
},
{
"epoch": 0.8537886872998933,
"eval_loss": 0.8699346780776978,
"eval_runtime": 126.3565,
"eval_samples_per_second": 7.74,
"eval_steps_per_second": 0.491,
"step": 100
},
{
"epoch": 0.8623265741728922,
"grad_norm": 0.3409707546234131,
"learning_rate": 8.43120818934367e-05,
"loss": 0.92,
"step": 101
},
{
"epoch": 0.8708644610458911,
"grad_norm": 0.349369078874588,
"learning_rate": 8.388659995728663e-05,
"loss": 0.8551,
"step": 102
},
{
"epoch": 0.87940234791889,
"grad_norm": 0.3358254134654999,
"learning_rate": 8.345653031794292e-05,
"loss": 0.9024,
"step": 103
},
{
"epoch": 0.887940234791889,
"grad_norm": 0.30185699462890625,
"learning_rate": 8.302193119995039e-05,
"loss": 0.9081,
"step": 104
},
{
"epoch": 0.8964781216648879,
"grad_norm": 0.33944496512413025,
"learning_rate": 8.258286144107276e-05,
"loss": 0.9215,
"step": 105
},
{
"epoch": 0.9050160085378869,
"grad_norm": 0.31531962752342224,
"learning_rate": 8.213938048432697e-05,
"loss": 0.886,
"step": 106
},
{
"epoch": 0.9135538954108858,
"grad_norm": 0.33005067706108093,
"learning_rate": 8.169154836993551e-05,
"loss": 0.8436,
"step": 107
},
{
"epoch": 0.9220917822838848,
"grad_norm": 0.32761579751968384,
"learning_rate": 8.1239425727198e-05,
"loss": 0.8181,
"step": 108
},
{
"epoch": 0.9306296691568837,
"grad_norm": 0.3269185423851013,
"learning_rate": 8.07830737662829e-05,
"loss": 0.8687,
"step": 109
},
{
"epoch": 0.9391675560298826,
"grad_norm": 0.3282541036605835,
"learning_rate": 8.032255426994069e-05,
"loss": 0.8341,
"step": 110
},
{
"epoch": 0.9477054429028815,
"grad_norm": 0.331903338432312,
"learning_rate": 7.985792958513931e-05,
"loss": 0.9307,
"step": 111
},
{
"epoch": 0.9562433297758804,
"grad_norm": 0.37399402260780334,
"learning_rate": 7.938926261462366e-05,
"loss": 0.8765,
"step": 112
},
{
"epoch": 0.9647812166488794,
"grad_norm": 0.3470500409603119,
"learning_rate": 7.891661680839932e-05,
"loss": 0.8956,
"step": 113
},
{
"epoch": 0.9733191035218783,
"grad_norm": 0.33433717489242554,
"learning_rate": 7.844005615514259e-05,
"loss": 0.8607,
"step": 114
},
{
"epoch": 0.9818569903948773,
"grad_norm": 0.3233843743801117,
"learning_rate": 7.795964517353735e-05,
"loss": 0.8357,
"step": 115
},
{
"epoch": 0.9903948772678762,
"grad_norm": 0.32620981335639954,
"learning_rate": 7.74754489035403e-05,
"loss": 0.9093,
"step": 116
},
{
"epoch": 0.9989327641408752,
"grad_norm": 0.3210049569606781,
"learning_rate": 7.698753289757565e-05,
"loss": 0.8595,
"step": 117
},
{
"epoch": 1.0,
"grad_norm": 0.883870542049408,
"learning_rate": 7.649596321166024e-05,
"loss": 0.968,
"step": 118
},
{
"epoch": 1.0085378868729988,
"grad_norm": 0.36958256363868713,
"learning_rate": 7.600080639646077e-05,
"loss": 0.8389,
"step": 119
},
{
"epoch": 1.017075773745998,
"grad_norm": 0.32345816493034363,
"learning_rate": 7.550212948828377e-05,
"loss": 0.8826,
"step": 120
},
{
"epoch": 1.0256136606189967,
"grad_norm": 0.33251067996025085,
"learning_rate": 7.500000000000001e-05,
"loss": 0.8961,
"step": 121
},
{
"epoch": 1.0341515474919958,
"grad_norm": 0.3673892915248871,
"learning_rate": 7.449448591190435e-05,
"loss": 0.7779,
"step": 122
},
{
"epoch": 1.0426894343649946,
"grad_norm": 0.3406680226325989,
"learning_rate": 7.398565566251232e-05,
"loss": 0.882,
"step": 123
},
{
"epoch": 1.0512273212379937,
"grad_norm": 0.35136982798576355,
"learning_rate": 7.347357813929454e-05,
"loss": 0.8583,
"step": 124
},
{
"epoch": 1.0597652081109925,
"grad_norm": 0.38215872645378113,
"learning_rate": 7.295832266935059e-05,
"loss": 0.8085,
"step": 125
},
{
"epoch": 1.0597652081109925,
"eval_loss": 0.855903685092926,
"eval_runtime": 126.4753,
"eval_samples_per_second": 7.733,
"eval_steps_per_second": 0.49,
"step": 125
},
{
"epoch": 1.0683030949839916,
"grad_norm": 0.3545176386833191,
"learning_rate": 7.243995901002312e-05,
"loss": 0.8287,
"step": 126
},
{
"epoch": 1.0768409818569904,
"grad_norm": 0.36242854595184326,
"learning_rate": 7.191855733945387e-05,
"loss": 0.8779,
"step": 127
},
{
"epoch": 1.0853788687299892,
"grad_norm": 0.3716667890548706,
"learning_rate": 7.139418824708272e-05,
"loss": 0.8207,
"step": 128
},
{
"epoch": 1.0939167556029883,
"grad_norm": 0.3446180820465088,
"learning_rate": 7.08669227240909e-05,
"loss": 0.8071,
"step": 129
},
{
"epoch": 1.1024546424759871,
"grad_norm": 0.348202109336853,
"learning_rate": 7.033683215379002e-05,
"loss": 0.816,
"step": 130
},
{
"epoch": 1.1109925293489862,
"grad_norm": 0.3857647776603699,
"learning_rate": 6.980398830195785e-05,
"loss": 0.8307,
"step": 131
},
{
"epoch": 1.119530416221985,
"grad_norm": 0.3619552254676819,
"learning_rate": 6.926846330712242e-05,
"loss": 0.8799,
"step": 132
},
{
"epoch": 1.128068303094984,
"grad_norm": 0.36076459288597107,
"learning_rate": 6.873032967079561e-05,
"loss": 0.8427,
"step": 133
},
{
"epoch": 1.136606189967983,
"grad_norm": 0.3395594358444214,
"learning_rate": 6.818966024765758e-05,
"loss": 0.857,
"step": 134
},
{
"epoch": 1.1451440768409817,
"grad_norm": 0.3523291051387787,
"learning_rate": 6.764652823569344e-05,
"loss": 0.8238,
"step": 135
},
{
"epoch": 1.1536819637139808,
"grad_norm": 0.35138288140296936,
"learning_rate": 6.710100716628344e-05,
"loss": 0.8208,
"step": 136
},
{
"epoch": 1.1622198505869796,
"grad_norm": 0.3666648864746094,
"learning_rate": 6.65531708942479e-05,
"loss": 0.7936,
"step": 137
},
{
"epoch": 1.1707577374599787,
"grad_norm": 0.38198062777519226,
"learning_rate": 6.600309358784857e-05,
"loss": 0.8355,
"step": 138
},
{
"epoch": 1.1792956243329775,
"grad_norm": 0.361715167760849,
"learning_rate": 6.545084971874738e-05,
"loss": 0.7518,
"step": 139
},
{
"epoch": 1.1878335112059766,
"grad_norm": 0.3789786100387573,
"learning_rate": 6.48965140519241e-05,
"loss": 0.8091,
"step": 140
},
{
"epoch": 1.1963713980789754,
"grad_norm": 0.3489861488342285,
"learning_rate": 6.434016163555452e-05,
"loss": 0.8138,
"step": 141
},
{
"epoch": 1.2049092849519745,
"grad_norm": 0.3795143961906433,
"learning_rate": 6.378186779084995e-05,
"loss": 0.8357,
"step": 142
},
{
"epoch": 1.2134471718249733,
"grad_norm": 0.3954067826271057,
"learning_rate": 6.322170810186012e-05,
"loss": 0.8278,
"step": 143
},
{
"epoch": 1.2219850586979724,
"grad_norm": 0.37652266025543213,
"learning_rate": 6.26597584052401e-05,
"loss": 0.8395,
"step": 144
},
{
"epoch": 1.2305229455709712,
"grad_norm": 0.3819694519042969,
"learning_rate": 6.209609477998338e-05,
"loss": 0.811,
"step": 145
},
{
"epoch": 1.23906083244397,
"grad_norm": 0.3565637767314911,
"learning_rate": 6.153079353712201e-05,
"loss": 0.8305,
"step": 146
},
{
"epoch": 1.247598719316969,
"grad_norm": 0.37492281198501587,
"learning_rate": 6.096393120939516e-05,
"loss": 0.8235,
"step": 147
},
{
"epoch": 1.256136606189968,
"grad_norm": 0.3353641927242279,
"learning_rate": 6.0395584540887963e-05,
"loss": 0.8181,
"step": 148
},
{
"epoch": 1.264674493062967,
"grad_norm": 0.37503373622894287,
"learning_rate": 5.982583047664151e-05,
"loss": 0.8309,
"step": 149
},
{
"epoch": 1.2732123799359658,
"grad_norm": 0.34756171703338623,
"learning_rate": 5.925474615223573e-05,
"loss": 0.8345,
"step": 150
},
{
"epoch": 1.2732123799359658,
"eval_loss": 0.8430656790733337,
"eval_runtime": 126.4713,
"eval_samples_per_second": 7.733,
"eval_steps_per_second": 0.49,
"step": 150
}
],
"logging_steps": 1,
"max_steps": 300,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.4140777917830922e+18,
"train_batch_size": 3,
"trial_name": null,
"trial_params": null
}