Nemo-Recwnt / checkpoint-200 /trainer_state.json
zaddyzaddy's picture
Upload folder using huggingface_hub
5f7c21d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.13995801259622112,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio": 0.0,
"completion_length": 92.1964340209961,
"epoch": 0.0006997900629811056,
"grad_norm": 0.04008340386412661,
"learning_rate": 1.7482517482517484e-07,
"loss": -0.0004,
"num_tokens": 19229.0,
"reward": -0.3571428656578064,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.3571428656578064,
"rewards/check_winston_local_func/std": 0.9425028562545776,
"step": 1
},
{
"clip_ratio": 0.0,
"epoch": 0.0013995801259622112,
"grad_norm": 0.04044301640894262,
"learning_rate": 3.496503496503497e-07,
"loss": -0.0004,
"step": 2
},
{
"clip_ratio": 0.0026809382252395153,
"epoch": 0.002099370188943317,
"grad_norm": 0.03858326410821281,
"learning_rate": 5.244755244755246e-07,
"loss": -0.0004,
"step": 3
},
{
"clip_ratio": 0.002734784735366702,
"epoch": 0.0027991602519244225,
"grad_norm": 0.03954530218881001,
"learning_rate": 6.993006993006994e-07,
"loss": -0.0004,
"step": 4
},
{
"clip_ratio": 0.003463542787358165,
"completion_length": 100.78572082519531,
"epoch": 0.0034989503149055285,
"grad_norm": 0.03131731501094277,
"learning_rate": 8.741258741258743e-07,
"loss": -0.0015,
"num_tokens": 39549.0,
"reward": -0.392857164144516,
"reward_std": 0.15152287483215332,
"rewards/check_winston_local_func/mean": -0.3928571343421936,
"rewards/check_winston_local_func/std": 0.9279217720031738,
"step": 5
},
{
"clip_ratio": 0.001506845816038549,
"epoch": 0.004198740377886634,
"grad_norm": 0.031304121161173655,
"learning_rate": 1.0489510489510491e-06,
"loss": -0.0013,
"step": 6
},
{
"clip_ratio": 0.0035776374861598015,
"epoch": 0.00489853044086774,
"grad_norm": 0.0317402782217819,
"learning_rate": 1.2237762237762238e-06,
"loss": -0.0014,
"step": 7
},
{
"clip_ratio": 0.0017960710683837533,
"epoch": 0.005598320503848845,
"grad_norm": 0.03238973734129298,
"learning_rate": 1.3986013986013987e-06,
"loss": -0.0014,
"step": 8
},
{
"clip_ratio": 0.0011830016737803817,
"completion_length": 71.98214721679688,
"epoch": 0.006298110566829951,
"grad_norm": 0.07179208315805877,
"learning_rate": 1.5734265734265736e-06,
"loss": 0.0047,
"num_tokens": 56096.0,
"reward": -0.5714285969734192,
"reward_std": 0.30304574966430664,
"rewards/check_winston_local_func/mean": -0.5714285969734192,
"rewards/check_winston_local_func/std": 0.82807856798172,
"step": 9
},
{
"clip_ratio": 0.0014540323754772544,
"epoch": 0.006997900629811057,
"grad_norm": 0.07210672978675704,
"learning_rate": 1.7482517482517485e-06,
"loss": 0.0043,
"step": 10
},
{
"clip_ratio": 0.0009645656682550907,
"epoch": 0.007697690692792162,
"grad_norm": 0.07154328122740576,
"learning_rate": 1.9230769230769234e-06,
"loss": 0.0048,
"step": 11
},
{
"clip_ratio": 0.0012382904533296824,
"epoch": 0.008397480755773267,
"grad_norm": 0.07206324717584058,
"learning_rate": 2.0979020979020983e-06,
"loss": 0.0045,
"step": 12
},
{
"clip_ratio": 0.0024568967055529356,
"completion_length": 99.33928680419922,
"epoch": 0.009097270818754374,
"grad_norm": 0.10630346643398497,
"learning_rate": 2.2727272727272728e-06,
"loss": 0.0041,
"num_tokens": 76111.0,
"reward": -0.5,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.5,
"rewards/check_winston_local_func/std": 0.8738628625869751,
"step": 13
},
{
"clip_ratio": 0.0018984745256602764,
"epoch": 0.00979706088173548,
"grad_norm": 0.10601918892837382,
"learning_rate": 2.4475524475524477e-06,
"loss": 0.0037,
"step": 14
},
{
"clip_ratio": 0.0020660855807363987,
"epoch": 0.010496850944716585,
"grad_norm": 0.10309105028697467,
"learning_rate": 2.6223776223776225e-06,
"loss": 0.0038,
"step": 15
},
{
"clip_ratio": 0.002301447093486786,
"epoch": 0.01119664100769769,
"grad_norm": 0.10289876702541416,
"learning_rate": 2.7972027972027974e-06,
"loss": 0.0038,
"step": 16
},
{
"clip_ratio": 0.002995749469846487,
"completion_length": 104.10714721679688,
"epoch": 0.011896431070678797,
"grad_norm": 0.02394212165465065,
"learning_rate": 2.9720279720279723e-06,
"loss": -0.0016,
"num_tokens": 97045.0,
"reward": -0.5714285969734192,
"reward_std": 0.10101525485515594,
"rewards/check_winston_local_func/mean": -0.5714285969734192,
"rewards/check_winston_local_func/std": 0.82807856798172,
"step": 17
},
{
"clip_ratio": 0.0021103813778609037,
"epoch": 0.012596221133659902,
"grad_norm": 0.023576991661382562,
"learning_rate": 3.1468531468531472e-06,
"loss": -0.0016,
"step": 18
},
{
"clip_ratio": 0.002064172876998782,
"epoch": 0.013296011196641007,
"grad_norm": 0.023272394799082628,
"learning_rate": 3.3216783216783217e-06,
"loss": -0.0017,
"step": 19
},
{
"clip_ratio": 0.0035561085678637028,
"epoch": 0.013995801259622114,
"grad_norm": 0.023832453115006213,
"learning_rate": 3.496503496503497e-06,
"loss": -0.0016,
"step": 20
},
{
"clip_ratio": 0.0015116453869268298,
"completion_length": 101.58928680419922,
"epoch": 0.01469559132260322,
"grad_norm": 0.11810094542561876,
"learning_rate": 3.6713286713286715e-06,
"loss": 0.0183,
"num_tokens": 117256.0,
"reward": -0.4642857313156128,
"reward_std": 0.45456865429878235,
"rewards/check_winston_local_func/mean": -0.4642857015132904,
"rewards/check_winston_local_func/std": 0.893700897693634,
"step": 21
},
{
"clip_ratio": 0.0015069997170940042,
"epoch": 0.015395381385584325,
"grad_norm": 0.11627287333325324,
"learning_rate": 3.846153846153847e-06,
"loss": 0.0183,
"step": 22
},
{
"clip_ratio": 0.0024017037358134985,
"epoch": 0.01609517144856543,
"grad_norm": 0.11747415252739393,
"learning_rate": 4.020979020979021e-06,
"loss": 0.0185,
"step": 23
},
{
"clip_ratio": 0.0012106437934562564,
"epoch": 0.016794961511546535,
"grad_norm": 0.11886525001745403,
"learning_rate": 4.195804195804197e-06,
"loss": 0.0187,
"step": 24
},
{
"clip_ratio": 0.00203885231167078,
"completion_length": 105.55357360839844,
"epoch": 0.01749475157452764,
"grad_norm": 0.054683142805963834,
"learning_rate": 4.370629370629371e-06,
"loss": 0.0025,
"num_tokens": 138225.0,
"reward": -0.785714328289032,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.7857142686843872,
"rewards/check_winston_local_func/std": 0.6241878271102905,
"step": 25
},
{
"clip_ratio": 0.0020559704862535,
"epoch": 0.01819454163750875,
"grad_norm": 0.05445975538463714,
"learning_rate": 4.5454545454545455e-06,
"loss": 0.0023,
"step": 26
},
{
"clip_ratio": 0.0019049489637836814,
"epoch": 0.018894331700489854,
"grad_norm": 0.0535587329596401,
"learning_rate": 4.72027972027972e-06,
"loss": 0.0024,
"step": 27
},
{
"clip_ratio": 0.002319059334695339,
"epoch": 0.01959412176347096,
"grad_norm": 0.0517636030603052,
"learning_rate": 4.895104895104895e-06,
"loss": 0.0024,
"step": 28
},
{
"clip_ratio": 0.000747890502680093,
"completion_length": 64.1964340209961,
"epoch": 0.020293911826452064,
"grad_norm": 0.13000888889789194,
"learning_rate": 5.06993006993007e-06,
"loss": -0.0059,
"num_tokens": 154256.0,
"reward": -0.25,
"reward_std": 0.25253814458847046,
"rewards/check_winston_local_func/mean": -0.25,
"rewards/check_winston_local_func/std": 0.9770084023475647,
"step": 29
},
{
"clip_ratio": 0.0015584274660795927,
"epoch": 0.02099370188943317,
"grad_norm": 0.13318739478301467,
"learning_rate": 5.244755244755245e-06,
"loss": -0.0058,
"step": 30
},
{
"clip_ratio": 0.001018411829136312,
"epoch": 0.021693491952414275,
"grad_norm": 0.1309805911026886,
"learning_rate": 5.419580419580419e-06,
"loss": -0.0059,
"step": 31
},
{
"clip_ratio": 0.0013923741644248366,
"epoch": 0.02239328201539538,
"grad_norm": 0.1329220463881631,
"learning_rate": 5.594405594405595e-06,
"loss": -0.0059,
"step": 32
},
{
"clip_ratio": 0.002108451910316944,
"completion_length": 89.85714721679688,
"epoch": 0.02309307207837649,
"grad_norm": 0.09378199848577215,
"learning_rate": 5.76923076923077e-06,
"loss": -0.0061,
"num_tokens": 173030.0,
"reward": -0.5714285969734192,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.5714285969734192,
"rewards/check_winston_local_func/std": 0.82807856798172,
"step": 33
},
{
"clip_ratio": 0.001712737837806344,
"epoch": 0.023792862141357594,
"grad_norm": 0.08027161931844973,
"learning_rate": 5.944055944055945e-06,
"loss": -0.0058,
"step": 34
},
{
"clip_ratio": 0.0028315752279013395,
"epoch": 0.0244926522043387,
"grad_norm": 0.09430537489400144,
"learning_rate": 6.1188811188811196e-06,
"loss": -0.0058,
"step": 35
},
{
"clip_ratio": 0.002622501691803336,
"epoch": 0.025192442267319804,
"grad_norm": 0.09399084723387506,
"learning_rate": 6.2937062937062944e-06,
"loss": -0.0059,
"step": 36
},
{
"clip_ratio": 0.0021157327573746443,
"completion_length": 109.67857360839844,
"epoch": 0.02589223233030091,
"grad_norm": 0.03251688295876031,
"learning_rate": 6.468531468531469e-06,
"loss": -0.0003,
"num_tokens": 194736.0,
"reward": -0.7500000596046448,
"reward_std": 0.15152287483215332,
"rewards/check_winston_local_func/mean": -0.75,
"rewards/check_winston_local_func/std": 0.6674237847328186,
"step": 37
},
{
"clip_ratio": 0.001992279663681984,
"epoch": 0.026592022393282014,
"grad_norm": 0.03188152803556155,
"learning_rate": 6.643356643356643e-06,
"loss": -0.0002,
"step": 38
},
{
"clip_ratio": 0.001603165757842362,
"epoch": 0.02729181245626312,
"grad_norm": 0.03305840754053864,
"learning_rate": 6.818181818181818e-06,
"loss": -0.0003,
"step": 39
},
{
"clip_ratio": 0.0016526266699656844,
"epoch": 0.02799160251924423,
"grad_norm": 0.032953470827506756,
"learning_rate": 6.993006993006994e-06,
"loss": -0.0005,
"step": 40
},
{
"clip_ratio": 0.0016526016406714916,
"completion_length": 104.71428680419922,
"epoch": 0.028691392582225334,
"grad_norm": 0.08991319428906154,
"learning_rate": 7.167832167832168e-06,
"loss": -0.0004,
"num_tokens": 215924.0,
"reward": -0.6428571939468384,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.6428571343421936,
"rewards/check_winston_local_func/std": 0.7729182839393616,
"step": 41
},
{
"clip_ratio": 0.0011276095174252987,
"epoch": 0.02939118264520644,
"grad_norm": 0.0932105814337471,
"learning_rate": 7.342657342657343e-06,
"loss": -0.0005,
"step": 42
},
{
"clip_ratio": 0.0012143882922828197,
"epoch": 0.030090972708187544,
"grad_norm": 0.09501466400263839,
"learning_rate": 7.517482517482517e-06,
"loss": -0.0006,
"step": 43
},
{
"clip_ratio": 0.001439297804608941,
"epoch": 0.03079076277116865,
"grad_norm": 0.09410970453927742,
"learning_rate": 7.692307692307694e-06,
"loss": -0.0007,
"step": 44
},
{
"clip_ratio": 0.0007226300658658147,
"completion_length": 88.4464340209961,
"epoch": 0.031490552834149754,
"grad_norm": 0.03381639342861229,
"learning_rate": 7.867132867132867e-06,
"loss": 0.0016,
"num_tokens": 234579.0,
"reward": -0.535714328289032,
"reward_std": 0.05050762742757797,
"rewards/check_winston_local_func/mean": -0.5357142686843872,
"rewards/check_winston_local_func/std": 0.8520410656929016,
"step": 45
},
{
"clip_ratio": 0.0017245642375200987,
"epoch": 0.03219034289713086,
"grad_norm": 0.033458450811929934,
"learning_rate": 8.041958041958042e-06,
"loss": 0.0016,
"step": 46
},
{
"clip_ratio": 0.0011628264328464866,
"epoch": 0.032890132960111965,
"grad_norm": 0.03442645231795752,
"learning_rate": 8.216783216783217e-06,
"loss": 0.0016,
"step": 47
},
{
"clip_ratio": 0.0013220456894487143,
"epoch": 0.03358992302309307,
"grad_norm": 0.028415415852515197,
"learning_rate": 8.391608391608393e-06,
"loss": 0.0015,
"step": 48
},
{
"clip_ratio": 0.0034361626021564007,
"completion_length": 91.46428680419922,
"epoch": 0.034289713086074175,
"grad_norm": 0.04920502199586618,
"learning_rate": 8.566433566433566e-06,
"loss": 0.0013,
"num_tokens": 253761.0,
"reward": -0.8214285969734192,
"reward_std": 0.15152287483215332,
"rewards/check_winston_local_func/mean": -0.8214285969734192,
"rewards/check_winston_local_func/std": 0.5754727125167847,
"step": 49
},
{
"clip_ratio": 0.0016177111538127065,
"epoch": 0.03498950314905528,
"grad_norm": 0.04933398351205588,
"learning_rate": 8.741258741258741e-06,
"loss": 0.0013,
"step": 50
},
{
"clip_ratio": 0.0023581169079989195,
"epoch": 0.03568929321203639,
"grad_norm": 0.04990239817529589,
"learning_rate": 8.916083916083918e-06,
"loss": 0.0012,
"step": 51
},
{
"clip_ratio": 0.002693318761885166,
"epoch": 0.0363890832750175,
"grad_norm": 0.049258900801895884,
"learning_rate": 9.090909090909091e-06,
"loss": 0.0011,
"step": 52
},
{
"clip_ratio": 0.0013933092122897506,
"completion_length": 76.5,
"epoch": 0.0370888733379986,
"grad_norm": 0.06783434389116598,
"learning_rate": 9.265734265734266e-06,
"loss": 0.0004,
"num_tokens": 270951.0,
"reward": -0.4285714626312256,
"reward_std": 0.4040610194206238,
"rewards/check_winston_local_func/mean": -0.4285714328289032,
"rewards/check_winston_local_func/std": 0.9116845726966858,
"step": 53
},
{
"clip_ratio": 0.0012361541157588363,
"epoch": 0.03778866340097971,
"grad_norm": 0.06739214545276892,
"learning_rate": 9.44055944055944e-06,
"loss": 0.0002,
"step": 54
},
{
"clip_ratio": 0.0011503547430038452,
"epoch": 0.03848845346396081,
"grad_norm": 0.06968777943502087,
"learning_rate": 9.615384615384616e-06,
"loss": -0.0001,
"step": 55
},
{
"clip_ratio": 0.002146858721971512,
"epoch": 0.03918824352694192,
"grad_norm": 0.06632924367628588,
"learning_rate": 9.79020979020979e-06,
"loss": -0.0001,
"step": 56
},
{
"clip_ratio": 0.002248402452096343,
"completion_length": 100.05357360839844,
"epoch": 0.03988803358992302,
"grad_norm": 0.03339030440662342,
"learning_rate": 9.965034965034966e-06,
"loss": 0.0007,
"num_tokens": 291902.0,
"reward": -0.4285714626312256,
"reward_std": 0.10101525485515594,
"rewards/check_winston_local_func/mean": -0.4285714328289032,
"rewards/check_winston_local_func/std": 0.9116845726966858,
"step": 57
},
{
"clip_ratio": 0.002410450717434287,
"epoch": 0.04058782365290413,
"grad_norm": 0.03407166704900621,
"learning_rate": 1.013986013986014e-05,
"loss": 0.0008,
"step": 58
},
{
"clip_ratio": 0.0027997640427201986,
"epoch": 0.041287613715885234,
"grad_norm": 0.03397162163806953,
"learning_rate": 1.0314685314685315e-05,
"loss": 0.0007,
"step": 59
},
{
"clip_ratio": 0.0030621180776506662,
"epoch": 0.04198740377886634,
"grad_norm": 0.034258551127942245,
"learning_rate": 1.048951048951049e-05,
"loss": 0.0007,
"step": 60
},
{
"clip_ratio": 0.0021611705888062716,
"completion_length": 84.55357360839844,
"epoch": 0.042687193841847444,
"grad_norm": 0.04382869653282982,
"learning_rate": 1.0664335664335665e-05,
"loss": 0.0024,
"num_tokens": 310097.0,
"reward": -0.7500000596046448,
"reward_std": 0.15152287483215332,
"rewards/check_winston_local_func/mean": -0.75,
"rewards/check_winston_local_func/std": 0.6674237847328186,
"step": 61
},
{
"clip_ratio": 0.0010437711607664824,
"epoch": 0.04338698390482855,
"grad_norm": 0.04632797121542376,
"learning_rate": 1.0839160839160838e-05,
"loss": 0.0024,
"step": 62
},
{
"clip_ratio": 0.0013273117365315557,
"epoch": 0.044086773967809655,
"grad_norm": 0.046019113122256816,
"learning_rate": 1.1013986013986015e-05,
"loss": 0.0021,
"step": 63
},
{
"clip_ratio": 0.002344512613490224,
"epoch": 0.04478656403079076,
"grad_norm": 0.04434571865803126,
"learning_rate": 1.118881118881119e-05,
"loss": 0.0022,
"step": 64
},
{
"clip_ratio": 0.0014854084001854062,
"completion_length": 93.58928680419922,
"epoch": 0.04548635409377187,
"grad_norm": 0.013792833624525774,
"learning_rate": 1.1363636363636365e-05,
"loss": -0.0006,
"num_tokens": 329004.0,
"reward": -0.8214285969734192,
"reward_std": 0.05050762742757797,
"rewards/check_winston_local_func/mean": -0.8214285969734192,
"rewards/check_winston_local_func/std": 0.5754727125167847,
"step": 65
},
{
"clip_ratio": 0.002451003296300769,
"epoch": 0.04618614415675298,
"grad_norm": 0.01395207894798071,
"learning_rate": 1.153846153846154e-05,
"loss": -0.0006,
"step": 66
},
{
"clip_ratio": 0.0016351536614820361,
"epoch": 0.04688593421973408,
"grad_norm": 0.01316846865814371,
"learning_rate": 1.1713286713286714e-05,
"loss": -0.0006,
"step": 67
},
{
"clip_ratio": 0.0026426080148667097,
"epoch": 0.04758572428271519,
"grad_norm": 0.013837974578886334,
"learning_rate": 1.188811188811189e-05,
"loss": -0.0006,
"step": 68
},
{
"clip_ratio": 0.0009010470239445567,
"completion_length": 95.62500762939453,
"epoch": 0.04828551434569629,
"grad_norm": 0.08072032529486632,
"learning_rate": 1.2062937062937063e-05,
"loss": 0.0018,
"num_tokens": 349081.0,
"reward": -0.7142857313156128,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.7142857313156128,
"rewards/check_winston_local_func/std": 0.7061878442764282,
"step": 69
},
{
"clip_ratio": 0.0022199582308530807,
"epoch": 0.0489853044086774,
"grad_norm": 0.0818785835069056,
"learning_rate": 1.2237762237762239e-05,
"loss": 0.0014,
"step": 70
},
{
"clip_ratio": 0.0017290068790316582,
"epoch": 0.0496850944716585,
"grad_norm": 0.07902681914542756,
"learning_rate": 1.2412587412587414e-05,
"loss": 0.001,
"step": 71
},
{
"clip_ratio": 0.0016852362314239144,
"epoch": 0.05038488453463961,
"grad_norm": 0.08449847550325483,
"learning_rate": 1.2587412587412589e-05,
"loss": 0.0005,
"step": 72
},
{
"clip_ratio": 0.0020247853826731443,
"completion_length": 91.78572082519531,
"epoch": 0.05108467459762071,
"grad_norm": 0.04909551324890117,
"learning_rate": 1.2762237762237764e-05,
"loss": -0.003,
"num_tokens": 367819.0,
"reward": -0.5,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.5,
"rewards/check_winston_local_func/std": 0.8738628625869751,
"step": 73
},
{
"clip_ratio": 0.0034576961770653725,
"epoch": 0.05178446466060182,
"grad_norm": 0.050114477186976265,
"learning_rate": 1.2937062937062939e-05,
"loss": -0.0031,
"step": 74
},
{
"clip_ratio": 0.0019642652478069067,
"epoch": 0.052484254723582924,
"grad_norm": 0.05018386747388819,
"learning_rate": 1.3111888111888112e-05,
"loss": -0.0033,
"step": 75
},
{
"clip_ratio": 0.004161330871284008,
"epoch": 0.05318404478656403,
"grad_norm": 0.05121415635619661,
"learning_rate": 1.3286713286713287e-05,
"loss": -0.0037,
"step": 76
},
{
"clip_ratio": 0.0017616358818486333,
"completion_length": 106.50000762939453,
"epoch": 0.053883834849545134,
"grad_norm": 0.05530735796526797,
"learning_rate": 1.3461538461538462e-05,
"loss": -0.0041,
"num_tokens": 388977.0,
"reward": -0.6428571939468384,
"reward_std": 0.10101525485515594,
"rewards/check_winston_local_func/mean": -0.6428571343421936,
"rewards/check_winston_local_func/std": 0.7729182839393616,
"step": 77
},
{
"clip_ratio": 0.0018110191449522972,
"epoch": 0.05458362491252624,
"grad_norm": 0.05608373724379992,
"learning_rate": 1.3636363636363637e-05,
"loss": -0.0042,
"step": 78
},
{
"clip_ratio": 0.003877634182572365,
"epoch": 0.055283414975507345,
"grad_norm": 0.04218829661702587,
"learning_rate": 1.381118881118881e-05,
"loss": -0.0044,
"step": 79
},
{
"clip_ratio": 0.003191744675859809,
"epoch": 0.05598320503848846,
"grad_norm": 0.04293784405855666,
"learning_rate": 1.3986013986013988e-05,
"loss": -0.0046,
"step": 80
},
{
"clip_ratio": 0.002192563144490123,
"completion_length": 102.51786041259766,
"epoch": 0.05668299510146956,
"grad_norm": 0.01621220365623996,
"learning_rate": 1.4160839160839163e-05,
"loss": -0.0011,
"num_tokens": 409804.0,
"reward": -0.8214285969734192,
"reward_std": 0.05050762742757797,
"rewards/check_winston_local_func/mean": -0.8214285969734192,
"rewards/check_winston_local_func/std": 0.5754727125167847,
"step": 81
},
{
"clip_ratio": 0.0021092891693115234,
"epoch": 0.05738278516445067,
"grad_norm": 0.016235676972647724,
"learning_rate": 1.4335664335664336e-05,
"loss": -0.0012,
"step": 82
},
{
"clip_ratio": 0.0023684012703597546,
"epoch": 0.05808257522743177,
"grad_norm": 0.016915613552120477,
"learning_rate": 1.4510489510489511e-05,
"loss": -0.0011,
"step": 83
},
{
"clip_ratio": 0.002553236670792103,
"epoch": 0.05878236529041288,
"grad_norm": 0.016878300731711607,
"learning_rate": 1.4685314685314686e-05,
"loss": -0.0012,
"step": 84
},
{
"clip_ratio": 0.0023020573426038027,
"completion_length": 95.33928680419922,
"epoch": 0.05948215535339398,
"grad_norm": 0.09922403654931192,
"learning_rate": 1.486013986013986e-05,
"loss": 0.0034,
"num_tokens": 429265.0,
"reward": -0.392857164144516,
"reward_std": 0.3535533845424652,
"rewards/check_winston_local_func/mean": -0.3928571343421936,
"rewards/check_winston_local_func/std": 0.9279217720031738,
"step": 85
},
{
"clip_ratio": 0.002049357397481799,
"epoch": 0.06018194541637509,
"grad_norm": 0.0820235579491076,
"learning_rate": 1.5034965034965034e-05,
"loss": 0.0032,
"step": 86
},
{
"clip_ratio": 0.003993889316916466,
"epoch": 0.06088173547935619,
"grad_norm": 0.07572179365943402,
"learning_rate": 1.5209790209790212e-05,
"loss": 0.0028,
"step": 87
},
{
"clip_ratio": 0.004023912828415632,
"epoch": 0.0615815255423373,
"grad_norm": 0.0786883863698215,
"learning_rate": 1.5384615384615387e-05,
"loss": 0.0025,
"step": 88
},
{
"clip_ratio": 0.0012786961160600185,
"completion_length": 101.01786041259766,
"epoch": 0.0622813156053184,
"grad_norm": 0.13944075765142377,
"learning_rate": 1.555944055944056e-05,
"loss": -0.0059,
"num_tokens": 449712.0,
"reward": -0.4642857313156128,
"reward_std": 0.3535533845424652,
"rewards/check_winston_local_func/mean": -0.4642857015132904,
"rewards/check_winston_local_func/std": 0.893700897693634,
"step": 89
},
{
"clip_ratio": 0.0012525760103017092,
"epoch": 0.06298110566829951,
"grad_norm": 0.14046611947962784,
"learning_rate": 1.5734265734265734e-05,
"loss": -0.0062,
"step": 90
},
{
"clip_ratio": 0.003111109836027026,
"epoch": 0.06368089573128062,
"grad_norm": 0.13042169819458227,
"learning_rate": 1.590909090909091e-05,
"loss": -0.0079,
"step": 91
},
{
"clip_ratio": 0.013328815810382366,
"epoch": 0.06438068579426172,
"grad_norm": 0.0955558239371001,
"learning_rate": 1.6083916083916083e-05,
"loss": -0.0092,
"step": 92
},
{
"clip_ratio": 0.0021473567467182875,
"completion_length": 80.30357360839844,
"epoch": 0.06508047585724283,
"grad_norm": 0.17357283543483568,
"learning_rate": 1.625874125874126e-05,
"loss": 0.0104,
"num_tokens": 466947.0,
"reward": -0.2142857313156128,
"reward_std": 0.4040610194206238,
"rewards/check_winston_local_func/mean": -0.2142857164144516,
"rewards/check_winston_local_func/std": 0.9856107234954834,
"step": 93
},
{
"clip_ratio": 0.004891776479780674,
"epoch": 0.06578026592022393,
"grad_norm": 0.16677719867011565,
"learning_rate": 1.6433566433566433e-05,
"loss": 0.0089,
"step": 94
},
{
"clip_ratio": 0.008591952733695507,
"epoch": 0.06648005598320504,
"grad_norm": 0.15657656176787582,
"learning_rate": 1.660839160839161e-05,
"loss": 0.0066,
"step": 95
},
{
"clip_ratio": 0.017924649640917778,
"epoch": 0.06717984604618614,
"grad_norm": 0.1468099989251008,
"learning_rate": 1.6783216783216786e-05,
"loss": 0.004,
"step": 96
},
{
"clip_ratio": 0.0016577127389609814,
"completion_length": 120.50000762939453,
"epoch": 0.06787963610916725,
"grad_norm": 0.06674060360416964,
"learning_rate": 1.695804195804196e-05,
"loss": -0.001,
"num_tokens": 489863.0,
"reward": -0.785714328289032,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.7857142686843872,
"rewards/check_winston_local_func/std": 0.6241878271102905,
"step": 97
},
{
"clip_ratio": 0.003024409292265773,
"epoch": 0.06857942617214835,
"grad_norm": 0.05912491262092969,
"learning_rate": 1.7132867132867133e-05,
"loss": -0.0015,
"step": 98
},
{
"clip_ratio": 0.004554019309580326,
"epoch": 0.06927921623512946,
"grad_norm": 0.05430919883014471,
"learning_rate": 1.730769230769231e-05,
"loss": -0.0019,
"step": 99
},
{
"clip_ratio": 0.00875174906104803,
"epoch": 0.06997900629811056,
"grad_norm": 0.04764899914092442,
"learning_rate": 1.7482517482517483e-05,
"loss": -0.0025,
"step": 100
},
{
"clip_ratio": 0.0022144827526062727,
"completion_length": 85.92857360839844,
"epoch": 0.07067879636109167,
"grad_norm": 0.09514652279620683,
"learning_rate": 1.7657342657342656e-05,
"loss": 0.0045,
"num_tokens": 508189.0,
"reward": -0.535714328289032,
"reward_std": 0.3535534143447876,
"rewards/check_winston_local_func/mean": -0.5357142686843872,
"rewards/check_winston_local_func/std": 0.8520411252975464,
"step": 101
},
{
"clip_ratio": 0.004405450075864792,
"epoch": 0.07137858642407278,
"grad_norm": 0.08384041265838117,
"learning_rate": 1.7832167832167836e-05,
"loss": 0.004,
"step": 102
},
{
"clip_ratio": 0.007100887596607208,
"epoch": 0.07207837648705388,
"grad_norm": 0.07838152678466788,
"learning_rate": 1.800699300699301e-05,
"loss": 0.0031,
"step": 103
},
{
"clip_ratio": 0.012806176207959652,
"epoch": 0.072778166550035,
"grad_norm": 0.054761747581467624,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.0022,
"step": 104
},
{
"clip_ratio": 0.002409872133284807,
"completion_length": 89.16072082519531,
"epoch": 0.0734779566130161,
"grad_norm": 0.088338886024993,
"learning_rate": 1.835664335664336e-05,
"loss": -0.0024,
"num_tokens": 526646.0,
"reward": -0.5,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.5,
"rewards/check_winston_local_func/std": 0.8738628625869751,
"step": 105
},
{
"clip_ratio": 0.003525706473737955,
"epoch": 0.0741777466759972,
"grad_norm": 0.08578242655426072,
"learning_rate": 1.8531468531468532e-05,
"loss": -0.0031,
"step": 106
},
{
"clip_ratio": 0.010289273224771023,
"epoch": 0.0748775367389783,
"grad_norm": 0.07249149477931406,
"learning_rate": 1.8706293706293705e-05,
"loss": -0.0043,
"step": 107
},
{
"clip_ratio": 0.027354398742318153,
"epoch": 0.07557732680195942,
"grad_norm": 0.059934840975274094,
"learning_rate": 1.888111888111888e-05,
"loss": -0.005,
"step": 108
},
{
"clip_ratio": 0.002047365065664053,
"completion_length": 99.67857360839844,
"epoch": 0.07627711686494051,
"grad_norm": 0.052068804469643425,
"learning_rate": 1.9055944055944055e-05,
"loss": -0.0002,
"num_tokens": 546720.0,
"reward": -0.7142857313156128,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.7142857313156128,
"rewards/check_winston_local_func/std": 0.7061878442764282,
"step": 109
},
{
"clip_ratio": 0.006073285825550556,
"epoch": 0.07697690692792163,
"grad_norm": 0.053601884682000965,
"learning_rate": 1.923076923076923e-05,
"loss": -0.0004,
"step": 110
},
{
"clip_ratio": 0.01850634068250656,
"epoch": 0.07767669699090272,
"grad_norm": 0.0262467926642497,
"learning_rate": 1.9405594405594408e-05,
"loss": -0.0006,
"step": 111
},
{
"clip_ratio": 0.02557740919291973,
"epoch": 0.07837648705388384,
"grad_norm": 0.028862292431493224,
"learning_rate": 1.958041958041958e-05,
"loss": -0.0007,
"step": 112
},
{
"clip_ratio": 0.0017085629515349865,
"completion_length": 102.46428680419922,
"epoch": 0.07907627711686493,
"grad_norm": 0.047073786173530204,
"learning_rate": 1.9755244755244758e-05,
"loss": 0.0003,
"num_tokens": 567098.0,
"reward": -0.6428571939468384,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.6428571343421936,
"rewards/check_winston_local_func/std": 0.7729182839393616,
"step": 113
},
{
"clip_ratio": 0.0032013251911848783,
"epoch": 0.07977606717984605,
"grad_norm": 0.04603497030180295,
"learning_rate": 1.993006993006993e-05,
"loss": 0.0,
"step": 114
},
{
"clip_ratio": 0.00821536686271429,
"epoch": 0.08047585724282715,
"grad_norm": 0.04227017570834447,
"learning_rate": 2.0104895104895104e-05,
"loss": -0.0003,
"step": 115
},
{
"clip_ratio": 0.020387563854455948,
"epoch": 0.08117564730580826,
"grad_norm": 0.03197828312555116,
"learning_rate": 2.027972027972028e-05,
"loss": -0.0007,
"step": 116
},
{
"clip_ratio": 0.002021044958382845,
"completion_length": 81.4464340209961,
"epoch": 0.08187543736878937,
"grad_norm": 0.13324413388648826,
"learning_rate": 2.0454545454545457e-05,
"loss": 0.0049,
"num_tokens": 584845.0,
"reward": -0.4285714626312256,
"reward_std": 0.4040610194206238,
"rewards/check_winston_local_func/mean": -0.4285714328289032,
"rewards/check_winston_local_func/std": 0.9116845726966858,
"step": 117
},
{
"clip_ratio": 0.01248109433799982,
"epoch": 0.08257522743177047,
"grad_norm": 0.11616979541003405,
"learning_rate": 2.062937062937063e-05,
"loss": 0.0033,
"step": 118
},
{
"clip_ratio": 0.03872568532824516,
"epoch": 0.08327501749475158,
"grad_norm": 0.08932212762248723,
"learning_rate": 2.0804195804195807e-05,
"loss": 0.0016,
"step": 119
},
{
"clip_ratio": 0.0671406015753746,
"epoch": 0.08397480755773268,
"grad_norm": 0.07829121992484567,
"learning_rate": 2.097902097902098e-05,
"loss": 0.0001,
"step": 120
},
{
"clip_ratio": 0.0027903958689421415,
"completion_length": 109.67857360839844,
"epoch": 0.08467459762071379,
"grad_norm": 0.10038881852522909,
"learning_rate": 2.1153846153846154e-05,
"loss": 0.0027,
"num_tokens": 606389.0,
"reward": -0.1071428656578064,
"reward_std": 0.05050762742757797,
"rewards/check_winston_local_func/mean": -0.1071428582072258,
"rewards/check_winston_local_func/std": 1.0032415390014648,
"step": 121
},
{
"clip_ratio": 0.010223714634776115,
"epoch": 0.08537438768369489,
"grad_norm": 0.08037774781050448,
"learning_rate": 2.132867132867133e-05,
"loss": 0.0017,
"step": 122
},
{
"clip_ratio": 0.02510545216500759,
"epoch": 0.086074177746676,
"grad_norm": 0.06594532056416831,
"learning_rate": 2.1503496503496503e-05,
"loss": 0.0009,
"step": 123
},
{
"clip_ratio": 0.04746328294277191,
"epoch": 0.0867739678096571,
"grad_norm": 0.0495169500454822,
"learning_rate": 2.1678321678321677e-05,
"loss": 0.0004,
"step": 124
},
{
"clip_ratio": 0.002552854595705867,
"completion_length": 92.92857360839844,
"epoch": 0.08747375787263821,
"grad_norm": 0.07383050554477533,
"learning_rate": 2.1853146853146857e-05,
"loss": -0.003,
"num_tokens": 625723.0,
"reward": -0.392857164144516,
"reward_std": 0.05050762742757797,
"rewards/check_winston_local_func/mean": -0.3928571343421936,
"rewards/check_winston_local_func/std": 0.9279217720031738,
"step": 125
},
{
"clip_ratio": 0.017842039465904236,
"epoch": 0.08817354793561931,
"grad_norm": 0.043649507012091936,
"learning_rate": 2.202797202797203e-05,
"loss": -0.0037,
"step": 126
},
{
"clip_ratio": 0.045183245092630386,
"epoch": 0.08887333799860042,
"grad_norm": 0.03451108201943257,
"learning_rate": 2.2202797202797203e-05,
"loss": -0.0039,
"step": 127
},
{
"clip_ratio": 0.0701090469956398,
"epoch": 0.08957312806158152,
"grad_norm": 0.023061406081443397,
"learning_rate": 2.237762237762238e-05,
"loss": -0.0041,
"step": 128
},
{
"clip_ratio": 0.0010595758212730289,
"completion_length": 103.25000762939453,
"epoch": 0.09027291812456263,
"grad_norm": 0.15282505586734968,
"learning_rate": 2.2552447552447553e-05,
"loss": 0.0011,
"num_tokens": 646391.0,
"reward": -0.1071428656578064,
"reward_std": 0.45456868410110474,
"rewards/check_winston_local_func/mean": -0.1071428582072258,
"rewards/check_winston_local_func/std": 1.0032414197921753,
"step": 129
},
{
"clip_ratio": 0.015386571176350117,
"epoch": 0.09097270818754374,
"grad_norm": 0.13868014978577842,
"learning_rate": 2.272727272727273e-05,
"loss": -0.0008,
"step": 130
},
{
"clip_ratio": 0.025238754227757454,
"epoch": 0.09167249825052484,
"grad_norm": 0.12901702065673692,
"learning_rate": 2.2902097902097902e-05,
"loss": -0.0033,
"step": 131
},
{
"clip_ratio": 0.026391755789518356,
"epoch": 0.09237228831350595,
"grad_norm": 0.09971213845352783,
"learning_rate": 2.307692307692308e-05,
"loss": -0.006,
"step": 132
},
{
"clip_ratio": 0.0035059447400271893,
"completion_length": 98.33928680419922,
"epoch": 0.09307207837648705,
"grad_norm": 0.11128060046799104,
"learning_rate": 2.3251748251748252e-05,
"loss": -0.0018,
"num_tokens": 666114.0,
"reward": -0.3571428656578064,
"reward_std": 0.30304574966430664,
"rewards/check_winston_local_func/mean": -0.3571428656578064,
"rewards/check_winston_local_func/std": 0.9425028562545776,
"step": 133
},
{
"clip_ratio": 0.0042843748815357685,
"epoch": 0.09377186843946816,
"grad_norm": 0.1065458455684171,
"learning_rate": 2.342657342657343e-05,
"loss": -0.0028,
"step": 134
},
{
"clip_ratio": 0.01338073518127203,
"epoch": 0.09447165850244926,
"grad_norm": 0.07422771205194853,
"learning_rate": 2.3601398601398602e-05,
"loss": -0.0042,
"step": 135
},
{
"clip_ratio": 0.02260064147412777,
"epoch": 0.09517144856543037,
"grad_norm": 0.05031624319039464,
"learning_rate": 2.377622377622378e-05,
"loss": -0.0051,
"step": 136
},
{
"clip_ratio": 0.0022608404979109764,
"completion_length": 87.00000762939453,
"epoch": 0.09587123862841147,
"grad_norm": 0.17254445638790264,
"learning_rate": 2.3951048951048952e-05,
"loss": -0.0094,
"num_tokens": 684454.0,
"reward": -0.0357142873108387,
"reward_std": 0.3535533845424652,
"rewards/check_winston_local_func/mean": -0.0357142873108387,
"rewards/check_winston_local_func/std": 1.0084062814712524,
"step": 137
},
{
"clip_ratio": 0.011511722579598427,
"epoch": 0.09657102869139259,
"grad_norm": 0.146693566733721,
"learning_rate": 2.4125874125874125e-05,
"loss": -0.0129,
"step": 138
},
{
"clip_ratio": 0.0310056172311306,
"epoch": 0.09727081875437368,
"grad_norm": 0.1269304320317103,
"learning_rate": 2.43006993006993e-05,
"loss": -0.0159,
"step": 139
},
{
"clip_ratio": 0.04163637384772301,
"epoch": 0.0979706088173548,
"grad_norm": 0.09818573191869126,
"learning_rate": 2.4475524475524478e-05,
"loss": -0.0187,
"step": 140
},
{
"clip_ratio": 0.0025228250306099653,
"completion_length": 104.9464340209961,
"epoch": 0.0986703988803359,
"grad_norm": 0.1003012262187849,
"learning_rate": 2.465034965034965e-05,
"loss": -0.0031,
"num_tokens": 705439.0,
"reward": -0.1428571492433548,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.1428571492433548,
"rewards/check_winston_local_func/std": 0.9987004995346069,
"step": 141
},
{
"clip_ratio": 0.012786303646862507,
"epoch": 0.099370188943317,
"grad_norm": 0.08364392907940049,
"learning_rate": 2.4825174825174828e-05,
"loss": -0.0043,
"step": 142
},
{
"clip_ratio": 0.04185020551085472,
"epoch": 0.1000699790062981,
"grad_norm": 0.055411268177771554,
"learning_rate": 2.5e-05,
"loss": -0.0052,
"step": 143
},
{
"clip_ratio": 0.06534933298826218,
"epoch": 0.10076976906927922,
"grad_norm": 0.04043680317744743,
"learning_rate": 2.5174825174825178e-05,
"loss": -0.0057,
"step": 144
},
{
"clip_ratio": 0.00163670489564538,
"completion_length": 79.0,
"epoch": 0.10146955913226033,
"grad_norm": 0.26037723002756274,
"learning_rate": 2.534965034965035e-05,
"loss": -0.0013,
"num_tokens": 722363.0,
"reward": -0.2857142984867096,
"reward_std": 0.4040610194206238,
"rewards/check_winston_local_func/mean": -0.2857142984867096,
"rewards/check_winston_local_func/std": 0.9669875502586365,
"step": 145
},
{
"clip_ratio": 0.010257317684590816,
"epoch": 0.10216934919524143,
"grad_norm": 0.21925125532352843,
"learning_rate": 2.5524475524475528e-05,
"loss": -0.0078,
"step": 146
},
{
"clip_ratio": 0.03979513794183731,
"epoch": 0.10286913925822254,
"grad_norm": 0.1493741677240076,
"learning_rate": 2.5699300699300697e-05,
"loss": -0.014,
"step": 147
},
{
"clip_ratio": 0.06495730578899384,
"epoch": 0.10356892932120364,
"grad_norm": 0.09721255929993584,
"learning_rate": 2.5874125874125877e-05,
"loss": -0.0176,
"step": 148
},
{
"clip_ratio": 0.003729403717443347,
"completion_length": 98.60714721679688,
"epoch": 0.10426871938418475,
"grad_norm": 0.21024181860460278,
"learning_rate": 2.6048951048951047e-05,
"loss": -0.0029,
"num_tokens": 742543.0,
"reward": -0.3214285969734192,
"reward_std": 0.25253814458847046,
"rewards/check_winston_local_func/mean": -0.3214285671710968,
"rewards/check_winston_local_func/std": 0.955503523349762,
"step": 149
},
{
"clip_ratio": 0.011744500137865543,
"epoch": 0.10496850944716585,
"grad_norm": 0.16355956559118884,
"learning_rate": 2.6223776223776224e-05,
"loss": -0.0069,
"step": 150
},
{
"clip_ratio": 0.030442187562584877,
"epoch": 0.10566829951014696,
"grad_norm": 0.1182182136437938,
"learning_rate": 2.6398601398601404e-05,
"loss": -0.0104,
"step": 151
},
{
"clip_ratio": 0.04920857399702072,
"epoch": 0.10636808957312806,
"grad_norm": 0.09156116791582807,
"learning_rate": 2.6573426573426574e-05,
"loss": -0.013,
"step": 152
},
{
"clip_ratio": 0.0027830980252474546,
"completion_length": 101.71428680419922,
"epoch": 0.10706787963610917,
"grad_norm": 0.07071809306988276,
"learning_rate": 2.674825174825175e-05,
"loss": 0.0011,
"num_tokens": 762373.0,
"reward": -0.4285714626312256,
"reward_std": 0.10101525485515594,
"rewards/check_winston_local_func/mean": -0.4285714328289032,
"rewards/check_winston_local_func/std": 0.9116845726966858,
"step": 153
},
{
"clip_ratio": 0.004099779762327671,
"epoch": 0.10776766969909027,
"grad_norm": 0.07036975743996918,
"learning_rate": 2.6923076923076923e-05,
"loss": 0.0006,
"step": 154
},
{
"clip_ratio": 0.011427856050431728,
"epoch": 0.10846745976207138,
"grad_norm": 0.060173537559690966,
"learning_rate": 2.70979020979021e-05,
"loss": -0.0002,
"step": 155
},
{
"clip_ratio": 0.02241707034409046,
"epoch": 0.10916724982505248,
"grad_norm": 0.04464993792503255,
"learning_rate": 2.7272727272727273e-05,
"loss": -0.0007,
"step": 156
},
{
"clip_ratio": 0.003849891945719719,
"completion_length": 126.60714721679688,
"epoch": 0.10986703988803359,
"grad_norm": 0.14247799791087257,
"learning_rate": 2.744755244755245e-05,
"loss": 0.003,
"num_tokens": 785161.0,
"reward": -0.25,
"reward_std": 0.3535533845424652,
"rewards/check_winston_local_func/mean": -0.25,
"rewards/check_winston_local_func/std": 0.9770084023475647,
"step": 157
},
{
"clip_ratio": 0.00803058035671711,
"epoch": 0.11056682995101469,
"grad_norm": 0.13081890796714626,
"learning_rate": 2.762237762237762e-05,
"loss": 0.0006,
"step": 158
},
{
"clip_ratio": 0.02222571335732937,
"epoch": 0.1112666200139958,
"grad_norm": 0.10130551565904075,
"learning_rate": 2.77972027972028e-05,
"loss": -0.0021,
"step": 159
},
{
"clip_ratio": 0.03389605134725571,
"epoch": 0.11196641007697691,
"grad_norm": 0.0756074031523243,
"learning_rate": 2.7972027972027976e-05,
"loss": -0.0047,
"step": 160
},
{
"clip_ratio": 0.003950103186070919,
"completion_length": 104.12500762939453,
"epoch": 0.11266620013995801,
"grad_norm": 0.1290577443710622,
"learning_rate": 2.8146853146853146e-05,
"loss": 0.002,
"num_tokens": 805162.0,
"reward": -0.3571428656578064,
"reward_std": 0.2020305097103119,
"rewards/check_winston_local_func/mean": -0.3571428656578064,
"rewards/check_winston_local_func/std": 0.9425028562545776,
"step": 161
},
{
"clip_ratio": 0.007646625861525536,
"epoch": 0.11336599020293912,
"grad_norm": 0.11025990408745222,
"learning_rate": 2.8321678321678326e-05,
"loss": 0.0002,
"step": 162
},
{
"clip_ratio": 0.01921841874718666,
"epoch": 0.11406578026592022,
"grad_norm": 0.07245932578181155,
"learning_rate": 2.8496503496503496e-05,
"loss": -0.0017,
"step": 163
},
{
"clip_ratio": 0.03461840748786926,
"epoch": 0.11476557032890133,
"grad_norm": 0.051788726865233656,
"learning_rate": 2.8671328671328672e-05,
"loss": -0.0028,
"step": 164
},
{
"clip_ratio": 0.002369140973314643,
"completion_length": 98.73214721679688,
"epoch": 0.11546536039188243,
"grad_norm": 0.1842865637323427,
"learning_rate": 2.8846153846153845e-05,
"loss": 0.0051,
"num_tokens": 825007.0,
"reward": -0.0714285746216774,
"reward_std": 0.4040610194206238,
"rewards/check_winston_local_func/mean": -0.0714285746216774,
"rewards/check_winston_local_func/std": 1.0064724683761597,
"step": 165
},
{
"clip_ratio": 0.012421431951224804,
"epoch": 0.11616515045486354,
"grad_norm": 0.15047989090077732,
"learning_rate": 2.9020979020979022e-05,
"loss": 0.0018,
"step": 166
},
{
"clip_ratio": 0.027025196701288223,
"epoch": 0.11686494051784464,
"grad_norm": 0.09721729056776199,
"learning_rate": 2.91958041958042e-05,
"loss": -0.0012,
"step": 167
},
{
"clip_ratio": 0.04289395734667778,
"epoch": 0.11756473058082575,
"grad_norm": 0.06975068028842074,
"learning_rate": 2.9370629370629372e-05,
"loss": -0.0033,
"step": 168
},
{
"clip_ratio": 0.0018040953436866403,
"completion_length": 105.3214340209961,
"epoch": 0.11826452064380685,
"grad_norm": 0.2677758748707387,
"learning_rate": 2.954545454545455e-05,
"loss": -0.0015,
"num_tokens": 845663.0,
"reward": -0.0714285746216774,
"reward_std": 0.4040609896183014,
"rewards/check_winston_local_func/mean": -0.0714285746216774,
"rewards/check_winston_local_func/std": 1.0064724683761597,
"step": 169
},
{
"clip_ratio": 0.01557006873190403,
"epoch": 0.11896431070678797,
"grad_norm": 0.2023749080142164,
"learning_rate": 2.972027972027972e-05,
"loss": -0.0082,
"step": 170
},
{
"clip_ratio": 0.04334796220064163,
"epoch": 0.11966410076976906,
"grad_norm": 0.12184054679561386,
"learning_rate": 2.9895104895104898e-05,
"loss": -0.0128,
"step": 171
},
{
"clip_ratio": 0.06698625534772873,
"epoch": 0.12036389083275018,
"grad_norm": 0.08481462032815572,
"learning_rate": 3.0069930069930068e-05,
"loss": -0.0152,
"step": 172
},
{
"clip_ratio": 0.002561988076195121,
"completion_length": 106.25000762939453,
"epoch": 0.12106368089573127,
"grad_norm": 0.3222269362637656,
"learning_rate": 3.0244755244755245e-05,
"loss": -0.0225,
"num_tokens": 866213.0,
"reward": -0.1785714328289032,
"reward_std": 0.45456865429878235,
"rewards/check_winston_local_func/mean": -0.1785714328289032,
"rewards/check_winston_local_func/std": 0.9928314685821533,
"step": 173
},
{
"clip_ratio": 0.027860935777425766,
"epoch": 0.12176347095871239,
"grad_norm": 0.23558262189831372,
"learning_rate": 3.0419580419580425e-05,
"loss": -0.0308,
"step": 174
},
{
"clip_ratio": 0.0620122067630291,
"epoch": 0.1224632610216935,
"grad_norm": 0.16018574686601328,
"learning_rate": 3.0594405594405594e-05,
"loss": -0.0363,
"step": 175
},
{
"clip_ratio": 0.0841975286602974,
"epoch": 0.1231630510846746,
"grad_norm": 0.11961313174632249,
"learning_rate": 3.0769230769230774e-05,
"loss": -0.0402,
"step": 176
},
{
"clip_ratio": 0.0038492009043693542,
"completion_length": 112.05357360839844,
"epoch": 0.12386284114765571,
"grad_norm": 0.16841580805802311,
"learning_rate": 3.094405594405594e-05,
"loss": 0.0144,
"num_tokens": 887380.0,
"reward": -0.3571428656578064,
"reward_std": 0.30304574966430664,
"rewards/check_winston_local_func/mean": -0.3571428656578064,
"rewards/check_winston_local_func/std": 0.9425028562545776,
"step": 177
},
{
"clip_ratio": 0.0077889300882816315,
"epoch": 0.1245626312106368,
"grad_norm": 0.14772994247321478,
"learning_rate": 3.111888111888112e-05,
"loss": 0.0116,
"step": 178
},
{
"clip_ratio": 0.024449503049254417,
"epoch": 0.12526242127361792,
"grad_norm": 0.11835892505404014,
"learning_rate": 3.1293706293706294e-05,
"loss": 0.0085,
"step": 179
},
{
"clip_ratio": 0.04173960164189339,
"epoch": 0.12596221133659902,
"grad_norm": 0.07150863048317996,
"learning_rate": 3.146853146853147e-05,
"loss": 0.0065,
"step": 180
},
{
"clip_ratio": 0.0018401921261101961,
"completion_length": 130.8928680419922,
"epoch": 0.12666200139958012,
"grad_norm": 0.19977261410354588,
"learning_rate": 3.164335664335665e-05,
"loss": 0.0079,
"num_tokens": 910956.0,
"reward": -0.2857142984867096,
"reward_std": 0.4040610194206238,
"rewards/check_winston_local_func/mean": -0.2857142984867096,
"rewards/check_winston_local_func/std": 0.9669875502586365,
"step": 181
},
{
"clip_ratio": 0.013327782042324543,
"epoch": 0.12736179146256124,
"grad_norm": 0.18304114191410556,
"learning_rate": 3.181818181818182e-05,
"loss": 0.0037,
"step": 182
},
{
"clip_ratio": 0.035625942051410675,
"epoch": 0.12806158152554234,
"grad_norm": 0.13774519057774026,
"learning_rate": 3.1993006993006994e-05,
"loss": -0.0011,
"step": 183
},
{
"clip_ratio": 0.055436424911022186,
"epoch": 0.12876137158852344,
"grad_norm": 0.1037769963224376,
"learning_rate": 3.216783216783217e-05,
"loss": -0.005,
"step": 184
},
{
"clip_ratio": 0.003232809714972973,
"completion_length": 115.85714721679688,
"epoch": 0.12946116165150454,
"grad_norm": 0.22848421400085006,
"learning_rate": 3.234265734265735e-05,
"loss": -0.002,
"num_tokens": 932532.0,
"reward": -0.0714285746216774,
"reward_std": 0.4040610194206238,
"rewards/check_winston_local_func/mean": -0.0714285746216774,
"rewards/check_winston_local_func/std": 1.0064724683761597,
"step": 185
},
{
"clip_ratio": 0.013981361873447895,
"epoch": 0.13016095171448566,
"grad_norm": 0.14544684282491208,
"learning_rate": 3.251748251748252e-05,
"loss": -0.006,
"step": 186
},
{
"clip_ratio": 0.032285287976264954,
"epoch": 0.13086074177746676,
"grad_norm": 0.11489336548569441,
"learning_rate": 3.269230769230769e-05,
"loss": -0.0092,
"step": 187
},
{
"clip_ratio": 0.05701437592506409,
"epoch": 0.13156053184044786,
"grad_norm": 0.08462838304844368,
"learning_rate": 3.2867132867132866e-05,
"loss": -0.0115,
"step": 188
},
{
"clip_ratio": 0.0032829714473336935,
"completion_length": 81.66072082519531,
"epoch": 0.13226032190342898,
"grad_norm": 0.33315610129233025,
"learning_rate": 3.3041958041958046e-05,
"loss": -0.0109,
"num_tokens": 950027.0,
"reward": 0.1428571492433548,
"reward_std": 0.5050762891769409,
"rewards/check_winston_local_func/mean": 0.1428571492433548,
"rewards/check_winston_local_func/std": 0.9987004399299622,
"step": 189
},
{
"clip_ratio": 0.02284400910139084,
"epoch": 0.13296011196641008,
"grad_norm": 0.25411099199841936,
"learning_rate": 3.321678321678322e-05,
"loss": -0.0211,
"step": 190
},
{
"clip_ratio": 0.06905340403318405,
"epoch": 0.13365990202939118,
"grad_norm": 0.17966938259048712,
"learning_rate": 3.339160839160839e-05,
"loss": -0.0292,
"step": 191
},
{
"clip_ratio": 0.10486488789319992,
"epoch": 0.13435969209237228,
"grad_norm": 0.12395870104426593,
"learning_rate": 3.356643356643357e-05,
"loss": -0.0345,
"step": 192
},
{
"clip_ratio": 0.0039912075735628605,
"completion_length": 98.53572082519531,
"epoch": 0.1350594821553534,
"grad_norm": 0.19967198347280601,
"learning_rate": 3.374125874125874e-05,
"loss": -0.0034,
"num_tokens": 969635.0,
"reward": -0.2142857313156128,
"reward_std": 0.4040610194206238,
"rewards/check_winston_local_func/mean": -0.2142857164144516,
"rewards/check_winston_local_func/std": 0.9856107234954834,
"step": 193
},
{
"clip_ratio": 0.015699883922934532,
"epoch": 0.1357592722183345,
"grad_norm": 0.14403516372623493,
"learning_rate": 3.391608391608392e-05,
"loss": -0.0078,
"step": 194
},
{
"clip_ratio": 0.0434185229241848,
"epoch": 0.1364590622813156,
"grad_norm": 0.09019420685306537,
"learning_rate": 3.409090909090909e-05,
"loss": -0.0106,
"step": 195
},
{
"clip_ratio": 0.06262689083814621,
"epoch": 0.1371588523442967,
"grad_norm": 0.06525358070480064,
"learning_rate": 3.4265734265734265e-05,
"loss": -0.012,
"step": 196
},
{
"clip_ratio": 0.0026181330904364586,
"completion_length": 112.64286041259766,
"epoch": 0.13785864240727783,
"grad_norm": 0.26666733225679445,
"learning_rate": 3.4440559440559445e-05,
"loss": -0.0005,
"num_tokens": 990877.0,
"reward": -0.0357142873108387,
"reward_std": 0.45456865429878235,
"rewards/check_winston_local_func/mean": -0.0357142873108387,
"rewards/check_winston_local_func/std": 1.0084062814712524,
"step": 197
},
{
"clip_ratio": 0.02237066999077797,
"epoch": 0.13855843247025892,
"grad_norm": 0.196293468974288,
"learning_rate": 3.461538461538462e-05,
"loss": -0.0081,
"step": 198
},
{
"clip_ratio": 0.05692709609866142,
"epoch": 0.13925822253324002,
"grad_norm": 0.12274152758367594,
"learning_rate": 3.479020979020979e-05,
"loss": -0.0128,
"step": 199
},
{
"clip_ratio": 0.07971518486738205,
"epoch": 0.13995801259622112,
"grad_norm": 0.08223267167561776,
"learning_rate": 3.4965034965034965e-05,
"loss": -0.0154,
"step": 200
}
],
"logging_steps": 1,
"max_steps": 5716,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}