Nemo-Recwnt / checkpoint-200 /trainer_state.json

Upload folder using huggingface_hub

5f7c21d verified about 1 year ago

58.4 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.13995801259622112,
	"eval_steps": 500,
	"global_step": 200,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"clip_ratio": 0.0,
	"completion_length": 92.1964340209961,
	"epoch": 0.0006997900629811056,
	"grad_norm": 0.04008340386412661,
	"learning_rate": 1.7482517482517484e-07,
	"loss": -0.0004,
	"num_tokens": 19229.0,
	"reward": -0.3571428656578064,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.3571428656578064,
	"rewards/check_winston_local_func/std": 0.9425028562545776,
	"step": 1
	},
	{
	"clip_ratio": 0.0,
	"epoch": 0.0013995801259622112,
	"grad_norm": 0.04044301640894262,
	"learning_rate": 3.496503496503497e-07,
	"loss": -0.0004,
	"step": 2
	},
	{
	"clip_ratio": 0.0026809382252395153,
	"epoch": 0.002099370188943317,
	"grad_norm": 0.03858326410821281,
	"learning_rate": 5.244755244755246e-07,
	"loss": -0.0004,
	"step": 3
	},
	{
	"clip_ratio": 0.002734784735366702,
	"epoch": 0.0027991602519244225,
	"grad_norm": 0.03954530218881001,
	"learning_rate": 6.993006993006994e-07,
	"loss": -0.0004,
	"step": 4
	},
	{
	"clip_ratio": 0.003463542787358165,
	"completion_length": 100.78572082519531,
	"epoch": 0.0034989503149055285,
	"grad_norm": 0.03131731501094277,
	"learning_rate": 8.741258741258743e-07,
	"loss": -0.0015,
	"num_tokens": 39549.0,
	"reward": -0.392857164144516,
	"reward_std": 0.15152287483215332,
	"rewards/check_winston_local_func/mean": -0.3928571343421936,
	"rewards/check_winston_local_func/std": 0.9279217720031738,
	"step": 5
	},
	{
	"clip_ratio": 0.001506845816038549,
	"epoch": 0.004198740377886634,
	"grad_norm": 0.031304121161173655,
	"learning_rate": 1.0489510489510491e-06,
	"loss": -0.0013,
	"step": 6
	},
	{
	"clip_ratio": 0.0035776374861598015,
	"epoch": 0.00489853044086774,
	"grad_norm": 0.0317402782217819,
	"learning_rate": 1.2237762237762238e-06,
	"loss": -0.0014,
	"step": 7
	},
	{
	"clip_ratio": 0.0017960710683837533,
	"epoch": 0.005598320503848845,
	"grad_norm": 0.03238973734129298,
	"learning_rate": 1.3986013986013987e-06,
	"loss": -0.0014,
	"step": 8
	},
	{
	"clip_ratio": 0.0011830016737803817,
	"completion_length": 71.98214721679688,
	"epoch": 0.006298110566829951,
	"grad_norm": 0.07179208315805877,
	"learning_rate": 1.5734265734265736e-06,
	"loss": 0.0047,
	"num_tokens": 56096.0,
	"reward": -0.5714285969734192,
	"reward_std": 0.30304574966430664,
	"rewards/check_winston_local_func/mean": -0.5714285969734192,
	"rewards/check_winston_local_func/std": 0.82807856798172,
	"step": 9
	},
	{
	"clip_ratio": 0.0014540323754772544,
	"epoch": 0.006997900629811057,
	"grad_norm": 0.07210672978675704,
	"learning_rate": 1.7482517482517485e-06,
	"loss": 0.0043,
	"step": 10
	},
	{
	"clip_ratio": 0.0009645656682550907,
	"epoch": 0.007697690692792162,
	"grad_norm": 0.07154328122740576,
	"learning_rate": 1.9230769230769234e-06,
	"loss": 0.0048,
	"step": 11
	},
	{
	"clip_ratio": 0.0012382904533296824,
	"epoch": 0.008397480755773267,
	"grad_norm": 0.07206324717584058,
	"learning_rate": 2.0979020979020983e-06,
	"loss": 0.0045,
	"step": 12
	},
	{
	"clip_ratio": 0.0024568967055529356,
	"completion_length": 99.33928680419922,
	"epoch": 0.009097270818754374,
	"grad_norm": 0.10630346643398497,
	"learning_rate": 2.2727272727272728e-06,
	"loss": 0.0041,
	"num_tokens": 76111.0,
	"reward": -0.5,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.5,
	"rewards/check_winston_local_func/std": 0.8738628625869751,
	"step": 13
	},
	{
	"clip_ratio": 0.0018984745256602764,
	"epoch": 0.00979706088173548,
	"grad_norm": 0.10601918892837382,
	"learning_rate": 2.4475524475524477e-06,
	"loss": 0.0037,
	"step": 14
	},
	{
	"clip_ratio": 0.0020660855807363987,
	"epoch": 0.010496850944716585,
	"grad_norm": 0.10309105028697467,
	"learning_rate": 2.6223776223776225e-06,
	"loss": 0.0038,
	"step": 15
	},
	{
	"clip_ratio": 0.002301447093486786,
	"epoch": 0.01119664100769769,
	"grad_norm": 0.10289876702541416,
	"learning_rate": 2.7972027972027974e-06,
	"loss": 0.0038,
	"step": 16
	},
	{
	"clip_ratio": 0.002995749469846487,
	"completion_length": 104.10714721679688,
	"epoch": 0.011896431070678797,
	"grad_norm": 0.02394212165465065,
	"learning_rate": 2.9720279720279723e-06,
	"loss": -0.0016,
	"num_tokens": 97045.0,
	"reward": -0.5714285969734192,
	"reward_std": 0.10101525485515594,
	"rewards/check_winston_local_func/mean": -0.5714285969734192,
	"rewards/check_winston_local_func/std": 0.82807856798172,
	"step": 17
	},
	{
	"clip_ratio": 0.0021103813778609037,
	"epoch": 0.012596221133659902,
	"grad_norm": 0.023576991661382562,
	"learning_rate": 3.1468531468531472e-06,
	"loss": -0.0016,
	"step": 18
	},
	{
	"clip_ratio": 0.002064172876998782,
	"epoch": 0.013296011196641007,
	"grad_norm": 0.023272394799082628,
	"learning_rate": 3.3216783216783217e-06,
	"loss": -0.0017,
	"step": 19
	},
	{
	"clip_ratio": 0.0035561085678637028,
	"epoch": 0.013995801259622114,
	"grad_norm": 0.023832453115006213,
	"learning_rate": 3.496503496503497e-06,
	"loss": -0.0016,
	"step": 20
	},
	{
	"clip_ratio": 0.0015116453869268298,
	"completion_length": 101.58928680419922,
	"epoch": 0.01469559132260322,
	"grad_norm": 0.11810094542561876,
	"learning_rate": 3.6713286713286715e-06,
	"loss": 0.0183,
	"num_tokens": 117256.0,
	"reward": -0.4642857313156128,
	"reward_std": 0.45456865429878235,
	"rewards/check_winston_local_func/mean": -0.4642857015132904,
	"rewards/check_winston_local_func/std": 0.893700897693634,
	"step": 21
	},
	{
	"clip_ratio": 0.0015069997170940042,
	"epoch": 0.015395381385584325,
	"grad_norm": 0.11627287333325324,
	"learning_rate": 3.846153846153847e-06,
	"loss": 0.0183,
	"step": 22
	},
	{
	"clip_ratio": 0.0024017037358134985,
	"epoch": 0.01609517144856543,
	"grad_norm": 0.11747415252739393,
	"learning_rate": 4.020979020979021e-06,
	"loss": 0.0185,
	"step": 23
	},
	{
	"clip_ratio": 0.0012106437934562564,
	"epoch": 0.016794961511546535,
	"grad_norm": 0.11886525001745403,
	"learning_rate": 4.195804195804197e-06,
	"loss": 0.0187,
	"step": 24
	},
	{
	"clip_ratio": 0.00203885231167078,
	"completion_length": 105.55357360839844,
	"epoch": 0.01749475157452764,
	"grad_norm": 0.054683142805963834,
	"learning_rate": 4.370629370629371e-06,
	"loss": 0.0025,
	"num_tokens": 138225.0,
	"reward": -0.785714328289032,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.7857142686843872,
	"rewards/check_winston_local_func/std": 0.6241878271102905,
	"step": 25
	},
	{
	"clip_ratio": 0.0020559704862535,
	"epoch": 0.01819454163750875,
	"grad_norm": 0.05445975538463714,
	"learning_rate": 4.5454545454545455e-06,
	"loss": 0.0023,
	"step": 26
	},
	{
	"clip_ratio": 0.0019049489637836814,
	"epoch": 0.018894331700489854,
	"grad_norm": 0.0535587329596401,
	"learning_rate": 4.72027972027972e-06,
	"loss": 0.0024,
	"step": 27
	},
	{
	"clip_ratio": 0.002319059334695339,
	"epoch": 0.01959412176347096,
	"grad_norm": 0.0517636030603052,
	"learning_rate": 4.895104895104895e-06,
	"loss": 0.0024,
	"step": 28
	},
	{
	"clip_ratio": 0.000747890502680093,
	"completion_length": 64.1964340209961,
	"epoch": 0.020293911826452064,
	"grad_norm": 0.13000888889789194,
	"learning_rate": 5.06993006993007e-06,
	"loss": -0.0059,
	"num_tokens": 154256.0,
	"reward": -0.25,
	"reward_std": 0.25253814458847046,
	"rewards/check_winston_local_func/mean": -0.25,
	"rewards/check_winston_local_func/std": 0.9770084023475647,
	"step": 29
	},
	{
	"clip_ratio": 0.0015584274660795927,
	"epoch": 0.02099370188943317,
	"grad_norm": 0.13318739478301467,
	"learning_rate": 5.244755244755245e-06,
	"loss": -0.0058,
	"step": 30
	},
	{
	"clip_ratio": 0.001018411829136312,
	"epoch": 0.021693491952414275,
	"grad_norm": 0.1309805911026886,
	"learning_rate": 5.419580419580419e-06,
	"loss": -0.0059,
	"step": 31
	},
	{
	"clip_ratio": 0.0013923741644248366,
	"epoch": 0.02239328201539538,
	"grad_norm": 0.1329220463881631,
	"learning_rate": 5.594405594405595e-06,
	"loss": -0.0059,
	"step": 32
	},
	{
	"clip_ratio": 0.002108451910316944,
	"completion_length": 89.85714721679688,
	"epoch": 0.02309307207837649,
	"grad_norm": 0.09378199848577215,
	"learning_rate": 5.76923076923077e-06,
	"loss": -0.0061,
	"num_tokens": 173030.0,
	"reward": -0.5714285969734192,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.5714285969734192,
	"rewards/check_winston_local_func/std": 0.82807856798172,
	"step": 33
	},
	{
	"clip_ratio": 0.001712737837806344,
	"epoch": 0.023792862141357594,
	"grad_norm": 0.08027161931844973,
	"learning_rate": 5.944055944055945e-06,
	"loss": -0.0058,
	"step": 34
	},
	{
	"clip_ratio": 0.0028315752279013395,
	"epoch": 0.0244926522043387,
	"grad_norm": 0.09430537489400144,
	"learning_rate": 6.1188811188811196e-06,
	"loss": -0.0058,
	"step": 35
	},
	{
	"clip_ratio": 0.002622501691803336,
	"epoch": 0.025192442267319804,
	"grad_norm": 0.09399084723387506,
	"learning_rate": 6.2937062937062944e-06,
	"loss": -0.0059,
	"step": 36
	},
	{
	"clip_ratio": 0.0021157327573746443,
	"completion_length": 109.67857360839844,
	"epoch": 0.02589223233030091,
	"grad_norm": 0.03251688295876031,
	"learning_rate": 6.468531468531469e-06,
	"loss": -0.0003,
	"num_tokens": 194736.0,
	"reward": -0.7500000596046448,
	"reward_std": 0.15152287483215332,
	"rewards/check_winston_local_func/mean": -0.75,
	"rewards/check_winston_local_func/std": 0.6674237847328186,
	"step": 37
	},
	{
	"clip_ratio": 0.001992279663681984,
	"epoch": 0.026592022393282014,
	"grad_norm": 0.03188152803556155,
	"learning_rate": 6.643356643356643e-06,
	"loss": -0.0002,
	"step": 38
	},
	{
	"clip_ratio": 0.001603165757842362,
	"epoch": 0.02729181245626312,
	"grad_norm": 0.03305840754053864,
	"learning_rate": 6.818181818181818e-06,
	"loss": -0.0003,
	"step": 39
	},
	{
	"clip_ratio": 0.0016526266699656844,
	"epoch": 0.02799160251924423,
	"grad_norm": 0.032953470827506756,
	"learning_rate": 6.993006993006994e-06,
	"loss": -0.0005,
	"step": 40
	},
	{
	"clip_ratio": 0.0016526016406714916,
	"completion_length": 104.71428680419922,
	"epoch": 0.028691392582225334,
	"grad_norm": 0.08991319428906154,
	"learning_rate": 7.167832167832168e-06,
	"loss": -0.0004,
	"num_tokens": 215924.0,
	"reward": -0.6428571939468384,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.6428571343421936,
	"rewards/check_winston_local_func/std": 0.7729182839393616,
	"step": 41
	},
	{
	"clip_ratio": 0.0011276095174252987,
	"epoch": 0.02939118264520644,
	"grad_norm": 0.0932105814337471,
	"learning_rate": 7.342657342657343e-06,
	"loss": -0.0005,
	"step": 42
	},
	{
	"clip_ratio": 0.0012143882922828197,
	"epoch": 0.030090972708187544,
	"grad_norm": 0.09501466400263839,
	"learning_rate": 7.517482517482517e-06,
	"loss": -0.0006,
	"step": 43
	},
	{
	"clip_ratio": 0.001439297804608941,
	"epoch": 0.03079076277116865,
	"grad_norm": 0.09410970453927742,
	"learning_rate": 7.692307692307694e-06,
	"loss": -0.0007,
	"step": 44
	},
	{
	"clip_ratio": 0.0007226300658658147,
	"completion_length": 88.4464340209961,
	"epoch": 0.031490552834149754,
	"grad_norm": 0.03381639342861229,
	"learning_rate": 7.867132867132867e-06,
	"loss": 0.0016,
	"num_tokens": 234579.0,
	"reward": -0.535714328289032,
	"reward_std": 0.05050762742757797,
	"rewards/check_winston_local_func/mean": -0.5357142686843872,
	"rewards/check_winston_local_func/std": 0.8520410656929016,
	"step": 45
	},
	{
	"clip_ratio": 0.0017245642375200987,
	"epoch": 0.03219034289713086,
	"grad_norm": 0.033458450811929934,
	"learning_rate": 8.041958041958042e-06,
	"loss": 0.0016,
	"step": 46
	},
	{
	"clip_ratio": 0.0011628264328464866,
	"epoch": 0.032890132960111965,
	"grad_norm": 0.03442645231795752,
	"learning_rate": 8.216783216783217e-06,
	"loss": 0.0016,
	"step": 47
	},
	{
	"clip_ratio": 0.0013220456894487143,
	"epoch": 0.03358992302309307,
	"grad_norm": 0.028415415852515197,
	"learning_rate": 8.391608391608393e-06,
	"loss": 0.0015,
	"step": 48
	},
	{
	"clip_ratio": 0.0034361626021564007,
	"completion_length": 91.46428680419922,
	"epoch": 0.034289713086074175,
	"grad_norm": 0.04920502199586618,
	"learning_rate": 8.566433566433566e-06,
	"loss": 0.0013,
	"num_tokens": 253761.0,
	"reward": -0.8214285969734192,
	"reward_std": 0.15152287483215332,
	"rewards/check_winston_local_func/mean": -0.8214285969734192,
	"rewards/check_winston_local_func/std": 0.5754727125167847,
	"step": 49
	},
	{
	"clip_ratio": 0.0016177111538127065,
	"epoch": 0.03498950314905528,
	"grad_norm": 0.04933398351205588,
	"learning_rate": 8.741258741258741e-06,
	"loss": 0.0013,
	"step": 50
	},
	{
	"clip_ratio": 0.0023581169079989195,
	"epoch": 0.03568929321203639,
	"grad_norm": 0.04990239817529589,
	"learning_rate": 8.916083916083918e-06,
	"loss": 0.0012,
	"step": 51
	},
	{
	"clip_ratio": 0.002693318761885166,
	"epoch": 0.0363890832750175,
	"grad_norm": 0.049258900801895884,
	"learning_rate": 9.090909090909091e-06,
	"loss": 0.0011,
	"step": 52
	},
	{
	"clip_ratio": 0.0013933092122897506,
	"completion_length": 76.5,
	"epoch": 0.0370888733379986,
	"grad_norm": 0.06783434389116598,
	"learning_rate": 9.265734265734266e-06,
	"loss": 0.0004,
	"num_tokens": 270951.0,
	"reward": -0.4285714626312256,
	"reward_std": 0.4040610194206238,
	"rewards/check_winston_local_func/mean": -0.4285714328289032,
	"rewards/check_winston_local_func/std": 0.9116845726966858,
	"step": 53
	},
	{
	"clip_ratio": 0.0012361541157588363,
	"epoch": 0.03778866340097971,
	"grad_norm": 0.06739214545276892,
	"learning_rate": 9.44055944055944e-06,
	"loss": 0.0002,
	"step": 54
	},
	{
	"clip_ratio": 0.0011503547430038452,
	"epoch": 0.03848845346396081,
	"grad_norm": 0.06968777943502087,
	"learning_rate": 9.615384615384616e-06,
	"loss": -0.0001,
	"step": 55
	},
	{
	"clip_ratio": 0.002146858721971512,
	"epoch": 0.03918824352694192,
	"grad_norm": 0.06632924367628588,
	"learning_rate": 9.79020979020979e-06,
	"loss": -0.0001,
	"step": 56
	},
	{
	"clip_ratio": 0.002248402452096343,
	"completion_length": 100.05357360839844,
	"epoch": 0.03988803358992302,
	"grad_norm": 0.03339030440662342,
	"learning_rate": 9.965034965034966e-06,
	"loss": 0.0007,
	"num_tokens": 291902.0,
	"reward": -0.4285714626312256,
	"reward_std": 0.10101525485515594,
	"rewards/check_winston_local_func/mean": -0.4285714328289032,
	"rewards/check_winston_local_func/std": 0.9116845726966858,
	"step": 57
	},
	{
	"clip_ratio": 0.002410450717434287,
	"epoch": 0.04058782365290413,
	"grad_norm": 0.03407166704900621,
	"learning_rate": 1.013986013986014e-05,
	"loss": 0.0008,
	"step": 58
	},
	{
	"clip_ratio": 0.0027997640427201986,
	"epoch": 0.041287613715885234,
	"grad_norm": 0.03397162163806953,
	"learning_rate": 1.0314685314685315e-05,
	"loss": 0.0007,
	"step": 59
	},
	{
	"clip_ratio": 0.0030621180776506662,
	"epoch": 0.04198740377886634,
	"grad_norm": 0.034258551127942245,
	"learning_rate": 1.048951048951049e-05,
	"loss": 0.0007,
	"step": 60
	},
	{
	"clip_ratio": 0.0021611705888062716,
	"completion_length": 84.55357360839844,
	"epoch": 0.042687193841847444,
	"grad_norm": 0.04382869653282982,
	"learning_rate": 1.0664335664335665e-05,
	"loss": 0.0024,
	"num_tokens": 310097.0,
	"reward": -0.7500000596046448,
	"reward_std": 0.15152287483215332,
	"rewards/check_winston_local_func/mean": -0.75,
	"rewards/check_winston_local_func/std": 0.6674237847328186,
	"step": 61
	},
	{
	"clip_ratio": 0.0010437711607664824,
	"epoch": 0.04338698390482855,
	"grad_norm": 0.04632797121542376,
	"learning_rate": 1.0839160839160838e-05,
	"loss": 0.0024,
	"step": 62
	},
	{
	"clip_ratio": 0.0013273117365315557,
	"epoch": 0.044086773967809655,
	"grad_norm": 0.046019113122256816,
	"learning_rate": 1.1013986013986015e-05,
	"loss": 0.0021,
	"step": 63
	},
	{
	"clip_ratio": 0.002344512613490224,
	"epoch": 0.04478656403079076,
	"grad_norm": 0.04434571865803126,
	"learning_rate": 1.118881118881119e-05,
	"loss": 0.0022,
	"step": 64
	},
	{
	"clip_ratio": 0.0014854084001854062,
	"completion_length": 93.58928680419922,
	"epoch": 0.04548635409377187,
	"grad_norm": 0.013792833624525774,
	"learning_rate": 1.1363636363636365e-05,
	"loss": -0.0006,
	"num_tokens": 329004.0,
	"reward": -0.8214285969734192,
	"reward_std": 0.05050762742757797,
	"rewards/check_winston_local_func/mean": -0.8214285969734192,
	"rewards/check_winston_local_func/std": 0.5754727125167847,
	"step": 65
	},
	{
	"clip_ratio": 0.002451003296300769,
	"epoch": 0.04618614415675298,
	"grad_norm": 0.01395207894798071,
	"learning_rate": 1.153846153846154e-05,
	"loss": -0.0006,
	"step": 66
	},
	{
	"clip_ratio": 0.0016351536614820361,
	"epoch": 0.04688593421973408,
	"grad_norm": 0.01316846865814371,
	"learning_rate": 1.1713286713286714e-05,
	"loss": -0.0006,
	"step": 67
	},
	{
	"clip_ratio": 0.0026426080148667097,
	"epoch": 0.04758572428271519,
	"grad_norm": 0.013837974578886334,
	"learning_rate": 1.188811188811189e-05,
	"loss": -0.0006,
	"step": 68
	},
	{
	"clip_ratio": 0.0009010470239445567,
	"completion_length": 95.62500762939453,
	"epoch": 0.04828551434569629,
	"grad_norm": 0.08072032529486632,
	"learning_rate": 1.2062937062937063e-05,
	"loss": 0.0018,
	"num_tokens": 349081.0,
	"reward": -0.7142857313156128,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.7142857313156128,
	"rewards/check_winston_local_func/std": 0.7061878442764282,
	"step": 69
	},
	{
	"clip_ratio": 0.0022199582308530807,
	"epoch": 0.0489853044086774,
	"grad_norm": 0.0818785835069056,
	"learning_rate": 1.2237762237762239e-05,
	"loss": 0.0014,
	"step": 70
	},
	{
	"clip_ratio": 0.0017290068790316582,
	"epoch": 0.0496850944716585,
	"grad_norm": 0.07902681914542756,
	"learning_rate": 1.2412587412587414e-05,
	"loss": 0.001,
	"step": 71
	},
	{
	"clip_ratio": 0.0016852362314239144,
	"epoch": 0.05038488453463961,
	"grad_norm": 0.08449847550325483,
	"learning_rate": 1.2587412587412589e-05,
	"loss": 0.0005,
	"step": 72
	},
	{
	"clip_ratio": 0.0020247853826731443,
	"completion_length": 91.78572082519531,
	"epoch": 0.05108467459762071,
	"grad_norm": 0.04909551324890117,
	"learning_rate": 1.2762237762237764e-05,
	"loss": -0.003,
	"num_tokens": 367819.0,
	"reward": -0.5,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.5,
	"rewards/check_winston_local_func/std": 0.8738628625869751,
	"step": 73
	},
	{
	"clip_ratio": 0.0034576961770653725,
	"epoch": 0.05178446466060182,
	"grad_norm": 0.050114477186976265,
	"learning_rate": 1.2937062937062939e-05,
	"loss": -0.0031,
	"step": 74
	},
	{
	"clip_ratio": 0.0019642652478069067,
	"epoch": 0.052484254723582924,
	"grad_norm": 0.05018386747388819,
	"learning_rate": 1.3111888111888112e-05,
	"loss": -0.0033,
	"step": 75
	},
	{
	"clip_ratio": 0.004161330871284008,
	"epoch": 0.05318404478656403,
	"grad_norm": 0.05121415635619661,
	"learning_rate": 1.3286713286713287e-05,
	"loss": -0.0037,
	"step": 76
	},
	{
	"clip_ratio": 0.0017616358818486333,
	"completion_length": 106.50000762939453,
	"epoch": 0.053883834849545134,
	"grad_norm": 0.05530735796526797,
	"learning_rate": 1.3461538461538462e-05,
	"loss": -0.0041,
	"num_tokens": 388977.0,
	"reward": -0.6428571939468384,
	"reward_std": 0.10101525485515594,
	"rewards/check_winston_local_func/mean": -0.6428571343421936,
	"rewards/check_winston_local_func/std": 0.7729182839393616,
	"step": 77
	},
	{
	"clip_ratio": 0.0018110191449522972,
	"epoch": 0.05458362491252624,
	"grad_norm": 0.05608373724379992,
	"learning_rate": 1.3636363636363637e-05,
	"loss": -0.0042,
	"step": 78
	},
	{
	"clip_ratio": 0.003877634182572365,
	"epoch": 0.055283414975507345,
	"grad_norm": 0.04218829661702587,
	"learning_rate": 1.381118881118881e-05,
	"loss": -0.0044,
	"step": 79
	},
	{
	"clip_ratio": 0.003191744675859809,
	"epoch": 0.05598320503848846,
	"grad_norm": 0.04293784405855666,
	"learning_rate": 1.3986013986013988e-05,
	"loss": -0.0046,
	"step": 80
	},
	{
	"clip_ratio": 0.002192563144490123,
	"completion_length": 102.51786041259766,
	"epoch": 0.05668299510146956,
	"grad_norm": 0.01621220365623996,
	"learning_rate": 1.4160839160839163e-05,
	"loss": -0.0011,
	"num_tokens": 409804.0,
	"reward": -0.8214285969734192,
	"reward_std": 0.05050762742757797,
	"rewards/check_winston_local_func/mean": -0.8214285969734192,
	"rewards/check_winston_local_func/std": 0.5754727125167847,
	"step": 81
	},
	{
	"clip_ratio": 0.0021092891693115234,
	"epoch": 0.05738278516445067,
	"grad_norm": 0.016235676972647724,
	"learning_rate": 1.4335664335664336e-05,
	"loss": -0.0012,
	"step": 82
	},
	{
	"clip_ratio": 0.0023684012703597546,
	"epoch": 0.05808257522743177,
	"grad_norm": 0.016915613552120477,
	"learning_rate": 1.4510489510489511e-05,
	"loss": -0.0011,
	"step": 83
	},
	{
	"clip_ratio": 0.002553236670792103,
	"epoch": 0.05878236529041288,
	"grad_norm": 0.016878300731711607,
	"learning_rate": 1.4685314685314686e-05,
	"loss": -0.0012,
	"step": 84
	},
	{
	"clip_ratio": 0.0023020573426038027,
	"completion_length": 95.33928680419922,
	"epoch": 0.05948215535339398,
	"grad_norm": 0.09922403654931192,
	"learning_rate": 1.486013986013986e-05,
	"loss": 0.0034,
	"num_tokens": 429265.0,
	"reward": -0.392857164144516,
	"reward_std": 0.3535533845424652,
	"rewards/check_winston_local_func/mean": -0.3928571343421936,
	"rewards/check_winston_local_func/std": 0.9279217720031738,
	"step": 85
	},
	{
	"clip_ratio": 0.002049357397481799,
	"epoch": 0.06018194541637509,
	"grad_norm": 0.0820235579491076,
	"learning_rate": 1.5034965034965034e-05,
	"loss": 0.0032,
	"step": 86
	},
	{
	"clip_ratio": 0.003993889316916466,
	"epoch": 0.06088173547935619,
	"grad_norm": 0.07572179365943402,
	"learning_rate": 1.5209790209790212e-05,
	"loss": 0.0028,
	"step": 87
	},
	{
	"clip_ratio": 0.004023912828415632,
	"epoch": 0.0615815255423373,
	"grad_norm": 0.0786883863698215,
	"learning_rate": 1.5384615384615387e-05,
	"loss": 0.0025,
	"step": 88
	},
	{
	"clip_ratio": 0.0012786961160600185,
	"completion_length": 101.01786041259766,
	"epoch": 0.0622813156053184,
	"grad_norm": 0.13944075765142377,
	"learning_rate": 1.555944055944056e-05,
	"loss": -0.0059,
	"num_tokens": 449712.0,
	"reward": -0.4642857313156128,
	"reward_std": 0.3535533845424652,
	"rewards/check_winston_local_func/mean": -0.4642857015132904,
	"rewards/check_winston_local_func/std": 0.893700897693634,
	"step": 89
	},
	{
	"clip_ratio": 0.0012525760103017092,
	"epoch": 0.06298110566829951,
	"grad_norm": 0.14046611947962784,
	"learning_rate": 1.5734265734265734e-05,
	"loss": -0.0062,
	"step": 90
	},
	{
	"clip_ratio": 0.003111109836027026,
	"epoch": 0.06368089573128062,
	"grad_norm": 0.13042169819458227,
	"learning_rate": 1.590909090909091e-05,
	"loss": -0.0079,
	"step": 91
	},
	{
	"clip_ratio": 0.013328815810382366,
	"epoch": 0.06438068579426172,
	"grad_norm": 0.0955558239371001,
	"learning_rate": 1.6083916083916083e-05,
	"loss": -0.0092,
	"step": 92
	},
	{
	"clip_ratio": 0.0021473567467182875,
	"completion_length": 80.30357360839844,
	"epoch": 0.06508047585724283,
	"grad_norm": 0.17357283543483568,
	"learning_rate": 1.625874125874126e-05,
	"loss": 0.0104,
	"num_tokens": 466947.0,
	"reward": -0.2142857313156128,
	"reward_std": 0.4040610194206238,
	"rewards/check_winston_local_func/mean": -0.2142857164144516,
	"rewards/check_winston_local_func/std": 0.9856107234954834,
	"step": 93
	},
	{
	"clip_ratio": 0.004891776479780674,
	"epoch": 0.06578026592022393,
	"grad_norm": 0.16677719867011565,
	"learning_rate": 1.6433566433566433e-05,
	"loss": 0.0089,
	"step": 94
	},
	{
	"clip_ratio": 0.008591952733695507,
	"epoch": 0.06648005598320504,
	"grad_norm": 0.15657656176787582,
	"learning_rate": 1.660839160839161e-05,
	"loss": 0.0066,
	"step": 95
	},
	{
	"clip_ratio": 0.017924649640917778,
	"epoch": 0.06717984604618614,
	"grad_norm": 0.1468099989251008,
	"learning_rate": 1.6783216783216786e-05,
	"loss": 0.004,
	"step": 96
	},
	{
	"clip_ratio": 0.0016577127389609814,
	"completion_length": 120.50000762939453,
	"epoch": 0.06787963610916725,
	"grad_norm": 0.06674060360416964,
	"learning_rate": 1.695804195804196e-05,
	"loss": -0.001,
	"num_tokens": 489863.0,
	"reward": -0.785714328289032,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.7857142686843872,
	"rewards/check_winston_local_func/std": 0.6241878271102905,
	"step": 97
	},
	{
	"clip_ratio": 0.003024409292265773,
	"epoch": 0.06857942617214835,
	"grad_norm": 0.05912491262092969,
	"learning_rate": 1.7132867132867133e-05,
	"loss": -0.0015,
	"step": 98
	},
	{
	"clip_ratio": 0.004554019309580326,
	"epoch": 0.06927921623512946,
	"grad_norm": 0.05430919883014471,
	"learning_rate": 1.730769230769231e-05,
	"loss": -0.0019,
	"step": 99
	},
	{
	"clip_ratio": 0.00875174906104803,
	"epoch": 0.06997900629811056,
	"grad_norm": 0.04764899914092442,
	"learning_rate": 1.7482517482517483e-05,
	"loss": -0.0025,
	"step": 100
	},
	{
	"clip_ratio": 0.0022144827526062727,
	"completion_length": 85.92857360839844,
	"epoch": 0.07067879636109167,
	"grad_norm": 0.09514652279620683,
	"learning_rate": 1.7657342657342656e-05,
	"loss": 0.0045,
	"num_tokens": 508189.0,
	"reward": -0.535714328289032,
	"reward_std": 0.3535534143447876,
	"rewards/check_winston_local_func/mean": -0.5357142686843872,
	"rewards/check_winston_local_func/std": 0.8520411252975464,
	"step": 101
	},
	{
	"clip_ratio": 0.004405450075864792,
	"epoch": 0.07137858642407278,
	"grad_norm": 0.08384041265838117,
	"learning_rate": 1.7832167832167836e-05,
	"loss": 0.004,
	"step": 102
	},
	{
	"clip_ratio": 0.007100887596607208,
	"epoch": 0.07207837648705388,
	"grad_norm": 0.07838152678466788,
	"learning_rate": 1.800699300699301e-05,
	"loss": 0.0031,
	"step": 103
	},
	{
	"clip_ratio": 0.012806176207959652,
	"epoch": 0.072778166550035,
	"grad_norm": 0.054761747581467624,
	"learning_rate": 1.8181818181818182e-05,
	"loss": 0.0022,
	"step": 104
	},
	{
	"clip_ratio": 0.002409872133284807,
	"completion_length": 89.16072082519531,
	"epoch": 0.0734779566130161,
	"grad_norm": 0.088338886024993,
	"learning_rate": 1.835664335664336e-05,
	"loss": -0.0024,
	"num_tokens": 526646.0,
	"reward": -0.5,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.5,
	"rewards/check_winston_local_func/std": 0.8738628625869751,
	"step": 105
	},
	{
	"clip_ratio": 0.003525706473737955,
	"epoch": 0.0741777466759972,
	"grad_norm": 0.08578242655426072,
	"learning_rate": 1.8531468531468532e-05,
	"loss": -0.0031,
	"step": 106
	},
	{
	"clip_ratio": 0.010289273224771023,
	"epoch": 0.0748775367389783,
	"grad_norm": 0.07249149477931406,
	"learning_rate": 1.8706293706293705e-05,
	"loss": -0.0043,
	"step": 107
	},
	{
	"clip_ratio": 0.027354398742318153,
	"epoch": 0.07557732680195942,
	"grad_norm": 0.059934840975274094,
	"learning_rate": 1.888111888111888e-05,
	"loss": -0.005,
	"step": 108
	},
	{
	"clip_ratio": 0.002047365065664053,
	"completion_length": 99.67857360839844,
	"epoch": 0.07627711686494051,
	"grad_norm": 0.052068804469643425,
	"learning_rate": 1.9055944055944055e-05,
	"loss": -0.0002,
	"num_tokens": 546720.0,
	"reward": -0.7142857313156128,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.7142857313156128,
	"rewards/check_winston_local_func/std": 0.7061878442764282,
	"step": 109
	},
	{
	"clip_ratio": 0.006073285825550556,
	"epoch": 0.07697690692792163,
	"grad_norm": 0.053601884682000965,
	"learning_rate": 1.923076923076923e-05,
	"loss": -0.0004,
	"step": 110
	},
	{
	"clip_ratio": 0.01850634068250656,
	"epoch": 0.07767669699090272,
	"grad_norm": 0.0262467926642497,
	"learning_rate": 1.9405594405594408e-05,
	"loss": -0.0006,
	"step": 111
	},
	{
	"clip_ratio": 0.02557740919291973,
	"epoch": 0.07837648705388384,
	"grad_norm": 0.028862292431493224,
	"learning_rate": 1.958041958041958e-05,
	"loss": -0.0007,
	"step": 112
	},
	{
	"clip_ratio": 0.0017085629515349865,
	"completion_length": 102.46428680419922,
	"epoch": 0.07907627711686493,
	"grad_norm": 0.047073786173530204,
	"learning_rate": 1.9755244755244758e-05,
	"loss": 0.0003,
	"num_tokens": 567098.0,
	"reward": -0.6428571939468384,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.6428571343421936,
	"rewards/check_winston_local_func/std": 0.7729182839393616,
	"step": 113
	},
	{
	"clip_ratio": 0.0032013251911848783,
	"epoch": 0.07977606717984605,
	"grad_norm": 0.04603497030180295,
	"learning_rate": 1.993006993006993e-05,
	"loss": 0.0,
	"step": 114
	},
	{
	"clip_ratio": 0.00821536686271429,
	"epoch": 0.08047585724282715,
	"grad_norm": 0.04227017570834447,
	"learning_rate": 2.0104895104895104e-05,
	"loss": -0.0003,
	"step": 115
	},
	{
	"clip_ratio": 0.020387563854455948,
	"epoch": 0.08117564730580826,
	"grad_norm": 0.03197828312555116,
	"learning_rate": 2.027972027972028e-05,
	"loss": -0.0007,
	"step": 116
	},
	{
	"clip_ratio": 0.002021044958382845,
	"completion_length": 81.4464340209961,
	"epoch": 0.08187543736878937,
	"grad_norm": 0.13324413388648826,
	"learning_rate": 2.0454545454545457e-05,
	"loss": 0.0049,
	"num_tokens": 584845.0,
	"reward": -0.4285714626312256,
	"reward_std": 0.4040610194206238,
	"rewards/check_winston_local_func/mean": -0.4285714328289032,
	"rewards/check_winston_local_func/std": 0.9116845726966858,
	"step": 117
	},
	{
	"clip_ratio": 0.01248109433799982,
	"epoch": 0.08257522743177047,
	"grad_norm": 0.11616979541003405,
	"learning_rate": 2.062937062937063e-05,
	"loss": 0.0033,
	"step": 118
	},
	{
	"clip_ratio": 0.03872568532824516,
	"epoch": 0.08327501749475158,
	"grad_norm": 0.08932212762248723,
	"learning_rate": 2.0804195804195807e-05,
	"loss": 0.0016,
	"step": 119
	},
	{
	"clip_ratio": 0.0671406015753746,
	"epoch": 0.08397480755773268,
	"grad_norm": 0.07829121992484567,
	"learning_rate": 2.097902097902098e-05,
	"loss": 0.0001,
	"step": 120
	},
	{
	"clip_ratio": 0.0027903958689421415,
	"completion_length": 109.67857360839844,
	"epoch": 0.08467459762071379,
	"grad_norm": 0.10038881852522909,
	"learning_rate": 2.1153846153846154e-05,
	"loss": 0.0027,
	"num_tokens": 606389.0,
	"reward": -0.1071428656578064,
	"reward_std": 0.05050762742757797,
	"rewards/check_winston_local_func/mean": -0.1071428582072258,
	"rewards/check_winston_local_func/std": 1.0032415390014648,
	"step": 121
	},
	{
	"clip_ratio": 0.010223714634776115,
	"epoch": 0.08537438768369489,
	"grad_norm": 0.08037774781050448,
	"learning_rate": 2.132867132867133e-05,
	"loss": 0.0017,
	"step": 122
	},
	{
	"clip_ratio": 0.02510545216500759,
	"epoch": 0.086074177746676,
	"grad_norm": 0.06594532056416831,
	"learning_rate": 2.1503496503496503e-05,
	"loss": 0.0009,
	"step": 123
	},
	{
	"clip_ratio": 0.04746328294277191,
	"epoch": 0.0867739678096571,
	"grad_norm": 0.0495169500454822,
	"learning_rate": 2.1678321678321677e-05,
	"loss": 0.0004,
	"step": 124
	},
	{
	"clip_ratio": 0.002552854595705867,
	"completion_length": 92.92857360839844,
	"epoch": 0.08747375787263821,
	"grad_norm": 0.07383050554477533,
	"learning_rate": 2.1853146853146857e-05,
	"loss": -0.003,
	"num_tokens": 625723.0,
	"reward": -0.392857164144516,
	"reward_std": 0.05050762742757797,
	"rewards/check_winston_local_func/mean": -0.3928571343421936,
	"rewards/check_winston_local_func/std": 0.9279217720031738,
	"step": 125
	},
	{
	"clip_ratio": 0.017842039465904236,
	"epoch": 0.08817354793561931,
	"grad_norm": 0.043649507012091936,
	"learning_rate": 2.202797202797203e-05,
	"loss": -0.0037,
	"step": 126
	},
	{
	"clip_ratio": 0.045183245092630386,
	"epoch": 0.08887333799860042,
	"grad_norm": 0.03451108201943257,
	"learning_rate": 2.2202797202797203e-05,
	"loss": -0.0039,
	"step": 127
	},
	{
	"clip_ratio": 0.0701090469956398,
	"epoch": 0.08957312806158152,
	"grad_norm": 0.023061406081443397,
	"learning_rate": 2.237762237762238e-05,
	"loss": -0.0041,
	"step": 128
	},
	{
	"clip_ratio": 0.0010595758212730289,
	"completion_length": 103.25000762939453,
	"epoch": 0.09027291812456263,
	"grad_norm": 0.15282505586734968,
	"learning_rate": 2.2552447552447553e-05,
	"loss": 0.0011,
	"num_tokens": 646391.0,
	"reward": -0.1071428656578064,
	"reward_std": 0.45456868410110474,
	"rewards/check_winston_local_func/mean": -0.1071428582072258,
	"rewards/check_winston_local_func/std": 1.0032414197921753,
	"step": 129
	},
	{
	"clip_ratio": 0.015386571176350117,
	"epoch": 0.09097270818754374,
	"grad_norm": 0.13868014978577842,
	"learning_rate": 2.272727272727273e-05,
	"loss": -0.0008,
	"step": 130
	},
	{
	"clip_ratio": 0.025238754227757454,
	"epoch": 0.09167249825052484,
	"grad_norm": 0.12901702065673692,
	"learning_rate": 2.2902097902097902e-05,
	"loss": -0.0033,
	"step": 131
	},
	{
	"clip_ratio": 0.026391755789518356,
	"epoch": 0.09237228831350595,
	"grad_norm": 0.09971213845352783,
	"learning_rate": 2.307692307692308e-05,
	"loss": -0.006,
	"step": 132
	},
	{
	"clip_ratio": 0.0035059447400271893,
	"completion_length": 98.33928680419922,
	"epoch": 0.09307207837648705,
	"grad_norm": 0.11128060046799104,
	"learning_rate": 2.3251748251748252e-05,
	"loss": -0.0018,
	"num_tokens": 666114.0,
	"reward": -0.3571428656578064,
	"reward_std": 0.30304574966430664,
	"rewards/check_winston_local_func/mean": -0.3571428656578064,
	"rewards/check_winston_local_func/std": 0.9425028562545776,
	"step": 133
	},
	{
	"clip_ratio": 0.0042843748815357685,
	"epoch": 0.09377186843946816,
	"grad_norm": 0.1065458455684171,
	"learning_rate": 2.342657342657343e-05,
	"loss": -0.0028,
	"step": 134
	},
	{
	"clip_ratio": 0.01338073518127203,
	"epoch": 0.09447165850244926,
	"grad_norm": 0.07422771205194853,
	"learning_rate": 2.3601398601398602e-05,
	"loss": -0.0042,
	"step": 135
	},
	{
	"clip_ratio": 0.02260064147412777,
	"epoch": 0.09517144856543037,
	"grad_norm": 0.05031624319039464,
	"learning_rate": 2.377622377622378e-05,
	"loss": -0.0051,
	"step": 136
	},
	{
	"clip_ratio": 0.0022608404979109764,
	"completion_length": 87.00000762939453,
	"epoch": 0.09587123862841147,
	"grad_norm": 0.17254445638790264,
	"learning_rate": 2.3951048951048952e-05,
	"loss": -0.0094,
	"num_tokens": 684454.0,
	"reward": -0.0357142873108387,
	"reward_std": 0.3535533845424652,
	"rewards/check_winston_local_func/mean": -0.0357142873108387,
	"rewards/check_winston_local_func/std": 1.0084062814712524,
	"step": 137
	},
	{
	"clip_ratio": 0.011511722579598427,
	"epoch": 0.09657102869139259,
	"grad_norm": 0.146693566733721,
	"learning_rate": 2.4125874125874125e-05,
	"loss": -0.0129,
	"step": 138
	},
	{
	"clip_ratio": 0.0310056172311306,
	"epoch": 0.09727081875437368,
	"grad_norm": 0.1269304320317103,
	"learning_rate": 2.43006993006993e-05,
	"loss": -0.0159,
	"step": 139
	},
	{
	"clip_ratio": 0.04163637384772301,
	"epoch": 0.0979706088173548,
	"grad_norm": 0.09818573191869126,
	"learning_rate": 2.4475524475524478e-05,
	"loss": -0.0187,
	"step": 140
	},
	{
	"clip_ratio": 0.0025228250306099653,
	"completion_length": 104.9464340209961,
	"epoch": 0.0986703988803359,
	"grad_norm": 0.1003012262187849,
	"learning_rate": 2.465034965034965e-05,
	"loss": -0.0031,
	"num_tokens": 705439.0,
	"reward": -0.1428571492433548,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.1428571492433548,
	"rewards/check_winston_local_func/std": 0.9987004995346069,
	"step": 141
	},
	{
	"clip_ratio": 0.012786303646862507,
	"epoch": 0.099370188943317,
	"grad_norm": 0.08364392907940049,
	"learning_rate": 2.4825174825174828e-05,
	"loss": -0.0043,
	"step": 142
	},
	{
	"clip_ratio": 0.04185020551085472,
	"epoch": 0.1000699790062981,
	"grad_norm": 0.055411268177771554,
	"learning_rate": 2.5e-05,
	"loss": -0.0052,
	"step": 143
	},
	{
	"clip_ratio": 0.06534933298826218,
	"epoch": 0.10076976906927922,
	"grad_norm": 0.04043680317744743,
	"learning_rate": 2.5174825174825178e-05,
	"loss": -0.0057,
	"step": 144
	},
	{
	"clip_ratio": 0.00163670489564538,
	"completion_length": 79.0,
	"epoch": 0.10146955913226033,
	"grad_norm": 0.26037723002756274,
	"learning_rate": 2.534965034965035e-05,
	"loss": -0.0013,
	"num_tokens": 722363.0,
	"reward": -0.2857142984867096,
	"reward_std": 0.4040610194206238,
	"rewards/check_winston_local_func/mean": -0.2857142984867096,
	"rewards/check_winston_local_func/std": 0.9669875502586365,
	"step": 145
	},
	{
	"clip_ratio": 0.010257317684590816,
	"epoch": 0.10216934919524143,
	"grad_norm": 0.21925125532352843,
	"learning_rate": 2.5524475524475528e-05,
	"loss": -0.0078,
	"step": 146
	},
	{
	"clip_ratio": 0.03979513794183731,
	"epoch": 0.10286913925822254,
	"grad_norm": 0.1493741677240076,
	"learning_rate": 2.5699300699300697e-05,
	"loss": -0.014,
	"step": 147
	},
	{
	"clip_ratio": 0.06495730578899384,
	"epoch": 0.10356892932120364,
	"grad_norm": 0.09721255929993584,
	"learning_rate": 2.5874125874125877e-05,
	"loss": -0.0176,
	"step": 148
	},
	{
	"clip_ratio": 0.003729403717443347,
	"completion_length": 98.60714721679688,
	"epoch": 0.10426871938418475,
	"grad_norm": 0.21024181860460278,
	"learning_rate": 2.6048951048951047e-05,
	"loss": -0.0029,
	"num_tokens": 742543.0,
	"reward": -0.3214285969734192,
	"reward_std": 0.25253814458847046,
	"rewards/check_winston_local_func/mean": -0.3214285671710968,
	"rewards/check_winston_local_func/std": 0.955503523349762,
	"step": 149
	},
	{
	"clip_ratio": 0.011744500137865543,
	"epoch": 0.10496850944716585,
	"grad_norm": 0.16355956559118884,
	"learning_rate": 2.6223776223776224e-05,
	"loss": -0.0069,
	"step": 150
	},
	{
	"clip_ratio": 0.030442187562584877,
	"epoch": 0.10566829951014696,
	"grad_norm": 0.1182182136437938,
	"learning_rate": 2.6398601398601404e-05,
	"loss": -0.0104,
	"step": 151
	},
	{
	"clip_ratio": 0.04920857399702072,
	"epoch": 0.10636808957312806,
	"grad_norm": 0.09156116791582807,
	"learning_rate": 2.6573426573426574e-05,
	"loss": -0.013,
	"step": 152
	},
	{
	"clip_ratio": 0.0027830980252474546,
	"completion_length": 101.71428680419922,
	"epoch": 0.10706787963610917,
	"grad_norm": 0.07071809306988276,
	"learning_rate": 2.674825174825175e-05,
	"loss": 0.0011,
	"num_tokens": 762373.0,
	"reward": -0.4285714626312256,
	"reward_std": 0.10101525485515594,
	"rewards/check_winston_local_func/mean": -0.4285714328289032,
	"rewards/check_winston_local_func/std": 0.9116845726966858,
	"step": 153
	},
	{
	"clip_ratio": 0.004099779762327671,
	"epoch": 0.10776766969909027,
	"grad_norm": 0.07036975743996918,
	"learning_rate": 2.6923076923076923e-05,
	"loss": 0.0006,
	"step": 154
	},
	{
	"clip_ratio": 0.011427856050431728,
	"epoch": 0.10846745976207138,
	"grad_norm": 0.060173537559690966,
	"learning_rate": 2.70979020979021e-05,
	"loss": -0.0002,
	"step": 155
	},
	{
	"clip_ratio": 0.02241707034409046,
	"epoch": 0.10916724982505248,
	"grad_norm": 0.04464993792503255,
	"learning_rate": 2.7272727272727273e-05,
	"loss": -0.0007,
	"step": 156
	},
	{
	"clip_ratio": 0.003849891945719719,
	"completion_length": 126.60714721679688,
	"epoch": 0.10986703988803359,
	"grad_norm": 0.14247799791087257,
	"learning_rate": 2.744755244755245e-05,
	"loss": 0.003,
	"num_tokens": 785161.0,
	"reward": -0.25,
	"reward_std": 0.3535533845424652,
	"rewards/check_winston_local_func/mean": -0.25,
	"rewards/check_winston_local_func/std": 0.9770084023475647,
	"step": 157
	},
	{
	"clip_ratio": 0.00803058035671711,
	"epoch": 0.11056682995101469,
	"grad_norm": 0.13081890796714626,
	"learning_rate": 2.762237762237762e-05,
	"loss": 0.0006,
	"step": 158
	},
	{
	"clip_ratio": 0.02222571335732937,
	"epoch": 0.1112666200139958,
	"grad_norm": 0.10130551565904075,
	"learning_rate": 2.77972027972028e-05,
	"loss": -0.0021,
	"step": 159
	},
	{
	"clip_ratio": 0.03389605134725571,
	"epoch": 0.11196641007697691,
	"grad_norm": 0.0756074031523243,
	"learning_rate": 2.7972027972027976e-05,
	"loss": -0.0047,
	"step": 160
	},
	{
	"clip_ratio": 0.003950103186070919,
	"completion_length": 104.12500762939453,
	"epoch": 0.11266620013995801,
	"grad_norm": 0.1290577443710622,
	"learning_rate": 2.8146853146853146e-05,
	"loss": 0.002,
	"num_tokens": 805162.0,
	"reward": -0.3571428656578064,
	"reward_std": 0.2020305097103119,
	"rewards/check_winston_local_func/mean": -0.3571428656578064,
	"rewards/check_winston_local_func/std": 0.9425028562545776,
	"step": 161
	},
	{
	"clip_ratio": 0.007646625861525536,
	"epoch": 0.11336599020293912,
	"grad_norm": 0.11025990408745222,
	"learning_rate": 2.8321678321678326e-05,
	"loss": 0.0002,
	"step": 162
	},
	{
	"clip_ratio": 0.01921841874718666,
	"epoch": 0.11406578026592022,
	"grad_norm": 0.07245932578181155,
	"learning_rate": 2.8496503496503496e-05,
	"loss": -0.0017,
	"step": 163
	},
	{
	"clip_ratio": 0.03461840748786926,
	"epoch": 0.11476557032890133,
	"grad_norm": 0.051788726865233656,
	"learning_rate": 2.8671328671328672e-05,
	"loss": -0.0028,
	"step": 164
	},
	{
	"clip_ratio": 0.002369140973314643,
	"completion_length": 98.73214721679688,
	"epoch": 0.11546536039188243,
	"grad_norm": 0.1842865637323427,
	"learning_rate": 2.8846153846153845e-05,
	"loss": 0.0051,
	"num_tokens": 825007.0,
	"reward": -0.0714285746216774,
	"reward_std": 0.4040610194206238,
	"rewards/check_winston_local_func/mean": -0.0714285746216774,
	"rewards/check_winston_local_func/std": 1.0064724683761597,
	"step": 165
	},
	{
	"clip_ratio": 0.012421431951224804,
	"epoch": 0.11616515045486354,
	"grad_norm": 0.15047989090077732,
	"learning_rate": 2.9020979020979022e-05,
	"loss": 0.0018,
	"step": 166
	},
	{
	"clip_ratio": 0.027025196701288223,
	"epoch": 0.11686494051784464,
	"grad_norm": 0.09721729056776199,
	"learning_rate": 2.91958041958042e-05,
	"loss": -0.0012,
	"step": 167
	},
	{
	"clip_ratio": 0.04289395734667778,
	"epoch": 0.11756473058082575,
	"grad_norm": 0.06975068028842074,
	"learning_rate": 2.9370629370629372e-05,
	"loss": -0.0033,
	"step": 168
	},
	{
	"clip_ratio": 0.0018040953436866403,
	"completion_length": 105.3214340209961,
	"epoch": 0.11826452064380685,
	"grad_norm": 0.2677758748707387,
	"learning_rate": 2.954545454545455e-05,
	"loss": -0.0015,
	"num_tokens": 845663.0,
	"reward": -0.0714285746216774,
	"reward_std": 0.4040609896183014,
	"rewards/check_winston_local_func/mean": -0.0714285746216774,
	"rewards/check_winston_local_func/std": 1.0064724683761597,
	"step": 169
	},
	{
	"clip_ratio": 0.01557006873190403,
	"epoch": 0.11896431070678797,
	"grad_norm": 0.2023749080142164,
	"learning_rate": 2.972027972027972e-05,
	"loss": -0.0082,
	"step": 170
	},
	{
	"clip_ratio": 0.04334796220064163,
	"epoch": 0.11966410076976906,
	"grad_norm": 0.12184054679561386,
	"learning_rate": 2.9895104895104898e-05,
	"loss": -0.0128,
	"step": 171
	},
	{
	"clip_ratio": 0.06698625534772873,
	"epoch": 0.12036389083275018,
	"grad_norm": 0.08481462032815572,
	"learning_rate": 3.0069930069930068e-05,
	"loss": -0.0152,
	"step": 172
	},
	{
	"clip_ratio": 0.002561988076195121,
	"completion_length": 106.25000762939453,
	"epoch": 0.12106368089573127,
	"grad_norm": 0.3222269362637656,
	"learning_rate": 3.0244755244755245e-05,
	"loss": -0.0225,
	"num_tokens": 866213.0,
	"reward": -0.1785714328289032,
	"reward_std": 0.45456865429878235,
	"rewards/check_winston_local_func/mean": -0.1785714328289032,
	"rewards/check_winston_local_func/std": 0.9928314685821533,
	"step": 173
	},
	{
	"clip_ratio": 0.027860935777425766,
	"epoch": 0.12176347095871239,
	"grad_norm": 0.23558262189831372,
	"learning_rate": 3.0419580419580425e-05,
	"loss": -0.0308,
	"step": 174
	},
	{
	"clip_ratio": 0.0620122067630291,
	"epoch": 0.1224632610216935,
	"grad_norm": 0.16018574686601328,
	"learning_rate": 3.0594405594405594e-05,
	"loss": -0.0363,
	"step": 175
	},
	{
	"clip_ratio": 0.0841975286602974,
	"epoch": 0.1231630510846746,
	"grad_norm": 0.11961313174632249,
	"learning_rate": 3.0769230769230774e-05,
	"loss": -0.0402,
	"step": 176
	},
	{
	"clip_ratio": 0.0038492009043693542,
	"completion_length": 112.05357360839844,
	"epoch": 0.12386284114765571,
	"grad_norm": 0.16841580805802311,
	"learning_rate": 3.094405594405594e-05,
	"loss": 0.0144,
	"num_tokens": 887380.0,
	"reward": -0.3571428656578064,
	"reward_std": 0.30304574966430664,
	"rewards/check_winston_local_func/mean": -0.3571428656578064,
	"rewards/check_winston_local_func/std": 0.9425028562545776,
	"step": 177
	},
	{
	"clip_ratio": 0.0077889300882816315,
	"epoch": 0.1245626312106368,
	"grad_norm": 0.14772994247321478,
	"learning_rate": 3.111888111888112e-05,
	"loss": 0.0116,
	"step": 178
	},
	{
	"clip_ratio": 0.024449503049254417,
	"epoch": 0.12526242127361792,
	"grad_norm": 0.11835892505404014,
	"learning_rate": 3.1293706293706294e-05,
	"loss": 0.0085,
	"step": 179
	},
	{
	"clip_ratio": 0.04173960164189339,
	"epoch": 0.12596221133659902,
	"grad_norm": 0.07150863048317996,
	"learning_rate": 3.146853146853147e-05,
	"loss": 0.0065,
	"step": 180
	},
	{
	"clip_ratio": 0.0018401921261101961,
	"completion_length": 130.8928680419922,
	"epoch": 0.12666200139958012,
	"grad_norm": 0.19977261410354588,
	"learning_rate": 3.164335664335665e-05,
	"loss": 0.0079,
	"num_tokens": 910956.0,
	"reward": -0.2857142984867096,
	"reward_std": 0.4040610194206238,
	"rewards/check_winston_local_func/mean": -0.2857142984867096,
	"rewards/check_winston_local_func/std": 0.9669875502586365,
	"step": 181
	},
	{
	"clip_ratio": 0.013327782042324543,
	"epoch": 0.12736179146256124,
	"grad_norm": 0.18304114191410556,
	"learning_rate": 3.181818181818182e-05,
	"loss": 0.0037,
	"step": 182
	},
	{
	"clip_ratio": 0.035625942051410675,
	"epoch": 0.12806158152554234,
	"grad_norm": 0.13774519057774026,
	"learning_rate": 3.1993006993006994e-05,
	"loss": -0.0011,
	"step": 183
	},
	{
	"clip_ratio": 0.055436424911022186,
	"epoch": 0.12876137158852344,
	"grad_norm": 0.1037769963224376,
	"learning_rate": 3.216783216783217e-05,
	"loss": -0.005,
	"step": 184
	},
	{
	"clip_ratio": 0.003232809714972973,
	"completion_length": 115.85714721679688,
	"epoch": 0.12946116165150454,
	"grad_norm": 0.22848421400085006,
	"learning_rate": 3.234265734265735e-05,
	"loss": -0.002,
	"num_tokens": 932532.0,
	"reward": -0.0714285746216774,
	"reward_std": 0.4040610194206238,
	"rewards/check_winston_local_func/mean": -0.0714285746216774,
	"rewards/check_winston_local_func/std": 1.0064724683761597,
	"step": 185
	},
	{
	"clip_ratio": 0.013981361873447895,
	"epoch": 0.13016095171448566,
	"grad_norm": 0.14544684282491208,
	"learning_rate": 3.251748251748252e-05,
	"loss": -0.006,
	"step": 186
	},
	{
	"clip_ratio": 0.032285287976264954,
	"epoch": 0.13086074177746676,
	"grad_norm": 0.11489336548569441,
	"learning_rate": 3.269230769230769e-05,
	"loss": -0.0092,
	"step": 187
	},
	{
	"clip_ratio": 0.05701437592506409,
	"epoch": 0.13156053184044786,
	"grad_norm": 0.08462838304844368,
	"learning_rate": 3.2867132867132866e-05,
	"loss": -0.0115,
	"step": 188
	},
	{
	"clip_ratio": 0.0032829714473336935,
	"completion_length": 81.66072082519531,
	"epoch": 0.13226032190342898,
	"grad_norm": 0.33315610129233025,
	"learning_rate": 3.3041958041958046e-05,
	"loss": -0.0109,
	"num_tokens": 950027.0,
	"reward": 0.1428571492433548,
	"reward_std": 0.5050762891769409,
	"rewards/check_winston_local_func/mean": 0.1428571492433548,
	"rewards/check_winston_local_func/std": 0.9987004399299622,
	"step": 189
	},
	{
	"clip_ratio": 0.02284400910139084,
	"epoch": 0.13296011196641008,
	"grad_norm": 0.25411099199841936,
	"learning_rate": 3.321678321678322e-05,
	"loss": -0.0211,
	"step": 190
	},
	{
	"clip_ratio": 0.06905340403318405,
	"epoch": 0.13365990202939118,
	"grad_norm": 0.17966938259048712,
	"learning_rate": 3.339160839160839e-05,
	"loss": -0.0292,
	"step": 191
	},
	{
	"clip_ratio": 0.10486488789319992,
	"epoch": 0.13435969209237228,
	"grad_norm": 0.12395870104426593,
	"learning_rate": 3.356643356643357e-05,
	"loss": -0.0345,
	"step": 192
	},
	{
	"clip_ratio": 0.0039912075735628605,
	"completion_length": 98.53572082519531,
	"epoch": 0.1350594821553534,
	"grad_norm": 0.19967198347280601,
	"learning_rate": 3.374125874125874e-05,
	"loss": -0.0034,
	"num_tokens": 969635.0,
	"reward": -0.2142857313156128,
	"reward_std": 0.4040610194206238,
	"rewards/check_winston_local_func/mean": -0.2142857164144516,
	"rewards/check_winston_local_func/std": 0.9856107234954834,
	"step": 193
	},
	{
	"clip_ratio": 0.015699883922934532,
	"epoch": 0.1357592722183345,
	"grad_norm": 0.14403516372623493,
	"learning_rate": 3.391608391608392e-05,
	"loss": -0.0078,
	"step": 194
	},
	{
	"clip_ratio": 0.0434185229241848,
	"epoch": 0.1364590622813156,
	"grad_norm": 0.09019420685306537,
	"learning_rate": 3.409090909090909e-05,
	"loss": -0.0106,
	"step": 195
	},
	{
	"clip_ratio": 0.06262689083814621,
	"epoch": 0.1371588523442967,
	"grad_norm": 0.06525358070480064,
	"learning_rate": 3.4265734265734265e-05,
	"loss": -0.012,
	"step": 196
	},
	{
	"clip_ratio": 0.0026181330904364586,
	"completion_length": 112.64286041259766,
	"epoch": 0.13785864240727783,
	"grad_norm": 0.26666733225679445,
	"learning_rate": 3.4440559440559445e-05,
	"loss": -0.0005,
	"num_tokens": 990877.0,
	"reward": -0.0357142873108387,
	"reward_std": 0.45456865429878235,
	"rewards/check_winston_local_func/mean": -0.0357142873108387,
	"rewards/check_winston_local_func/std": 1.0084062814712524,
	"step": 197
	},
	{
	"clip_ratio": 0.02237066999077797,
	"epoch": 0.13855843247025892,
	"grad_norm": 0.196293468974288,
	"learning_rate": 3.461538461538462e-05,
	"loss": -0.0081,
	"step": 198
	},
	{
	"clip_ratio": 0.05692709609866142,
	"epoch": 0.13925822253324002,
	"grad_norm": 0.12274152758367594,
	"learning_rate": 3.479020979020979e-05,
	"loss": -0.0128,
	"step": 199
	},
	{
	"clip_ratio": 0.07971518486738205,
	"epoch": 0.13995801259622112,
	"grad_norm": 0.08223267167561776,
	"learning_rate": 3.4965034965034965e-05,
	"loss": -0.0154,
	"step": 200
	}
	],
	"logging_steps": 1,
	"max_steps": 5716,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 4,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}