| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.13995801259622112, |
| "eval_steps": 500, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completion_length": 92.1964340209961, |
| "epoch": 0.0006997900629811056, |
| "grad_norm": 0.04008340386412661, |
| "learning_rate": 1.7482517482517484e-07, |
| "loss": -0.0004, |
| "num_tokens": 19229.0, |
| "reward": -0.3571428656578064, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.3571428656578064, |
| "rewards/check_winston_local_func/std": 0.9425028562545776, |
| "step": 1 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "epoch": 0.0013995801259622112, |
| "grad_norm": 0.04044301640894262, |
| "learning_rate": 3.496503496503497e-07, |
| "loss": -0.0004, |
| "step": 2 |
| }, |
| { |
| "clip_ratio": 0.0026809382252395153, |
| "epoch": 0.002099370188943317, |
| "grad_norm": 0.03858326410821281, |
| "learning_rate": 5.244755244755246e-07, |
| "loss": -0.0004, |
| "step": 3 |
| }, |
| { |
| "clip_ratio": 0.002734784735366702, |
| "epoch": 0.0027991602519244225, |
| "grad_norm": 0.03954530218881001, |
| "learning_rate": 6.993006993006994e-07, |
| "loss": -0.0004, |
| "step": 4 |
| }, |
| { |
| "clip_ratio": 0.003463542787358165, |
| "completion_length": 100.78572082519531, |
| "epoch": 0.0034989503149055285, |
| "grad_norm": 0.03131731501094277, |
| "learning_rate": 8.741258741258743e-07, |
| "loss": -0.0015, |
| "num_tokens": 39549.0, |
| "reward": -0.392857164144516, |
| "reward_std": 0.15152287483215332, |
| "rewards/check_winston_local_func/mean": -0.3928571343421936, |
| "rewards/check_winston_local_func/std": 0.9279217720031738, |
| "step": 5 |
| }, |
| { |
| "clip_ratio": 0.001506845816038549, |
| "epoch": 0.004198740377886634, |
| "grad_norm": 0.031304121161173655, |
| "learning_rate": 1.0489510489510491e-06, |
| "loss": -0.0013, |
| "step": 6 |
| }, |
| { |
| "clip_ratio": 0.0035776374861598015, |
| "epoch": 0.00489853044086774, |
| "grad_norm": 0.0317402782217819, |
| "learning_rate": 1.2237762237762238e-06, |
| "loss": -0.0014, |
| "step": 7 |
| }, |
| { |
| "clip_ratio": 0.0017960710683837533, |
| "epoch": 0.005598320503848845, |
| "grad_norm": 0.03238973734129298, |
| "learning_rate": 1.3986013986013987e-06, |
| "loss": -0.0014, |
| "step": 8 |
| }, |
| { |
| "clip_ratio": 0.0011830016737803817, |
| "completion_length": 71.98214721679688, |
| "epoch": 0.006298110566829951, |
| "grad_norm": 0.07179208315805877, |
| "learning_rate": 1.5734265734265736e-06, |
| "loss": 0.0047, |
| "num_tokens": 56096.0, |
| "reward": -0.5714285969734192, |
| "reward_std": 0.30304574966430664, |
| "rewards/check_winston_local_func/mean": -0.5714285969734192, |
| "rewards/check_winston_local_func/std": 0.82807856798172, |
| "step": 9 |
| }, |
| { |
| "clip_ratio": 0.0014540323754772544, |
| "epoch": 0.006997900629811057, |
| "grad_norm": 0.07210672978675704, |
| "learning_rate": 1.7482517482517485e-06, |
| "loss": 0.0043, |
| "step": 10 |
| }, |
| { |
| "clip_ratio": 0.0009645656682550907, |
| "epoch": 0.007697690692792162, |
| "grad_norm": 0.07154328122740576, |
| "learning_rate": 1.9230769230769234e-06, |
| "loss": 0.0048, |
| "step": 11 |
| }, |
| { |
| "clip_ratio": 0.0012382904533296824, |
| "epoch": 0.008397480755773267, |
| "grad_norm": 0.07206324717584058, |
| "learning_rate": 2.0979020979020983e-06, |
| "loss": 0.0045, |
| "step": 12 |
| }, |
| { |
| "clip_ratio": 0.0024568967055529356, |
| "completion_length": 99.33928680419922, |
| "epoch": 0.009097270818754374, |
| "grad_norm": 0.10630346643398497, |
| "learning_rate": 2.2727272727272728e-06, |
| "loss": 0.0041, |
| "num_tokens": 76111.0, |
| "reward": -0.5, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.5, |
| "rewards/check_winston_local_func/std": 0.8738628625869751, |
| "step": 13 |
| }, |
| { |
| "clip_ratio": 0.0018984745256602764, |
| "epoch": 0.00979706088173548, |
| "grad_norm": 0.10601918892837382, |
| "learning_rate": 2.4475524475524477e-06, |
| "loss": 0.0037, |
| "step": 14 |
| }, |
| { |
| "clip_ratio": 0.0020660855807363987, |
| "epoch": 0.010496850944716585, |
| "grad_norm": 0.10309105028697467, |
| "learning_rate": 2.6223776223776225e-06, |
| "loss": 0.0038, |
| "step": 15 |
| }, |
| { |
| "clip_ratio": 0.002301447093486786, |
| "epoch": 0.01119664100769769, |
| "grad_norm": 0.10289876702541416, |
| "learning_rate": 2.7972027972027974e-06, |
| "loss": 0.0038, |
| "step": 16 |
| }, |
| { |
| "clip_ratio": 0.002995749469846487, |
| "completion_length": 104.10714721679688, |
| "epoch": 0.011896431070678797, |
| "grad_norm": 0.02394212165465065, |
| "learning_rate": 2.9720279720279723e-06, |
| "loss": -0.0016, |
| "num_tokens": 97045.0, |
| "reward": -0.5714285969734192, |
| "reward_std": 0.10101525485515594, |
| "rewards/check_winston_local_func/mean": -0.5714285969734192, |
| "rewards/check_winston_local_func/std": 0.82807856798172, |
| "step": 17 |
| }, |
| { |
| "clip_ratio": 0.0021103813778609037, |
| "epoch": 0.012596221133659902, |
| "grad_norm": 0.023576991661382562, |
| "learning_rate": 3.1468531468531472e-06, |
| "loss": -0.0016, |
| "step": 18 |
| }, |
| { |
| "clip_ratio": 0.002064172876998782, |
| "epoch": 0.013296011196641007, |
| "grad_norm": 0.023272394799082628, |
| "learning_rate": 3.3216783216783217e-06, |
| "loss": -0.0017, |
| "step": 19 |
| }, |
| { |
| "clip_ratio": 0.0035561085678637028, |
| "epoch": 0.013995801259622114, |
| "grad_norm": 0.023832453115006213, |
| "learning_rate": 3.496503496503497e-06, |
| "loss": -0.0016, |
| "step": 20 |
| }, |
| { |
| "clip_ratio": 0.0015116453869268298, |
| "completion_length": 101.58928680419922, |
| "epoch": 0.01469559132260322, |
| "grad_norm": 0.11810094542561876, |
| "learning_rate": 3.6713286713286715e-06, |
| "loss": 0.0183, |
| "num_tokens": 117256.0, |
| "reward": -0.4642857313156128, |
| "reward_std": 0.45456865429878235, |
| "rewards/check_winston_local_func/mean": -0.4642857015132904, |
| "rewards/check_winston_local_func/std": 0.893700897693634, |
| "step": 21 |
| }, |
| { |
| "clip_ratio": 0.0015069997170940042, |
| "epoch": 0.015395381385584325, |
| "grad_norm": 0.11627287333325324, |
| "learning_rate": 3.846153846153847e-06, |
| "loss": 0.0183, |
| "step": 22 |
| }, |
| { |
| "clip_ratio": 0.0024017037358134985, |
| "epoch": 0.01609517144856543, |
| "grad_norm": 0.11747415252739393, |
| "learning_rate": 4.020979020979021e-06, |
| "loss": 0.0185, |
| "step": 23 |
| }, |
| { |
| "clip_ratio": 0.0012106437934562564, |
| "epoch": 0.016794961511546535, |
| "grad_norm": 0.11886525001745403, |
| "learning_rate": 4.195804195804197e-06, |
| "loss": 0.0187, |
| "step": 24 |
| }, |
| { |
| "clip_ratio": 0.00203885231167078, |
| "completion_length": 105.55357360839844, |
| "epoch": 0.01749475157452764, |
| "grad_norm": 0.054683142805963834, |
| "learning_rate": 4.370629370629371e-06, |
| "loss": 0.0025, |
| "num_tokens": 138225.0, |
| "reward": -0.785714328289032, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.7857142686843872, |
| "rewards/check_winston_local_func/std": 0.6241878271102905, |
| "step": 25 |
| }, |
| { |
| "clip_ratio": 0.0020559704862535, |
| "epoch": 0.01819454163750875, |
| "grad_norm": 0.05445975538463714, |
| "learning_rate": 4.5454545454545455e-06, |
| "loss": 0.0023, |
| "step": 26 |
| }, |
| { |
| "clip_ratio": 0.0019049489637836814, |
| "epoch": 0.018894331700489854, |
| "grad_norm": 0.0535587329596401, |
| "learning_rate": 4.72027972027972e-06, |
| "loss": 0.0024, |
| "step": 27 |
| }, |
| { |
| "clip_ratio": 0.002319059334695339, |
| "epoch": 0.01959412176347096, |
| "grad_norm": 0.0517636030603052, |
| "learning_rate": 4.895104895104895e-06, |
| "loss": 0.0024, |
| "step": 28 |
| }, |
| { |
| "clip_ratio": 0.000747890502680093, |
| "completion_length": 64.1964340209961, |
| "epoch": 0.020293911826452064, |
| "grad_norm": 0.13000888889789194, |
| "learning_rate": 5.06993006993007e-06, |
| "loss": -0.0059, |
| "num_tokens": 154256.0, |
| "reward": -0.25, |
| "reward_std": 0.25253814458847046, |
| "rewards/check_winston_local_func/mean": -0.25, |
| "rewards/check_winston_local_func/std": 0.9770084023475647, |
| "step": 29 |
| }, |
| { |
| "clip_ratio": 0.0015584274660795927, |
| "epoch": 0.02099370188943317, |
| "grad_norm": 0.13318739478301467, |
| "learning_rate": 5.244755244755245e-06, |
| "loss": -0.0058, |
| "step": 30 |
| }, |
| { |
| "clip_ratio": 0.001018411829136312, |
| "epoch": 0.021693491952414275, |
| "grad_norm": 0.1309805911026886, |
| "learning_rate": 5.419580419580419e-06, |
| "loss": -0.0059, |
| "step": 31 |
| }, |
| { |
| "clip_ratio": 0.0013923741644248366, |
| "epoch": 0.02239328201539538, |
| "grad_norm": 0.1329220463881631, |
| "learning_rate": 5.594405594405595e-06, |
| "loss": -0.0059, |
| "step": 32 |
| }, |
| { |
| "clip_ratio": 0.002108451910316944, |
| "completion_length": 89.85714721679688, |
| "epoch": 0.02309307207837649, |
| "grad_norm": 0.09378199848577215, |
| "learning_rate": 5.76923076923077e-06, |
| "loss": -0.0061, |
| "num_tokens": 173030.0, |
| "reward": -0.5714285969734192, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.5714285969734192, |
| "rewards/check_winston_local_func/std": 0.82807856798172, |
| "step": 33 |
| }, |
| { |
| "clip_ratio": 0.001712737837806344, |
| "epoch": 0.023792862141357594, |
| "grad_norm": 0.08027161931844973, |
| "learning_rate": 5.944055944055945e-06, |
| "loss": -0.0058, |
| "step": 34 |
| }, |
| { |
| "clip_ratio": 0.0028315752279013395, |
| "epoch": 0.0244926522043387, |
| "grad_norm": 0.09430537489400144, |
| "learning_rate": 6.1188811188811196e-06, |
| "loss": -0.0058, |
| "step": 35 |
| }, |
| { |
| "clip_ratio": 0.002622501691803336, |
| "epoch": 0.025192442267319804, |
| "grad_norm": 0.09399084723387506, |
| "learning_rate": 6.2937062937062944e-06, |
| "loss": -0.0059, |
| "step": 36 |
| }, |
| { |
| "clip_ratio": 0.0021157327573746443, |
| "completion_length": 109.67857360839844, |
| "epoch": 0.02589223233030091, |
| "grad_norm": 0.03251688295876031, |
| "learning_rate": 6.468531468531469e-06, |
| "loss": -0.0003, |
| "num_tokens": 194736.0, |
| "reward": -0.7500000596046448, |
| "reward_std": 0.15152287483215332, |
| "rewards/check_winston_local_func/mean": -0.75, |
| "rewards/check_winston_local_func/std": 0.6674237847328186, |
| "step": 37 |
| }, |
| { |
| "clip_ratio": 0.001992279663681984, |
| "epoch": 0.026592022393282014, |
| "grad_norm": 0.03188152803556155, |
| "learning_rate": 6.643356643356643e-06, |
| "loss": -0.0002, |
| "step": 38 |
| }, |
| { |
| "clip_ratio": 0.001603165757842362, |
| "epoch": 0.02729181245626312, |
| "grad_norm": 0.03305840754053864, |
| "learning_rate": 6.818181818181818e-06, |
| "loss": -0.0003, |
| "step": 39 |
| }, |
| { |
| "clip_ratio": 0.0016526266699656844, |
| "epoch": 0.02799160251924423, |
| "grad_norm": 0.032953470827506756, |
| "learning_rate": 6.993006993006994e-06, |
| "loss": -0.0005, |
| "step": 40 |
| }, |
| { |
| "clip_ratio": 0.0016526016406714916, |
| "completion_length": 104.71428680419922, |
| "epoch": 0.028691392582225334, |
| "grad_norm": 0.08991319428906154, |
| "learning_rate": 7.167832167832168e-06, |
| "loss": -0.0004, |
| "num_tokens": 215924.0, |
| "reward": -0.6428571939468384, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.6428571343421936, |
| "rewards/check_winston_local_func/std": 0.7729182839393616, |
| "step": 41 |
| }, |
| { |
| "clip_ratio": 0.0011276095174252987, |
| "epoch": 0.02939118264520644, |
| "grad_norm": 0.0932105814337471, |
| "learning_rate": 7.342657342657343e-06, |
| "loss": -0.0005, |
| "step": 42 |
| }, |
| { |
| "clip_ratio": 0.0012143882922828197, |
| "epoch": 0.030090972708187544, |
| "grad_norm": 0.09501466400263839, |
| "learning_rate": 7.517482517482517e-06, |
| "loss": -0.0006, |
| "step": 43 |
| }, |
| { |
| "clip_ratio": 0.001439297804608941, |
| "epoch": 0.03079076277116865, |
| "grad_norm": 0.09410970453927742, |
| "learning_rate": 7.692307692307694e-06, |
| "loss": -0.0007, |
| "step": 44 |
| }, |
| { |
| "clip_ratio": 0.0007226300658658147, |
| "completion_length": 88.4464340209961, |
| "epoch": 0.031490552834149754, |
| "grad_norm": 0.03381639342861229, |
| "learning_rate": 7.867132867132867e-06, |
| "loss": 0.0016, |
| "num_tokens": 234579.0, |
| "reward": -0.535714328289032, |
| "reward_std": 0.05050762742757797, |
| "rewards/check_winston_local_func/mean": -0.5357142686843872, |
| "rewards/check_winston_local_func/std": 0.8520410656929016, |
| "step": 45 |
| }, |
| { |
| "clip_ratio": 0.0017245642375200987, |
| "epoch": 0.03219034289713086, |
| "grad_norm": 0.033458450811929934, |
| "learning_rate": 8.041958041958042e-06, |
| "loss": 0.0016, |
| "step": 46 |
| }, |
| { |
| "clip_ratio": 0.0011628264328464866, |
| "epoch": 0.032890132960111965, |
| "grad_norm": 0.03442645231795752, |
| "learning_rate": 8.216783216783217e-06, |
| "loss": 0.0016, |
| "step": 47 |
| }, |
| { |
| "clip_ratio": 0.0013220456894487143, |
| "epoch": 0.03358992302309307, |
| "grad_norm": 0.028415415852515197, |
| "learning_rate": 8.391608391608393e-06, |
| "loss": 0.0015, |
| "step": 48 |
| }, |
| { |
| "clip_ratio": 0.0034361626021564007, |
| "completion_length": 91.46428680419922, |
| "epoch": 0.034289713086074175, |
| "grad_norm": 0.04920502199586618, |
| "learning_rate": 8.566433566433566e-06, |
| "loss": 0.0013, |
| "num_tokens": 253761.0, |
| "reward": -0.8214285969734192, |
| "reward_std": 0.15152287483215332, |
| "rewards/check_winston_local_func/mean": -0.8214285969734192, |
| "rewards/check_winston_local_func/std": 0.5754727125167847, |
| "step": 49 |
| }, |
| { |
| "clip_ratio": 0.0016177111538127065, |
| "epoch": 0.03498950314905528, |
| "grad_norm": 0.04933398351205588, |
| "learning_rate": 8.741258741258741e-06, |
| "loss": 0.0013, |
| "step": 50 |
| }, |
| { |
| "clip_ratio": 0.0023581169079989195, |
| "epoch": 0.03568929321203639, |
| "grad_norm": 0.04990239817529589, |
| "learning_rate": 8.916083916083918e-06, |
| "loss": 0.0012, |
| "step": 51 |
| }, |
| { |
| "clip_ratio": 0.002693318761885166, |
| "epoch": 0.0363890832750175, |
| "grad_norm": 0.049258900801895884, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 0.0011, |
| "step": 52 |
| }, |
| { |
| "clip_ratio": 0.0013933092122897506, |
| "completion_length": 76.5, |
| "epoch": 0.0370888733379986, |
| "grad_norm": 0.06783434389116598, |
| "learning_rate": 9.265734265734266e-06, |
| "loss": 0.0004, |
| "num_tokens": 270951.0, |
| "reward": -0.4285714626312256, |
| "reward_std": 0.4040610194206238, |
| "rewards/check_winston_local_func/mean": -0.4285714328289032, |
| "rewards/check_winston_local_func/std": 0.9116845726966858, |
| "step": 53 |
| }, |
| { |
| "clip_ratio": 0.0012361541157588363, |
| "epoch": 0.03778866340097971, |
| "grad_norm": 0.06739214545276892, |
| "learning_rate": 9.44055944055944e-06, |
| "loss": 0.0002, |
| "step": 54 |
| }, |
| { |
| "clip_ratio": 0.0011503547430038452, |
| "epoch": 0.03848845346396081, |
| "grad_norm": 0.06968777943502087, |
| "learning_rate": 9.615384615384616e-06, |
| "loss": -0.0001, |
| "step": 55 |
| }, |
| { |
| "clip_ratio": 0.002146858721971512, |
| "epoch": 0.03918824352694192, |
| "grad_norm": 0.06632924367628588, |
| "learning_rate": 9.79020979020979e-06, |
| "loss": -0.0001, |
| "step": 56 |
| }, |
| { |
| "clip_ratio": 0.002248402452096343, |
| "completion_length": 100.05357360839844, |
| "epoch": 0.03988803358992302, |
| "grad_norm": 0.03339030440662342, |
| "learning_rate": 9.965034965034966e-06, |
| "loss": 0.0007, |
| "num_tokens": 291902.0, |
| "reward": -0.4285714626312256, |
| "reward_std": 0.10101525485515594, |
| "rewards/check_winston_local_func/mean": -0.4285714328289032, |
| "rewards/check_winston_local_func/std": 0.9116845726966858, |
| "step": 57 |
| }, |
| { |
| "clip_ratio": 0.002410450717434287, |
| "epoch": 0.04058782365290413, |
| "grad_norm": 0.03407166704900621, |
| "learning_rate": 1.013986013986014e-05, |
| "loss": 0.0008, |
| "step": 58 |
| }, |
| { |
| "clip_ratio": 0.0027997640427201986, |
| "epoch": 0.041287613715885234, |
| "grad_norm": 0.03397162163806953, |
| "learning_rate": 1.0314685314685315e-05, |
| "loss": 0.0007, |
| "step": 59 |
| }, |
| { |
| "clip_ratio": 0.0030621180776506662, |
| "epoch": 0.04198740377886634, |
| "grad_norm": 0.034258551127942245, |
| "learning_rate": 1.048951048951049e-05, |
| "loss": 0.0007, |
| "step": 60 |
| }, |
| { |
| "clip_ratio": 0.0021611705888062716, |
| "completion_length": 84.55357360839844, |
| "epoch": 0.042687193841847444, |
| "grad_norm": 0.04382869653282982, |
| "learning_rate": 1.0664335664335665e-05, |
| "loss": 0.0024, |
| "num_tokens": 310097.0, |
| "reward": -0.7500000596046448, |
| "reward_std": 0.15152287483215332, |
| "rewards/check_winston_local_func/mean": -0.75, |
| "rewards/check_winston_local_func/std": 0.6674237847328186, |
| "step": 61 |
| }, |
| { |
| "clip_ratio": 0.0010437711607664824, |
| "epoch": 0.04338698390482855, |
| "grad_norm": 0.04632797121542376, |
| "learning_rate": 1.0839160839160838e-05, |
| "loss": 0.0024, |
| "step": 62 |
| }, |
| { |
| "clip_ratio": 0.0013273117365315557, |
| "epoch": 0.044086773967809655, |
| "grad_norm": 0.046019113122256816, |
| "learning_rate": 1.1013986013986015e-05, |
| "loss": 0.0021, |
| "step": 63 |
| }, |
| { |
| "clip_ratio": 0.002344512613490224, |
| "epoch": 0.04478656403079076, |
| "grad_norm": 0.04434571865803126, |
| "learning_rate": 1.118881118881119e-05, |
| "loss": 0.0022, |
| "step": 64 |
| }, |
| { |
| "clip_ratio": 0.0014854084001854062, |
| "completion_length": 93.58928680419922, |
| "epoch": 0.04548635409377187, |
| "grad_norm": 0.013792833624525774, |
| "learning_rate": 1.1363636363636365e-05, |
| "loss": -0.0006, |
| "num_tokens": 329004.0, |
| "reward": -0.8214285969734192, |
| "reward_std": 0.05050762742757797, |
| "rewards/check_winston_local_func/mean": -0.8214285969734192, |
| "rewards/check_winston_local_func/std": 0.5754727125167847, |
| "step": 65 |
| }, |
| { |
| "clip_ratio": 0.002451003296300769, |
| "epoch": 0.04618614415675298, |
| "grad_norm": 0.01395207894798071, |
| "learning_rate": 1.153846153846154e-05, |
| "loss": -0.0006, |
| "step": 66 |
| }, |
| { |
| "clip_ratio": 0.0016351536614820361, |
| "epoch": 0.04688593421973408, |
| "grad_norm": 0.01316846865814371, |
| "learning_rate": 1.1713286713286714e-05, |
| "loss": -0.0006, |
| "step": 67 |
| }, |
| { |
| "clip_ratio": 0.0026426080148667097, |
| "epoch": 0.04758572428271519, |
| "grad_norm": 0.013837974578886334, |
| "learning_rate": 1.188811188811189e-05, |
| "loss": -0.0006, |
| "step": 68 |
| }, |
| { |
| "clip_ratio": 0.0009010470239445567, |
| "completion_length": 95.62500762939453, |
| "epoch": 0.04828551434569629, |
| "grad_norm": 0.08072032529486632, |
| "learning_rate": 1.2062937062937063e-05, |
| "loss": 0.0018, |
| "num_tokens": 349081.0, |
| "reward": -0.7142857313156128, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.7142857313156128, |
| "rewards/check_winston_local_func/std": 0.7061878442764282, |
| "step": 69 |
| }, |
| { |
| "clip_ratio": 0.0022199582308530807, |
| "epoch": 0.0489853044086774, |
| "grad_norm": 0.0818785835069056, |
| "learning_rate": 1.2237762237762239e-05, |
| "loss": 0.0014, |
| "step": 70 |
| }, |
| { |
| "clip_ratio": 0.0017290068790316582, |
| "epoch": 0.0496850944716585, |
| "grad_norm": 0.07902681914542756, |
| "learning_rate": 1.2412587412587414e-05, |
| "loss": 0.001, |
| "step": 71 |
| }, |
| { |
| "clip_ratio": 0.0016852362314239144, |
| "epoch": 0.05038488453463961, |
| "grad_norm": 0.08449847550325483, |
| "learning_rate": 1.2587412587412589e-05, |
| "loss": 0.0005, |
| "step": 72 |
| }, |
| { |
| "clip_ratio": 0.0020247853826731443, |
| "completion_length": 91.78572082519531, |
| "epoch": 0.05108467459762071, |
| "grad_norm": 0.04909551324890117, |
| "learning_rate": 1.2762237762237764e-05, |
| "loss": -0.003, |
| "num_tokens": 367819.0, |
| "reward": -0.5, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.5, |
| "rewards/check_winston_local_func/std": 0.8738628625869751, |
| "step": 73 |
| }, |
| { |
| "clip_ratio": 0.0034576961770653725, |
| "epoch": 0.05178446466060182, |
| "grad_norm": 0.050114477186976265, |
| "learning_rate": 1.2937062937062939e-05, |
| "loss": -0.0031, |
| "step": 74 |
| }, |
| { |
| "clip_ratio": 0.0019642652478069067, |
| "epoch": 0.052484254723582924, |
| "grad_norm": 0.05018386747388819, |
| "learning_rate": 1.3111888111888112e-05, |
| "loss": -0.0033, |
| "step": 75 |
| }, |
| { |
| "clip_ratio": 0.004161330871284008, |
| "epoch": 0.05318404478656403, |
| "grad_norm": 0.05121415635619661, |
| "learning_rate": 1.3286713286713287e-05, |
| "loss": -0.0037, |
| "step": 76 |
| }, |
| { |
| "clip_ratio": 0.0017616358818486333, |
| "completion_length": 106.50000762939453, |
| "epoch": 0.053883834849545134, |
| "grad_norm": 0.05530735796526797, |
| "learning_rate": 1.3461538461538462e-05, |
| "loss": -0.0041, |
| "num_tokens": 388977.0, |
| "reward": -0.6428571939468384, |
| "reward_std": 0.10101525485515594, |
| "rewards/check_winston_local_func/mean": -0.6428571343421936, |
| "rewards/check_winston_local_func/std": 0.7729182839393616, |
| "step": 77 |
| }, |
| { |
| "clip_ratio": 0.0018110191449522972, |
| "epoch": 0.05458362491252624, |
| "grad_norm": 0.05608373724379992, |
| "learning_rate": 1.3636363636363637e-05, |
| "loss": -0.0042, |
| "step": 78 |
| }, |
| { |
| "clip_ratio": 0.003877634182572365, |
| "epoch": 0.055283414975507345, |
| "grad_norm": 0.04218829661702587, |
| "learning_rate": 1.381118881118881e-05, |
| "loss": -0.0044, |
| "step": 79 |
| }, |
| { |
| "clip_ratio": 0.003191744675859809, |
| "epoch": 0.05598320503848846, |
| "grad_norm": 0.04293784405855666, |
| "learning_rate": 1.3986013986013988e-05, |
| "loss": -0.0046, |
| "step": 80 |
| }, |
| { |
| "clip_ratio": 0.002192563144490123, |
| "completion_length": 102.51786041259766, |
| "epoch": 0.05668299510146956, |
| "grad_norm": 0.01621220365623996, |
| "learning_rate": 1.4160839160839163e-05, |
| "loss": -0.0011, |
| "num_tokens": 409804.0, |
| "reward": -0.8214285969734192, |
| "reward_std": 0.05050762742757797, |
| "rewards/check_winston_local_func/mean": -0.8214285969734192, |
| "rewards/check_winston_local_func/std": 0.5754727125167847, |
| "step": 81 |
| }, |
| { |
| "clip_ratio": 0.0021092891693115234, |
| "epoch": 0.05738278516445067, |
| "grad_norm": 0.016235676972647724, |
| "learning_rate": 1.4335664335664336e-05, |
| "loss": -0.0012, |
| "step": 82 |
| }, |
| { |
| "clip_ratio": 0.0023684012703597546, |
| "epoch": 0.05808257522743177, |
| "grad_norm": 0.016915613552120477, |
| "learning_rate": 1.4510489510489511e-05, |
| "loss": -0.0011, |
| "step": 83 |
| }, |
| { |
| "clip_ratio": 0.002553236670792103, |
| "epoch": 0.05878236529041288, |
| "grad_norm": 0.016878300731711607, |
| "learning_rate": 1.4685314685314686e-05, |
| "loss": -0.0012, |
| "step": 84 |
| }, |
| { |
| "clip_ratio": 0.0023020573426038027, |
| "completion_length": 95.33928680419922, |
| "epoch": 0.05948215535339398, |
| "grad_norm": 0.09922403654931192, |
| "learning_rate": 1.486013986013986e-05, |
| "loss": 0.0034, |
| "num_tokens": 429265.0, |
| "reward": -0.392857164144516, |
| "reward_std": 0.3535533845424652, |
| "rewards/check_winston_local_func/mean": -0.3928571343421936, |
| "rewards/check_winston_local_func/std": 0.9279217720031738, |
| "step": 85 |
| }, |
| { |
| "clip_ratio": 0.002049357397481799, |
| "epoch": 0.06018194541637509, |
| "grad_norm": 0.0820235579491076, |
| "learning_rate": 1.5034965034965034e-05, |
| "loss": 0.0032, |
| "step": 86 |
| }, |
| { |
| "clip_ratio": 0.003993889316916466, |
| "epoch": 0.06088173547935619, |
| "grad_norm": 0.07572179365943402, |
| "learning_rate": 1.5209790209790212e-05, |
| "loss": 0.0028, |
| "step": 87 |
| }, |
| { |
| "clip_ratio": 0.004023912828415632, |
| "epoch": 0.0615815255423373, |
| "grad_norm": 0.0786883863698215, |
| "learning_rate": 1.5384615384615387e-05, |
| "loss": 0.0025, |
| "step": 88 |
| }, |
| { |
| "clip_ratio": 0.0012786961160600185, |
| "completion_length": 101.01786041259766, |
| "epoch": 0.0622813156053184, |
| "grad_norm": 0.13944075765142377, |
| "learning_rate": 1.555944055944056e-05, |
| "loss": -0.0059, |
| "num_tokens": 449712.0, |
| "reward": -0.4642857313156128, |
| "reward_std": 0.3535533845424652, |
| "rewards/check_winston_local_func/mean": -0.4642857015132904, |
| "rewards/check_winston_local_func/std": 0.893700897693634, |
| "step": 89 |
| }, |
| { |
| "clip_ratio": 0.0012525760103017092, |
| "epoch": 0.06298110566829951, |
| "grad_norm": 0.14046611947962784, |
| "learning_rate": 1.5734265734265734e-05, |
| "loss": -0.0062, |
| "step": 90 |
| }, |
| { |
| "clip_ratio": 0.003111109836027026, |
| "epoch": 0.06368089573128062, |
| "grad_norm": 0.13042169819458227, |
| "learning_rate": 1.590909090909091e-05, |
| "loss": -0.0079, |
| "step": 91 |
| }, |
| { |
| "clip_ratio": 0.013328815810382366, |
| "epoch": 0.06438068579426172, |
| "grad_norm": 0.0955558239371001, |
| "learning_rate": 1.6083916083916083e-05, |
| "loss": -0.0092, |
| "step": 92 |
| }, |
| { |
| "clip_ratio": 0.0021473567467182875, |
| "completion_length": 80.30357360839844, |
| "epoch": 0.06508047585724283, |
| "grad_norm": 0.17357283543483568, |
| "learning_rate": 1.625874125874126e-05, |
| "loss": 0.0104, |
| "num_tokens": 466947.0, |
| "reward": -0.2142857313156128, |
| "reward_std": 0.4040610194206238, |
| "rewards/check_winston_local_func/mean": -0.2142857164144516, |
| "rewards/check_winston_local_func/std": 0.9856107234954834, |
| "step": 93 |
| }, |
| { |
| "clip_ratio": 0.004891776479780674, |
| "epoch": 0.06578026592022393, |
| "grad_norm": 0.16677719867011565, |
| "learning_rate": 1.6433566433566433e-05, |
| "loss": 0.0089, |
| "step": 94 |
| }, |
| { |
| "clip_ratio": 0.008591952733695507, |
| "epoch": 0.06648005598320504, |
| "grad_norm": 0.15657656176787582, |
| "learning_rate": 1.660839160839161e-05, |
| "loss": 0.0066, |
| "step": 95 |
| }, |
| { |
| "clip_ratio": 0.017924649640917778, |
| "epoch": 0.06717984604618614, |
| "grad_norm": 0.1468099989251008, |
| "learning_rate": 1.6783216783216786e-05, |
| "loss": 0.004, |
| "step": 96 |
| }, |
| { |
| "clip_ratio": 0.0016577127389609814, |
| "completion_length": 120.50000762939453, |
| "epoch": 0.06787963610916725, |
| "grad_norm": 0.06674060360416964, |
| "learning_rate": 1.695804195804196e-05, |
| "loss": -0.001, |
| "num_tokens": 489863.0, |
| "reward": -0.785714328289032, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.7857142686843872, |
| "rewards/check_winston_local_func/std": 0.6241878271102905, |
| "step": 97 |
| }, |
| { |
| "clip_ratio": 0.003024409292265773, |
| "epoch": 0.06857942617214835, |
| "grad_norm": 0.05912491262092969, |
| "learning_rate": 1.7132867132867133e-05, |
| "loss": -0.0015, |
| "step": 98 |
| }, |
| { |
| "clip_ratio": 0.004554019309580326, |
| "epoch": 0.06927921623512946, |
| "grad_norm": 0.05430919883014471, |
| "learning_rate": 1.730769230769231e-05, |
| "loss": -0.0019, |
| "step": 99 |
| }, |
| { |
| "clip_ratio": 0.00875174906104803, |
| "epoch": 0.06997900629811056, |
| "grad_norm": 0.04764899914092442, |
| "learning_rate": 1.7482517482517483e-05, |
| "loss": -0.0025, |
| "step": 100 |
| }, |
| { |
| "clip_ratio": 0.0022144827526062727, |
| "completion_length": 85.92857360839844, |
| "epoch": 0.07067879636109167, |
| "grad_norm": 0.09514652279620683, |
| "learning_rate": 1.7657342657342656e-05, |
| "loss": 0.0045, |
| "num_tokens": 508189.0, |
| "reward": -0.535714328289032, |
| "reward_std": 0.3535534143447876, |
| "rewards/check_winston_local_func/mean": -0.5357142686843872, |
| "rewards/check_winston_local_func/std": 0.8520411252975464, |
| "step": 101 |
| }, |
| { |
| "clip_ratio": 0.004405450075864792, |
| "epoch": 0.07137858642407278, |
| "grad_norm": 0.08384041265838117, |
| "learning_rate": 1.7832167832167836e-05, |
| "loss": 0.004, |
| "step": 102 |
| }, |
| { |
| "clip_ratio": 0.007100887596607208, |
| "epoch": 0.07207837648705388, |
| "grad_norm": 0.07838152678466788, |
| "learning_rate": 1.800699300699301e-05, |
| "loss": 0.0031, |
| "step": 103 |
| }, |
| { |
| "clip_ratio": 0.012806176207959652, |
| "epoch": 0.072778166550035, |
| "grad_norm": 0.054761747581467624, |
| "learning_rate": 1.8181818181818182e-05, |
| "loss": 0.0022, |
| "step": 104 |
| }, |
| { |
| "clip_ratio": 0.002409872133284807, |
| "completion_length": 89.16072082519531, |
| "epoch": 0.0734779566130161, |
| "grad_norm": 0.088338886024993, |
| "learning_rate": 1.835664335664336e-05, |
| "loss": -0.0024, |
| "num_tokens": 526646.0, |
| "reward": -0.5, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.5, |
| "rewards/check_winston_local_func/std": 0.8738628625869751, |
| "step": 105 |
| }, |
| { |
| "clip_ratio": 0.003525706473737955, |
| "epoch": 0.0741777466759972, |
| "grad_norm": 0.08578242655426072, |
| "learning_rate": 1.8531468531468532e-05, |
| "loss": -0.0031, |
| "step": 106 |
| }, |
| { |
| "clip_ratio": 0.010289273224771023, |
| "epoch": 0.0748775367389783, |
| "grad_norm": 0.07249149477931406, |
| "learning_rate": 1.8706293706293705e-05, |
| "loss": -0.0043, |
| "step": 107 |
| }, |
| { |
| "clip_ratio": 0.027354398742318153, |
| "epoch": 0.07557732680195942, |
| "grad_norm": 0.059934840975274094, |
| "learning_rate": 1.888111888111888e-05, |
| "loss": -0.005, |
| "step": 108 |
| }, |
| { |
| "clip_ratio": 0.002047365065664053, |
| "completion_length": 99.67857360839844, |
| "epoch": 0.07627711686494051, |
| "grad_norm": 0.052068804469643425, |
| "learning_rate": 1.9055944055944055e-05, |
| "loss": -0.0002, |
| "num_tokens": 546720.0, |
| "reward": -0.7142857313156128, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.7142857313156128, |
| "rewards/check_winston_local_func/std": 0.7061878442764282, |
| "step": 109 |
| }, |
| { |
| "clip_ratio": 0.006073285825550556, |
| "epoch": 0.07697690692792163, |
| "grad_norm": 0.053601884682000965, |
| "learning_rate": 1.923076923076923e-05, |
| "loss": -0.0004, |
| "step": 110 |
| }, |
| { |
| "clip_ratio": 0.01850634068250656, |
| "epoch": 0.07767669699090272, |
| "grad_norm": 0.0262467926642497, |
| "learning_rate": 1.9405594405594408e-05, |
| "loss": -0.0006, |
| "step": 111 |
| }, |
| { |
| "clip_ratio": 0.02557740919291973, |
| "epoch": 0.07837648705388384, |
| "grad_norm": 0.028862292431493224, |
| "learning_rate": 1.958041958041958e-05, |
| "loss": -0.0007, |
| "step": 112 |
| }, |
| { |
| "clip_ratio": 0.0017085629515349865, |
| "completion_length": 102.46428680419922, |
| "epoch": 0.07907627711686493, |
| "grad_norm": 0.047073786173530204, |
| "learning_rate": 1.9755244755244758e-05, |
| "loss": 0.0003, |
| "num_tokens": 567098.0, |
| "reward": -0.6428571939468384, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.6428571343421936, |
| "rewards/check_winston_local_func/std": 0.7729182839393616, |
| "step": 113 |
| }, |
| { |
| "clip_ratio": 0.0032013251911848783, |
| "epoch": 0.07977606717984605, |
| "grad_norm": 0.04603497030180295, |
| "learning_rate": 1.993006993006993e-05, |
| "loss": 0.0, |
| "step": 114 |
| }, |
| { |
| "clip_ratio": 0.00821536686271429, |
| "epoch": 0.08047585724282715, |
| "grad_norm": 0.04227017570834447, |
| "learning_rate": 2.0104895104895104e-05, |
| "loss": -0.0003, |
| "step": 115 |
| }, |
| { |
| "clip_ratio": 0.020387563854455948, |
| "epoch": 0.08117564730580826, |
| "grad_norm": 0.03197828312555116, |
| "learning_rate": 2.027972027972028e-05, |
| "loss": -0.0007, |
| "step": 116 |
| }, |
| { |
| "clip_ratio": 0.002021044958382845, |
| "completion_length": 81.4464340209961, |
| "epoch": 0.08187543736878937, |
| "grad_norm": 0.13324413388648826, |
| "learning_rate": 2.0454545454545457e-05, |
| "loss": 0.0049, |
| "num_tokens": 584845.0, |
| "reward": -0.4285714626312256, |
| "reward_std": 0.4040610194206238, |
| "rewards/check_winston_local_func/mean": -0.4285714328289032, |
| "rewards/check_winston_local_func/std": 0.9116845726966858, |
| "step": 117 |
| }, |
| { |
| "clip_ratio": 0.01248109433799982, |
| "epoch": 0.08257522743177047, |
| "grad_norm": 0.11616979541003405, |
| "learning_rate": 2.062937062937063e-05, |
| "loss": 0.0033, |
| "step": 118 |
| }, |
| { |
| "clip_ratio": 0.03872568532824516, |
| "epoch": 0.08327501749475158, |
| "grad_norm": 0.08932212762248723, |
| "learning_rate": 2.0804195804195807e-05, |
| "loss": 0.0016, |
| "step": 119 |
| }, |
| { |
| "clip_ratio": 0.0671406015753746, |
| "epoch": 0.08397480755773268, |
| "grad_norm": 0.07829121992484567, |
| "learning_rate": 2.097902097902098e-05, |
| "loss": 0.0001, |
| "step": 120 |
| }, |
| { |
| "clip_ratio": 0.0027903958689421415, |
| "completion_length": 109.67857360839844, |
| "epoch": 0.08467459762071379, |
| "grad_norm": 0.10038881852522909, |
| "learning_rate": 2.1153846153846154e-05, |
| "loss": 0.0027, |
| "num_tokens": 606389.0, |
| "reward": -0.1071428656578064, |
| "reward_std": 0.05050762742757797, |
| "rewards/check_winston_local_func/mean": -0.1071428582072258, |
| "rewards/check_winston_local_func/std": 1.0032415390014648, |
| "step": 121 |
| }, |
| { |
| "clip_ratio": 0.010223714634776115, |
| "epoch": 0.08537438768369489, |
| "grad_norm": 0.08037774781050448, |
| "learning_rate": 2.132867132867133e-05, |
| "loss": 0.0017, |
| "step": 122 |
| }, |
| { |
| "clip_ratio": 0.02510545216500759, |
| "epoch": 0.086074177746676, |
| "grad_norm": 0.06594532056416831, |
| "learning_rate": 2.1503496503496503e-05, |
| "loss": 0.0009, |
| "step": 123 |
| }, |
| { |
| "clip_ratio": 0.04746328294277191, |
| "epoch": 0.0867739678096571, |
| "grad_norm": 0.0495169500454822, |
| "learning_rate": 2.1678321678321677e-05, |
| "loss": 0.0004, |
| "step": 124 |
| }, |
| { |
| "clip_ratio": 0.002552854595705867, |
| "completion_length": 92.92857360839844, |
| "epoch": 0.08747375787263821, |
| "grad_norm": 0.07383050554477533, |
| "learning_rate": 2.1853146853146857e-05, |
| "loss": -0.003, |
| "num_tokens": 625723.0, |
| "reward": -0.392857164144516, |
| "reward_std": 0.05050762742757797, |
| "rewards/check_winston_local_func/mean": -0.3928571343421936, |
| "rewards/check_winston_local_func/std": 0.9279217720031738, |
| "step": 125 |
| }, |
| { |
| "clip_ratio": 0.017842039465904236, |
| "epoch": 0.08817354793561931, |
| "grad_norm": 0.043649507012091936, |
| "learning_rate": 2.202797202797203e-05, |
| "loss": -0.0037, |
| "step": 126 |
| }, |
| { |
| "clip_ratio": 0.045183245092630386, |
| "epoch": 0.08887333799860042, |
| "grad_norm": 0.03451108201943257, |
| "learning_rate": 2.2202797202797203e-05, |
| "loss": -0.0039, |
| "step": 127 |
| }, |
| { |
| "clip_ratio": 0.0701090469956398, |
| "epoch": 0.08957312806158152, |
| "grad_norm": 0.023061406081443397, |
| "learning_rate": 2.237762237762238e-05, |
| "loss": -0.0041, |
| "step": 128 |
| }, |
| { |
| "clip_ratio": 0.0010595758212730289, |
| "completion_length": 103.25000762939453, |
| "epoch": 0.09027291812456263, |
| "grad_norm": 0.15282505586734968, |
| "learning_rate": 2.2552447552447553e-05, |
| "loss": 0.0011, |
| "num_tokens": 646391.0, |
| "reward": -0.1071428656578064, |
| "reward_std": 0.45456868410110474, |
| "rewards/check_winston_local_func/mean": -0.1071428582072258, |
| "rewards/check_winston_local_func/std": 1.0032414197921753, |
| "step": 129 |
| }, |
| { |
| "clip_ratio": 0.015386571176350117, |
| "epoch": 0.09097270818754374, |
| "grad_norm": 0.13868014978577842, |
| "learning_rate": 2.272727272727273e-05, |
| "loss": -0.0008, |
| "step": 130 |
| }, |
| { |
| "clip_ratio": 0.025238754227757454, |
| "epoch": 0.09167249825052484, |
| "grad_norm": 0.12901702065673692, |
| "learning_rate": 2.2902097902097902e-05, |
| "loss": -0.0033, |
| "step": 131 |
| }, |
| { |
| "clip_ratio": 0.026391755789518356, |
| "epoch": 0.09237228831350595, |
| "grad_norm": 0.09971213845352783, |
| "learning_rate": 2.307692307692308e-05, |
| "loss": -0.006, |
| "step": 132 |
| }, |
| { |
| "clip_ratio": 0.0035059447400271893, |
| "completion_length": 98.33928680419922, |
| "epoch": 0.09307207837648705, |
| "grad_norm": 0.11128060046799104, |
| "learning_rate": 2.3251748251748252e-05, |
| "loss": -0.0018, |
| "num_tokens": 666114.0, |
| "reward": -0.3571428656578064, |
| "reward_std": 0.30304574966430664, |
| "rewards/check_winston_local_func/mean": -0.3571428656578064, |
| "rewards/check_winston_local_func/std": 0.9425028562545776, |
| "step": 133 |
| }, |
| { |
| "clip_ratio": 0.0042843748815357685, |
| "epoch": 0.09377186843946816, |
| "grad_norm": 0.1065458455684171, |
| "learning_rate": 2.342657342657343e-05, |
| "loss": -0.0028, |
| "step": 134 |
| }, |
| { |
| "clip_ratio": 0.01338073518127203, |
| "epoch": 0.09447165850244926, |
| "grad_norm": 0.07422771205194853, |
| "learning_rate": 2.3601398601398602e-05, |
| "loss": -0.0042, |
| "step": 135 |
| }, |
| { |
| "clip_ratio": 0.02260064147412777, |
| "epoch": 0.09517144856543037, |
| "grad_norm": 0.05031624319039464, |
| "learning_rate": 2.377622377622378e-05, |
| "loss": -0.0051, |
| "step": 136 |
| }, |
| { |
| "clip_ratio": 0.0022608404979109764, |
| "completion_length": 87.00000762939453, |
| "epoch": 0.09587123862841147, |
| "grad_norm": 0.17254445638790264, |
| "learning_rate": 2.3951048951048952e-05, |
| "loss": -0.0094, |
| "num_tokens": 684454.0, |
| "reward": -0.0357142873108387, |
| "reward_std": 0.3535533845424652, |
| "rewards/check_winston_local_func/mean": -0.0357142873108387, |
| "rewards/check_winston_local_func/std": 1.0084062814712524, |
| "step": 137 |
| }, |
| { |
| "clip_ratio": 0.011511722579598427, |
| "epoch": 0.09657102869139259, |
| "grad_norm": 0.146693566733721, |
| "learning_rate": 2.4125874125874125e-05, |
| "loss": -0.0129, |
| "step": 138 |
| }, |
| { |
| "clip_ratio": 0.0310056172311306, |
| "epoch": 0.09727081875437368, |
| "grad_norm": 0.1269304320317103, |
| "learning_rate": 2.43006993006993e-05, |
| "loss": -0.0159, |
| "step": 139 |
| }, |
| { |
| "clip_ratio": 0.04163637384772301, |
| "epoch": 0.0979706088173548, |
| "grad_norm": 0.09818573191869126, |
| "learning_rate": 2.4475524475524478e-05, |
| "loss": -0.0187, |
| "step": 140 |
| }, |
| { |
| "clip_ratio": 0.0025228250306099653, |
| "completion_length": 104.9464340209961, |
| "epoch": 0.0986703988803359, |
| "grad_norm": 0.1003012262187849, |
| "learning_rate": 2.465034965034965e-05, |
| "loss": -0.0031, |
| "num_tokens": 705439.0, |
| "reward": -0.1428571492433548, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.1428571492433548, |
| "rewards/check_winston_local_func/std": 0.9987004995346069, |
| "step": 141 |
| }, |
| { |
| "clip_ratio": 0.012786303646862507, |
| "epoch": 0.099370188943317, |
| "grad_norm": 0.08364392907940049, |
| "learning_rate": 2.4825174825174828e-05, |
| "loss": -0.0043, |
| "step": 142 |
| }, |
| { |
| "clip_ratio": 0.04185020551085472, |
| "epoch": 0.1000699790062981, |
| "grad_norm": 0.055411268177771554, |
| "learning_rate": 2.5e-05, |
| "loss": -0.0052, |
| "step": 143 |
| }, |
| { |
| "clip_ratio": 0.06534933298826218, |
| "epoch": 0.10076976906927922, |
| "grad_norm": 0.04043680317744743, |
| "learning_rate": 2.5174825174825178e-05, |
| "loss": -0.0057, |
| "step": 144 |
| }, |
| { |
| "clip_ratio": 0.00163670489564538, |
| "completion_length": 79.0, |
| "epoch": 0.10146955913226033, |
| "grad_norm": 0.26037723002756274, |
| "learning_rate": 2.534965034965035e-05, |
| "loss": -0.0013, |
| "num_tokens": 722363.0, |
| "reward": -0.2857142984867096, |
| "reward_std": 0.4040610194206238, |
| "rewards/check_winston_local_func/mean": -0.2857142984867096, |
| "rewards/check_winston_local_func/std": 0.9669875502586365, |
| "step": 145 |
| }, |
| { |
| "clip_ratio": 0.010257317684590816, |
| "epoch": 0.10216934919524143, |
| "grad_norm": 0.21925125532352843, |
| "learning_rate": 2.5524475524475528e-05, |
| "loss": -0.0078, |
| "step": 146 |
| }, |
| { |
| "clip_ratio": 0.03979513794183731, |
| "epoch": 0.10286913925822254, |
| "grad_norm": 0.1493741677240076, |
| "learning_rate": 2.5699300699300697e-05, |
| "loss": -0.014, |
| "step": 147 |
| }, |
| { |
| "clip_ratio": 0.06495730578899384, |
| "epoch": 0.10356892932120364, |
| "grad_norm": 0.09721255929993584, |
| "learning_rate": 2.5874125874125877e-05, |
| "loss": -0.0176, |
| "step": 148 |
| }, |
| { |
| "clip_ratio": 0.003729403717443347, |
| "completion_length": 98.60714721679688, |
| "epoch": 0.10426871938418475, |
| "grad_norm": 0.21024181860460278, |
| "learning_rate": 2.6048951048951047e-05, |
| "loss": -0.0029, |
| "num_tokens": 742543.0, |
| "reward": -0.3214285969734192, |
| "reward_std": 0.25253814458847046, |
| "rewards/check_winston_local_func/mean": -0.3214285671710968, |
| "rewards/check_winston_local_func/std": 0.955503523349762, |
| "step": 149 |
| }, |
| { |
| "clip_ratio": 0.011744500137865543, |
| "epoch": 0.10496850944716585, |
| "grad_norm": 0.16355956559118884, |
| "learning_rate": 2.6223776223776224e-05, |
| "loss": -0.0069, |
| "step": 150 |
| }, |
| { |
| "clip_ratio": 0.030442187562584877, |
| "epoch": 0.10566829951014696, |
| "grad_norm": 0.1182182136437938, |
| "learning_rate": 2.6398601398601404e-05, |
| "loss": -0.0104, |
| "step": 151 |
| }, |
| { |
| "clip_ratio": 0.04920857399702072, |
| "epoch": 0.10636808957312806, |
| "grad_norm": 0.09156116791582807, |
| "learning_rate": 2.6573426573426574e-05, |
| "loss": -0.013, |
| "step": 152 |
| }, |
| { |
| "clip_ratio": 0.0027830980252474546, |
| "completion_length": 101.71428680419922, |
| "epoch": 0.10706787963610917, |
| "grad_norm": 0.07071809306988276, |
| "learning_rate": 2.674825174825175e-05, |
| "loss": 0.0011, |
| "num_tokens": 762373.0, |
| "reward": -0.4285714626312256, |
| "reward_std": 0.10101525485515594, |
| "rewards/check_winston_local_func/mean": -0.4285714328289032, |
| "rewards/check_winston_local_func/std": 0.9116845726966858, |
| "step": 153 |
| }, |
| { |
| "clip_ratio": 0.004099779762327671, |
| "epoch": 0.10776766969909027, |
| "grad_norm": 0.07036975743996918, |
| "learning_rate": 2.6923076923076923e-05, |
| "loss": 0.0006, |
| "step": 154 |
| }, |
| { |
| "clip_ratio": 0.011427856050431728, |
| "epoch": 0.10846745976207138, |
| "grad_norm": 0.060173537559690966, |
| "learning_rate": 2.70979020979021e-05, |
| "loss": -0.0002, |
| "step": 155 |
| }, |
| { |
| "clip_ratio": 0.02241707034409046, |
| "epoch": 0.10916724982505248, |
| "grad_norm": 0.04464993792503255, |
| "learning_rate": 2.7272727272727273e-05, |
| "loss": -0.0007, |
| "step": 156 |
| }, |
| { |
| "clip_ratio": 0.003849891945719719, |
| "completion_length": 126.60714721679688, |
| "epoch": 0.10986703988803359, |
| "grad_norm": 0.14247799791087257, |
| "learning_rate": 2.744755244755245e-05, |
| "loss": 0.003, |
| "num_tokens": 785161.0, |
| "reward": -0.25, |
| "reward_std": 0.3535533845424652, |
| "rewards/check_winston_local_func/mean": -0.25, |
| "rewards/check_winston_local_func/std": 0.9770084023475647, |
| "step": 157 |
| }, |
| { |
| "clip_ratio": 0.00803058035671711, |
| "epoch": 0.11056682995101469, |
| "grad_norm": 0.13081890796714626, |
| "learning_rate": 2.762237762237762e-05, |
| "loss": 0.0006, |
| "step": 158 |
| }, |
| { |
| "clip_ratio": 0.02222571335732937, |
| "epoch": 0.1112666200139958, |
| "grad_norm": 0.10130551565904075, |
| "learning_rate": 2.77972027972028e-05, |
| "loss": -0.0021, |
| "step": 159 |
| }, |
| { |
| "clip_ratio": 0.03389605134725571, |
| "epoch": 0.11196641007697691, |
| "grad_norm": 0.0756074031523243, |
| "learning_rate": 2.7972027972027976e-05, |
| "loss": -0.0047, |
| "step": 160 |
| }, |
| { |
| "clip_ratio": 0.003950103186070919, |
| "completion_length": 104.12500762939453, |
| "epoch": 0.11266620013995801, |
| "grad_norm": 0.1290577443710622, |
| "learning_rate": 2.8146853146853146e-05, |
| "loss": 0.002, |
| "num_tokens": 805162.0, |
| "reward": -0.3571428656578064, |
| "reward_std": 0.2020305097103119, |
| "rewards/check_winston_local_func/mean": -0.3571428656578064, |
| "rewards/check_winston_local_func/std": 0.9425028562545776, |
| "step": 161 |
| }, |
| { |
| "clip_ratio": 0.007646625861525536, |
| "epoch": 0.11336599020293912, |
| "grad_norm": 0.11025990408745222, |
| "learning_rate": 2.8321678321678326e-05, |
| "loss": 0.0002, |
| "step": 162 |
| }, |
| { |
| "clip_ratio": 0.01921841874718666, |
| "epoch": 0.11406578026592022, |
| "grad_norm": 0.07245932578181155, |
| "learning_rate": 2.8496503496503496e-05, |
| "loss": -0.0017, |
| "step": 163 |
| }, |
| { |
| "clip_ratio": 0.03461840748786926, |
| "epoch": 0.11476557032890133, |
| "grad_norm": 0.051788726865233656, |
| "learning_rate": 2.8671328671328672e-05, |
| "loss": -0.0028, |
| "step": 164 |
| }, |
| { |
| "clip_ratio": 0.002369140973314643, |
| "completion_length": 98.73214721679688, |
| "epoch": 0.11546536039188243, |
| "grad_norm": 0.1842865637323427, |
| "learning_rate": 2.8846153846153845e-05, |
| "loss": 0.0051, |
| "num_tokens": 825007.0, |
| "reward": -0.0714285746216774, |
| "reward_std": 0.4040610194206238, |
| "rewards/check_winston_local_func/mean": -0.0714285746216774, |
| "rewards/check_winston_local_func/std": 1.0064724683761597, |
| "step": 165 |
| }, |
| { |
| "clip_ratio": 0.012421431951224804, |
| "epoch": 0.11616515045486354, |
| "grad_norm": 0.15047989090077732, |
| "learning_rate": 2.9020979020979022e-05, |
| "loss": 0.0018, |
| "step": 166 |
| }, |
| { |
| "clip_ratio": 0.027025196701288223, |
| "epoch": 0.11686494051784464, |
| "grad_norm": 0.09721729056776199, |
| "learning_rate": 2.91958041958042e-05, |
| "loss": -0.0012, |
| "step": 167 |
| }, |
| { |
| "clip_ratio": 0.04289395734667778, |
| "epoch": 0.11756473058082575, |
| "grad_norm": 0.06975068028842074, |
| "learning_rate": 2.9370629370629372e-05, |
| "loss": -0.0033, |
| "step": 168 |
| }, |
| { |
| "clip_ratio": 0.0018040953436866403, |
| "completion_length": 105.3214340209961, |
| "epoch": 0.11826452064380685, |
| "grad_norm": 0.2677758748707387, |
| "learning_rate": 2.954545454545455e-05, |
| "loss": -0.0015, |
| "num_tokens": 845663.0, |
| "reward": -0.0714285746216774, |
| "reward_std": 0.4040609896183014, |
| "rewards/check_winston_local_func/mean": -0.0714285746216774, |
| "rewards/check_winston_local_func/std": 1.0064724683761597, |
| "step": 169 |
| }, |
| { |
| "clip_ratio": 0.01557006873190403, |
| "epoch": 0.11896431070678797, |
| "grad_norm": 0.2023749080142164, |
| "learning_rate": 2.972027972027972e-05, |
| "loss": -0.0082, |
| "step": 170 |
| }, |
| { |
| "clip_ratio": 0.04334796220064163, |
| "epoch": 0.11966410076976906, |
| "grad_norm": 0.12184054679561386, |
| "learning_rate": 2.9895104895104898e-05, |
| "loss": -0.0128, |
| "step": 171 |
| }, |
| { |
| "clip_ratio": 0.06698625534772873, |
| "epoch": 0.12036389083275018, |
| "grad_norm": 0.08481462032815572, |
| "learning_rate": 3.0069930069930068e-05, |
| "loss": -0.0152, |
| "step": 172 |
| }, |
| { |
| "clip_ratio": 0.002561988076195121, |
| "completion_length": 106.25000762939453, |
| "epoch": 0.12106368089573127, |
| "grad_norm": 0.3222269362637656, |
| "learning_rate": 3.0244755244755245e-05, |
| "loss": -0.0225, |
| "num_tokens": 866213.0, |
| "reward": -0.1785714328289032, |
| "reward_std": 0.45456865429878235, |
| "rewards/check_winston_local_func/mean": -0.1785714328289032, |
| "rewards/check_winston_local_func/std": 0.9928314685821533, |
| "step": 173 |
| }, |
| { |
| "clip_ratio": 0.027860935777425766, |
| "epoch": 0.12176347095871239, |
| "grad_norm": 0.23558262189831372, |
| "learning_rate": 3.0419580419580425e-05, |
| "loss": -0.0308, |
| "step": 174 |
| }, |
| { |
| "clip_ratio": 0.0620122067630291, |
| "epoch": 0.1224632610216935, |
| "grad_norm": 0.16018574686601328, |
| "learning_rate": 3.0594405594405594e-05, |
| "loss": -0.0363, |
| "step": 175 |
| }, |
| { |
| "clip_ratio": 0.0841975286602974, |
| "epoch": 0.1231630510846746, |
| "grad_norm": 0.11961313174632249, |
| "learning_rate": 3.0769230769230774e-05, |
| "loss": -0.0402, |
| "step": 176 |
| }, |
| { |
| "clip_ratio": 0.0038492009043693542, |
| "completion_length": 112.05357360839844, |
| "epoch": 0.12386284114765571, |
| "grad_norm": 0.16841580805802311, |
| "learning_rate": 3.094405594405594e-05, |
| "loss": 0.0144, |
| "num_tokens": 887380.0, |
| "reward": -0.3571428656578064, |
| "reward_std": 0.30304574966430664, |
| "rewards/check_winston_local_func/mean": -0.3571428656578064, |
| "rewards/check_winston_local_func/std": 0.9425028562545776, |
| "step": 177 |
| }, |
| { |
| "clip_ratio": 0.0077889300882816315, |
| "epoch": 0.1245626312106368, |
| "grad_norm": 0.14772994247321478, |
| "learning_rate": 3.111888111888112e-05, |
| "loss": 0.0116, |
| "step": 178 |
| }, |
| { |
| "clip_ratio": 0.024449503049254417, |
| "epoch": 0.12526242127361792, |
| "grad_norm": 0.11835892505404014, |
| "learning_rate": 3.1293706293706294e-05, |
| "loss": 0.0085, |
| "step": 179 |
| }, |
| { |
| "clip_ratio": 0.04173960164189339, |
| "epoch": 0.12596221133659902, |
| "grad_norm": 0.07150863048317996, |
| "learning_rate": 3.146853146853147e-05, |
| "loss": 0.0065, |
| "step": 180 |
| }, |
| { |
| "clip_ratio": 0.0018401921261101961, |
| "completion_length": 130.8928680419922, |
| "epoch": 0.12666200139958012, |
| "grad_norm": 0.19977261410354588, |
| "learning_rate": 3.164335664335665e-05, |
| "loss": 0.0079, |
| "num_tokens": 910956.0, |
| "reward": -0.2857142984867096, |
| "reward_std": 0.4040610194206238, |
| "rewards/check_winston_local_func/mean": -0.2857142984867096, |
| "rewards/check_winston_local_func/std": 0.9669875502586365, |
| "step": 181 |
| }, |
| { |
| "clip_ratio": 0.013327782042324543, |
| "epoch": 0.12736179146256124, |
| "grad_norm": 0.18304114191410556, |
| "learning_rate": 3.181818181818182e-05, |
| "loss": 0.0037, |
| "step": 182 |
| }, |
| { |
| "clip_ratio": 0.035625942051410675, |
| "epoch": 0.12806158152554234, |
| "grad_norm": 0.13774519057774026, |
| "learning_rate": 3.1993006993006994e-05, |
| "loss": -0.0011, |
| "step": 183 |
| }, |
| { |
| "clip_ratio": 0.055436424911022186, |
| "epoch": 0.12876137158852344, |
| "grad_norm": 0.1037769963224376, |
| "learning_rate": 3.216783216783217e-05, |
| "loss": -0.005, |
| "step": 184 |
| }, |
| { |
| "clip_ratio": 0.003232809714972973, |
| "completion_length": 115.85714721679688, |
| "epoch": 0.12946116165150454, |
| "grad_norm": 0.22848421400085006, |
| "learning_rate": 3.234265734265735e-05, |
| "loss": -0.002, |
| "num_tokens": 932532.0, |
| "reward": -0.0714285746216774, |
| "reward_std": 0.4040610194206238, |
| "rewards/check_winston_local_func/mean": -0.0714285746216774, |
| "rewards/check_winston_local_func/std": 1.0064724683761597, |
| "step": 185 |
| }, |
| { |
| "clip_ratio": 0.013981361873447895, |
| "epoch": 0.13016095171448566, |
| "grad_norm": 0.14544684282491208, |
| "learning_rate": 3.251748251748252e-05, |
| "loss": -0.006, |
| "step": 186 |
| }, |
| { |
| "clip_ratio": 0.032285287976264954, |
| "epoch": 0.13086074177746676, |
| "grad_norm": 0.11489336548569441, |
| "learning_rate": 3.269230769230769e-05, |
| "loss": -0.0092, |
| "step": 187 |
| }, |
| { |
| "clip_ratio": 0.05701437592506409, |
| "epoch": 0.13156053184044786, |
| "grad_norm": 0.08462838304844368, |
| "learning_rate": 3.2867132867132866e-05, |
| "loss": -0.0115, |
| "step": 188 |
| }, |
| { |
| "clip_ratio": 0.0032829714473336935, |
| "completion_length": 81.66072082519531, |
| "epoch": 0.13226032190342898, |
| "grad_norm": 0.33315610129233025, |
| "learning_rate": 3.3041958041958046e-05, |
| "loss": -0.0109, |
| "num_tokens": 950027.0, |
| "reward": 0.1428571492433548, |
| "reward_std": 0.5050762891769409, |
| "rewards/check_winston_local_func/mean": 0.1428571492433548, |
| "rewards/check_winston_local_func/std": 0.9987004399299622, |
| "step": 189 |
| }, |
| { |
| "clip_ratio": 0.02284400910139084, |
| "epoch": 0.13296011196641008, |
| "grad_norm": 0.25411099199841936, |
| "learning_rate": 3.321678321678322e-05, |
| "loss": -0.0211, |
| "step": 190 |
| }, |
| { |
| "clip_ratio": 0.06905340403318405, |
| "epoch": 0.13365990202939118, |
| "grad_norm": 0.17966938259048712, |
| "learning_rate": 3.339160839160839e-05, |
| "loss": -0.0292, |
| "step": 191 |
| }, |
| { |
| "clip_ratio": 0.10486488789319992, |
| "epoch": 0.13435969209237228, |
| "grad_norm": 0.12395870104426593, |
| "learning_rate": 3.356643356643357e-05, |
| "loss": -0.0345, |
| "step": 192 |
| }, |
| { |
| "clip_ratio": 0.0039912075735628605, |
| "completion_length": 98.53572082519531, |
| "epoch": 0.1350594821553534, |
| "grad_norm": 0.19967198347280601, |
| "learning_rate": 3.374125874125874e-05, |
| "loss": -0.0034, |
| "num_tokens": 969635.0, |
| "reward": -0.2142857313156128, |
| "reward_std": 0.4040610194206238, |
| "rewards/check_winston_local_func/mean": -0.2142857164144516, |
| "rewards/check_winston_local_func/std": 0.9856107234954834, |
| "step": 193 |
| }, |
| { |
| "clip_ratio": 0.015699883922934532, |
| "epoch": 0.1357592722183345, |
| "grad_norm": 0.14403516372623493, |
| "learning_rate": 3.391608391608392e-05, |
| "loss": -0.0078, |
| "step": 194 |
| }, |
| { |
| "clip_ratio": 0.0434185229241848, |
| "epoch": 0.1364590622813156, |
| "grad_norm": 0.09019420685306537, |
| "learning_rate": 3.409090909090909e-05, |
| "loss": -0.0106, |
| "step": 195 |
| }, |
| { |
| "clip_ratio": 0.06262689083814621, |
| "epoch": 0.1371588523442967, |
| "grad_norm": 0.06525358070480064, |
| "learning_rate": 3.4265734265734265e-05, |
| "loss": -0.012, |
| "step": 196 |
| }, |
| { |
| "clip_ratio": 0.0026181330904364586, |
| "completion_length": 112.64286041259766, |
| "epoch": 0.13785864240727783, |
| "grad_norm": 0.26666733225679445, |
| "learning_rate": 3.4440559440559445e-05, |
| "loss": -0.0005, |
| "num_tokens": 990877.0, |
| "reward": -0.0357142873108387, |
| "reward_std": 0.45456865429878235, |
| "rewards/check_winston_local_func/mean": -0.0357142873108387, |
| "rewards/check_winston_local_func/std": 1.0084062814712524, |
| "step": 197 |
| }, |
| { |
| "clip_ratio": 0.02237066999077797, |
| "epoch": 0.13855843247025892, |
| "grad_norm": 0.196293468974288, |
| "learning_rate": 3.461538461538462e-05, |
| "loss": -0.0081, |
| "step": 198 |
| }, |
| { |
| "clip_ratio": 0.05692709609866142, |
| "epoch": 0.13925822253324002, |
| "grad_norm": 0.12274152758367594, |
| "learning_rate": 3.479020979020979e-05, |
| "loss": -0.0128, |
| "step": 199 |
| }, |
| { |
| "clip_ratio": 0.07971518486738205, |
| "epoch": 0.13995801259622112, |
| "grad_norm": 0.08223267167561776, |
| "learning_rate": 3.4965034965034965e-05, |
| "loss": -0.0154, |
| "step": 200 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 5716, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|