| { |
| "best_metric": 0.21406607329845428, |
| "best_model_checkpoint": "saves/sycophancy/Llama-8B-3.1-Instruct/kto-700/checkpoint-700", |
| "epoch": 9.933333333333334, |
| "eval_steps": 50, |
| "global_step": 780, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12698412698412698, |
| "grad_norm": 0.6788301467895508, |
| "kl": 4.387261390686035, |
| "learning_rate": 6.41025641025641e-07, |
| "logits/chosen": -5470348.467532467, |
| "logits/rejected": -7884086.746987952, |
| "logps/chosen": -18.302797788149352, |
| "logps/rejected": -20.305931734751507, |
| "loss": 0.5003, |
| "rewards/chosen": -0.0015607382376472672, |
| "rewards/margins": -0.002322281828985735, |
| "rewards/rejected": 0.0007615435913384679, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.25396825396825395, |
| "grad_norm": 0.5719761252403259, |
| "kl": 5.789945602416992, |
| "learning_rate": 1.282051282051282e-06, |
| "logits/chosen": -5099802.810810811, |
| "logits/rejected": -7728657.860465116, |
| "logps/chosen": -15.63258815456081, |
| "logps/rejected": -19.22288335755814, |
| "loss": 0.4999, |
| "rewards/chosen": 0.0009517762306574228, |
| "rewards/margins": 0.0008202694979274445, |
| "rewards/rejected": 0.00013150673272997835, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.38095238095238093, |
| "grad_norm": 1.0039992332458496, |
| "kl": 5.201549530029297, |
| "learning_rate": 1.9230769230769234e-06, |
| "logits/chosen": -6082714.074074074, |
| "logits/rejected": -6860618.53164557, |
| "logps/chosen": -18.337599766107253, |
| "logps/rejected": -18.856711085838608, |
| "loss": 0.5001, |
| "rewards/chosen": 0.0019511194140822798, |
| "rewards/margins": -0.0016700430789353758, |
| "rewards/rejected": 0.0036211624930176556, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5079365079365079, |
| "grad_norm": 0.8720445036888123, |
| "kl": 5.458462715148926, |
| "learning_rate": 2.564102564102564e-06, |
| "logits/chosen": -6005976.847058823, |
| "logits/rejected": -7850483.2, |
| "logps/chosen": -16.87245662913603, |
| "logps/rejected": -20.8893798828125, |
| "loss": 0.4999, |
| "rewards/chosen": 0.0022517660084892723, |
| "rewards/margins": 0.0017136495136747176, |
| "rewards/rejected": 0.0005381164948145548, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "grad_norm": 0.7587161064147949, |
| "kl": 5.623558044433594, |
| "learning_rate": 3.205128205128206e-06, |
| "logits/chosen": -6492087.466666667, |
| "logits/rejected": -6836711.152941177, |
| "logps/chosen": -17.93591796875, |
| "logps/rejected": -20.48527401194853, |
| "loss": 0.4998, |
| "rewards/chosen": 0.0073052302996317545, |
| "rewards/margins": 0.0005395582610485603, |
| "rewards/rejected": 0.006765672038583194, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "eval_logits/chosen": -6144470.349206349, |
| "eval_logits/rejected": -7140612.155844156, |
| "eval_logps/chosen": -15.744288853236608, |
| "eval_logps/rejected": -19.299322747564936, |
| "eval_loss": 0.4993804395198822, |
| "eval_rewards/chosen": 0.011447552650693863, |
| "eval_rewards/margins": 0.006427939363028236, |
| "eval_rewards/rejected": 0.0050196132876656275, |
| "eval_runtime": 28.9366, |
| "eval_samples_per_second": 4.838, |
| "eval_steps_per_second": 2.419, |
| "kl": 4.069240093231201, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7619047619047619, |
| "grad_norm": 0.7702682018280029, |
| "kl": 4.712460517883301, |
| "learning_rate": 3.846153846153847e-06, |
| "logits/chosen": -6175395.348837209, |
| "logits/rejected": -7327229.405405405, |
| "logps/chosen": -18.813822901526162, |
| "logps/rejected": -19.35801243137669, |
| "loss": 0.4996, |
| "rewards/chosen": 0.015079736709594727, |
| "rewards/margins": 0.0028353905355608146, |
| "rewards/rejected": 0.012244346174033912, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.8070928454399109, |
| "kl": 5.428678512573242, |
| "learning_rate": 4.487179487179488e-06, |
| "logits/chosen": -5747504.898876404, |
| "logits/rejected": -7797529.23943662, |
| "logps/chosen": -16.341429292485955, |
| "logps/rejected": -20.183217223261444, |
| "loss": 0.4981, |
| "rewards/chosen": 0.02937422173746516, |
| "rewards/margins": 0.011152571412388481, |
| "rewards/rejected": 0.01822165032507668, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.019047619047619, |
| "grad_norm": 0.9550174474716187, |
| "kl": 8.223318099975586, |
| "learning_rate": 4.999899863449631e-06, |
| "logits/chosen": -6264741.052631579, |
| "logits/rejected": -7049278.476190476, |
| "logps/chosen": -16.723974930612666, |
| "logps/rejected": -18.56858898344494, |
| "loss": 0.4976, |
| "rewards/chosen": 0.05320446114791067, |
| "rewards/margins": 0.022168612121639397, |
| "rewards/rejected": 0.031035849026271274, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.146031746031746, |
| "grad_norm": 1.3274585008621216, |
| "kl": 13.437724113464355, |
| "learning_rate": 4.996395926410354e-06, |
| "logits/chosen": -7089864.091954023, |
| "logits/rejected": -7814464.876712329, |
| "logps/chosen": -14.959357601472702, |
| "logps/rejected": -19.25765196917808, |
| "loss": 0.4894, |
| "rewards/chosen": 0.11598257086742883, |
| "rewards/margins": 0.080287168277866, |
| "rewards/rejected": 0.03569540258956282, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.273015873015873, |
| "grad_norm": 1.2156286239624023, |
| "kl": 15.28927993774414, |
| "learning_rate": 4.9878931808274796e-06, |
| "logits/chosen": -5328334.222222222, |
| "logits/rejected": -7435643.636363637, |
| "logps/chosen": -16.729354858398438, |
| "logps/rejected": -20.145260897549715, |
| "loss": 0.4897, |
| "rewards/chosen": 0.1613529788123237, |
| "rewards/margins": 0.09188530421016193, |
| "rewards/rejected": 0.06946767460216176, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.273015873015873, |
| "eval_logits/chosen": -6027973.079365079, |
| "eval_logits/rejected": -7100376.103896104, |
| "eval_logps/chosen": -14.120830233134921, |
| "eval_logps/rejected": -19.052055854301948, |
| "eval_loss": 0.4845275282859802, |
| "eval_rewards/chosen": 0.17379347483317056, |
| "eval_rewards/margins": 0.1440471064992797, |
| "eval_rewards/rejected": 0.02974636833389084, |
| "eval_runtime": 29.0593, |
| "eval_samples_per_second": 4.818, |
| "eval_steps_per_second": 2.409, |
| "kl": 8.730419158935547, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 1.2826446294784546, |
| "kl": 16.87066078186035, |
| "learning_rate": 4.9744086526850724e-06, |
| "logits/chosen": -6426819.6, |
| "logits/rejected": -6980560.0, |
| "logps/chosen": -14.482907104492188, |
| "logps/rejected": -20.08314208984375, |
| "loss": 0.4695, |
| "rewards/chosen": 0.25329229831695554, |
| "rewards/margins": 0.2625655651092529, |
| "rewards/rejected": -0.009273266792297364, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.5269841269841269, |
| "grad_norm": 1.266455054283142, |
| "kl": 9.237666130065918, |
| "learning_rate": 4.955969343539162e-06, |
| "logits/chosen": -5808524.273972603, |
| "logits/rejected": -7559982.344827586, |
| "logps/chosen": -15.187309369648972, |
| "logps/rejected": -21.856114179238507, |
| "loss": 0.4685, |
| "rewards/chosen": 0.2450432973365261, |
| "rewards/margins": 0.2776630093330211, |
| "rewards/rejected": -0.03261971199649504, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.6539682539682539, |
| "grad_norm": 1.8095053434371948, |
| "kl": 5.514086723327637, |
| "learning_rate": 4.93261217644956e-06, |
| "logits/chosen": -5919740.651162791, |
| "logits/rejected": -6616346.810810811, |
| "logps/chosen": -15.901311830032704, |
| "logps/rejected": -23.59872848923142, |
| "loss": 0.4326, |
| "rewards/chosen": 0.22375809869100882, |
| "rewards/margins": 0.563330051510833, |
| "rewards/rejected": -0.3395719528198242, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.7809523809523808, |
| "grad_norm": 2.0375888347625732, |
| "kl": 2.2235469818115234, |
| "learning_rate": 4.90438392204474e-06, |
| "logits/chosen": -6043188.043956044, |
| "logits/rejected": -6697519.304347826, |
| "logps/chosen": -14.64537366929945, |
| "logps/rejected": -23.984520069067027, |
| "loss": 0.402, |
| "rewards/chosen": 0.3531372573349502, |
| "rewards/margins": 0.8639071981396944, |
| "rewards/rejected": -0.5107699408047441, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.9079365079365078, |
| "grad_norm": 1.6195893287658691, |
| "kl": 4.571013927459717, |
| "learning_rate": 4.8713411048678635e-06, |
| "logits/chosen": -3909229.4492753623, |
| "logits/rejected": -7315652.923076923, |
| "logps/chosen": -15.875658118206522, |
| "logps/rejected": -24.993848192822803, |
| "loss": 0.3937, |
| "rewards/chosen": 0.2891683163850204, |
| "rewards/margins": 0.8816854565324705, |
| "rewards/rejected": -0.5925171401474502, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.9079365079365078, |
| "eval_logits/chosen": -5056723.301587301, |
| "eval_logits/rejected": -6579766.025974026, |
| "eval_logps/chosen": -12.606183733258929, |
| "eval_logps/rejected": -26.850310090300326, |
| "eval_loss": 0.37451449036598206, |
| "eval_rewards/chosen": 0.3252580128018818, |
| "eval_rewards/margins": 1.0753370810724783, |
| "eval_rewards/rejected": -0.7500790682705966, |
| "eval_runtime": 28.8826, |
| "eval_samples_per_second": 4.847, |
| "eval_steps_per_second": 2.424, |
| "kl": 0.8561515808105469, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.038095238095238, |
| "grad_norm": 1.7494949102401733, |
| "kl": 0.0, |
| "learning_rate": 4.83354989019146e-06, |
| "logits/chosen": -4793371.826086956, |
| "logits/rejected": -6571360.94117647, |
| "logps/chosen": -12.75027200450068, |
| "logps/rejected": -29.481036017922793, |
| "loss": 0.3622, |
| "rewards/chosen": 0.4139024900353473, |
| "rewards/margins": 1.3404797775971005, |
| "rewards/rejected": -0.9265772875617532, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.165079365079365, |
| "grad_norm": 1.6517891883850098, |
| "kl": 0.9644966125488281, |
| "learning_rate": 4.791085951527408e-06, |
| "logits/chosen": -5720041.788235294, |
| "logits/rejected": -6800391.253333333, |
| "logps/chosen": -14.041024241727941, |
| "logps/rejected": -31.237737630208333, |
| "loss": 0.3456, |
| "rewards/chosen": 0.43515203139361214, |
| "rewards/margins": 1.515641024720435, |
| "rewards/rejected": -1.080488993326823, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.292063492063492, |
| "grad_norm": 2.6275811195373535, |
| "kl": 4.330084800720215, |
| "learning_rate": 4.744034319097536e-06, |
| "logits/chosen": -5309098.366197183, |
| "logits/rejected": -6363057.617977528, |
| "logps/chosen": -12.973335373569542, |
| "logps/rejected": -34.70361876755618, |
| "loss": 0.29, |
| "rewards/chosen": 0.5134559953716439, |
| "rewards/margins": 2.0540087132877254, |
| "rewards/rejected": -1.5405527179160814, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.419047619047619, |
| "grad_norm": 1.039764642715454, |
| "kl": 0.5446796417236328, |
| "learning_rate": 4.692489209568234e-06, |
| "logits/chosen": -5042669.6, |
| "logits/rejected": -7686574.4, |
| "logps/chosen": -10.94500503540039, |
| "logps/rejected": -37.45916748046875, |
| "loss": 0.2867, |
| "rewards/chosen": 0.6056368827819825, |
| "rewards/margins": 2.4049492835998536, |
| "rewards/rejected": -1.7993124008178711, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.546031746031746, |
| "grad_norm": 4.989805221557617, |
| "kl": 3.305936813354492, |
| "learning_rate": 4.636553837390051e-06, |
| "logits/chosen": -3932123.4285714286, |
| "logits/rejected": -6716783.036144578, |
| "logps/chosen": -14.493068942775974, |
| "logps/rejected": -43.533367846385545, |
| "loss": 0.2821, |
| "rewards/chosen": 0.3073018061650264, |
| "rewards/margins": 2.6910732199451473, |
| "rewards/rejected": -2.3837714137801207, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.546031746031746, |
| "eval_logits/chosen": -4519878.603174604, |
| "eval_logits/rejected": -6449476.571428572, |
| "eval_logps/chosen": -12.711128355964782, |
| "eval_logps/rejected": -41.09160980620941, |
| "eval_loss": 0.27482813596725464, |
| "eval_rewards/chosen": 0.31476356869652156, |
| "eval_rewards/margins": 2.4889729652569925, |
| "eval_rewards/rejected": -2.174209396560471, |
| "eval_runtime": 28.943, |
| "eval_samples_per_second": 4.837, |
| "eval_steps_per_second": 2.419, |
| "kl": 0.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.6730158730158733, |
| "grad_norm": 1.1182432174682617, |
| "kl": 0.0, |
| "learning_rate": 4.5763402081200295e-06, |
| "logits/chosen": -4791521.156626506, |
| "logits/rejected": -7367645.090909091, |
| "logps/chosen": -12.727749376411898, |
| "logps/rejected": -46.403529575892854, |
| "loss": 0.2666, |
| "rewards/chosen": 0.36001044583607866, |
| "rewards/margins": 3.122220251411363, |
| "rewards/rejected": -2.762209805575284, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 1.5499324798583984, |
| "kl": 0.0, |
| "learning_rate": 4.511968894140639e-06, |
| "logits/chosen": -3259810.835443038, |
| "logits/rejected": -7566860.641975309, |
| "logps/chosen": -13.120756897745252, |
| "logps/rejected": -44.52433871045525, |
| "loss": 0.2741, |
| "rewards/chosen": 0.44851534879660304, |
| "rewards/margins": 2.8181276775967574, |
| "rewards/rejected": -2.3696123288001543, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.9269841269841272, |
| "grad_norm": 1.0529024600982666, |
| "kl": 8.710567474365234, |
| "learning_rate": 4.443568793224415e-06, |
| "logits/chosen": -4267431.804878049, |
| "logits/rejected": -7291750.564102564, |
| "logps/chosen": -12.697584198742378, |
| "logps/rejected": -43.92189065004006, |
| "loss": 0.2893, |
| "rewards/chosen": 0.34749545120611425, |
| "rewards/margins": 2.7181909076864827, |
| "rewards/rejected": -2.3706954564803686, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.057142857142857, |
| "grad_norm": 3.3076581954956055, |
| "kl": 0.0, |
| "learning_rate": 4.3712768704277535e-06, |
| "logits/chosen": -3580050.337078652, |
| "logits/rejected": -7818372.507042253, |
| "logps/chosen": -10.8568588428283, |
| "logps/rejected": -48.93149964238556, |
| "loss": 0.2325, |
| "rewards/chosen": 0.8793054644981128, |
| "rewards/margins": 3.6636493451649264, |
| "rewards/rejected": -2.7843438806668135, |
| "step": 240 |
| }, |
| { |
| "epoch": 3.1841269841269844, |
| "grad_norm": 0.7128299474716187, |
| "kl": 0.0, |
| "learning_rate": 4.2952378838306855e-06, |
| "logits/chosen": -4378369.263157895, |
| "logits/rejected": -6322734.095238095, |
| "logps/chosen": -10.363418579101562, |
| "logps/rejected": -55.15029761904762, |
| "loss": 0.2081, |
| "rewards/chosen": 0.657376439947831, |
| "rewards/margins": 4.1836971244716405, |
| "rewards/rejected": -3.5263206845238093, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.1841269841269844, |
| "eval_logits/chosen": -4612124.952380952, |
| "eval_logits/rejected": -6554266.597402598, |
| "eval_logps/chosen": -12.67233615451389, |
| "eval_logps/rejected": -53.95966289569805, |
| "eval_loss": 0.2370194047689438, |
| "eval_rewards/chosen": 0.31864278278653585, |
| "eval_rewards/margins": 3.7796571712301232, |
| "eval_rewards/rejected": -3.4610143884435876, |
| "eval_runtime": 28.9457, |
| "eval_samples_per_second": 4.837, |
| "eval_steps_per_second": 2.418, |
| "kl": 0.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.311111111111111, |
| "grad_norm": 2.5890753269195557, |
| "kl": 0.0, |
| "learning_rate": 4.215604094671835e-06, |
| "logits/chosen": -4538084.512820513, |
| "logits/rejected": -7292885.853658536, |
| "logps/chosen": -14.071156037159454, |
| "logps/rejected": -56.76498189786585, |
| "loss": 0.2585, |
| "rewards/chosen": 0.23726805662497497, |
| "rewards/margins": 3.8709395147995176, |
| "rewards/rejected": -3.633671458174543, |
| "step": 260 |
| }, |
| { |
| "epoch": 3.4380952380952383, |
| "grad_norm": 2.1079933643341064, |
| "kl": 0.0, |
| "learning_rate": 4.1325349624589625e-06, |
| "logits/chosen": -5173939.9529411765, |
| "logits/rejected": -6703439.36, |
| "logps/chosen": -11.632867072610294, |
| "logps/rejected": -67.75546875, |
| "loss": 0.231, |
| "rewards/chosen": 0.6439653284409467, |
| "rewards/margins": 5.392097652659697, |
| "rewards/rejected": -4.74813232421875, |
| "step": 270 |
| }, |
| { |
| "epoch": 3.565079365079365, |
| "grad_norm": 1.217970609664917, |
| "kl": 0.0, |
| "learning_rate": 4.046196825665638e-06, |
| "logits/chosen": -5146073.518987342, |
| "logits/rejected": -6300332.641975309, |
| "logps/chosen": -11.147742935373813, |
| "logps/rejected": -71.35388937114197, |
| "loss": 0.1961, |
| "rewards/chosen": 0.5749296840233139, |
| "rewards/margins": 5.709579266874393, |
| "rewards/rejected": -5.13464958285108, |
| "step": 280 |
| }, |
| { |
| "epoch": 3.6920634920634923, |
| "grad_norm": 1.1603816747665405, |
| "kl": 0.0, |
| "learning_rate": 3.956762568653378e-06, |
| "logits/chosen": -4096646.0235294118, |
| "logits/rejected": -6349589.333333333, |
| "logps/chosen": -13.472160788143382, |
| "logps/rejected": -70.83629557291667, |
| "loss": 0.2347, |
| "rewards/chosen": 0.394403076171875, |
| "rewards/margins": 5.51095947265625, |
| "rewards/rejected": -5.116556396484375, |
| "step": 290 |
| }, |
| { |
| "epoch": 3.819047619047619, |
| "grad_norm": 9.697389602661133, |
| "kl": 0.0, |
| "learning_rate": 3.8644112754862614e-06, |
| "logits/chosen": -4819242.271604938, |
| "logits/rejected": -6257292.151898734, |
| "logps/chosen": -17.26012279369213, |
| "logps/rejected": -67.79164606408227, |
| "loss": 0.2651, |
| "rewards/chosen": 0.10824219974470728, |
| "rewards/margins": 4.995783765458711, |
| "rewards/rejected": -4.887541565714003, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.819047619047619, |
| "eval_logits/chosen": -4312815.746031746, |
| "eval_logits/rejected": -6286123.220779221, |
| "eval_logps/chosen": -15.552150181361608, |
| "eval_logps/rejected": -72.25813590706169, |
| "eval_loss": 0.22668065130710602, |
| "eval_rewards/chosen": 0.030661461845276846, |
| "eval_rewards/margins": 5.321523309958102, |
| "eval_rewards/rejected": -5.290861848112825, |
| "eval_runtime": 28.9019, |
| "eval_samples_per_second": 4.844, |
| "eval_steps_per_second": 2.422, |
| "kl": 0.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 3.9460317460317462, |
| "grad_norm": 2.1276402473449707, |
| "kl": 0.0, |
| "learning_rate": 3.76932787133117e-06, |
| "logits/chosen": -5180418.157303371, |
| "logits/rejected": -6265091.605633803, |
| "logps/chosen": -13.515192953388343, |
| "logps/rejected": -74.84758747799296, |
| "loss": 0.2341, |
| "rewards/chosen": 0.4038938458046217, |
| "rewards/margins": 6.011258124248724, |
| "rewards/rejected": -5.607364278444102, |
| "step": 310 |
| }, |
| { |
| "epoch": 4.076190476190476, |
| "grad_norm": 2.4103806018829346, |
| "kl": 0.0, |
| "learning_rate": 3.6717027521617593e-06, |
| "logits/chosen": -5085308.097560976, |
| "logits/rejected": -6567972.512820513, |
| "logps/chosen": -9.632892143435594, |
| "logps/rejected": -75.03093699919872, |
| "loss": 0.1853, |
| "rewards/chosen": 0.7703150307259908, |
| "rewards/margins": 6.228486777395662, |
| "rewards/rejected": -5.458171746669671, |
| "step": 320 |
| }, |
| { |
| "epoch": 4.203174603174603, |
| "grad_norm": 1.1956801414489746, |
| "kl": 0.0, |
| "learning_rate": 3.5717314035076355e-06, |
| "logits/chosen": -4886774.588235294, |
| "logits/rejected": -6263912.96, |
| "logps/chosen": -12.183177274816176, |
| "logps/rejected": -80.09672526041666, |
| "loss": 0.1884, |
| "rewards/chosen": 0.5844921336454504, |
| "rewards/margins": 6.655275418150659, |
| "rewards/rejected": -6.070783284505208, |
| "step": 330 |
| }, |
| { |
| "epoch": 4.33015873015873, |
| "grad_norm": 1.5028984546661377, |
| "kl": 0.0, |
| "learning_rate": 3.4696140090121377e-06, |
| "logits/chosen": -3578742.436781609, |
| "logits/rejected": -6682695.890410959, |
| "logps/chosen": -11.774405643857758, |
| "logps/rejected": -76.78813944777397, |
| "loss": 0.2051, |
| "rewards/chosen": 0.5974952434671337, |
| "rewards/margins": 6.330363176766578, |
| "rewards/rejected": -5.732867933299444, |
| "step": 340 |
| }, |
| { |
| "epoch": 4.457142857142857, |
| "grad_norm": 0.1931433230638504, |
| "kl": 0.0, |
| "learning_rate": 3.3655550495825824e-06, |
| "logits/chosen": -4233703.2, |
| "logits/rejected": -6651906.4, |
| "logps/chosen": -9.5177734375, |
| "logps/rejected": -78.2944580078125, |
| "loss": 0.194, |
| "rewards/chosen": 0.690055513381958, |
| "rewards/margins": 6.46673321723938, |
| "rewards/rejected": -5.7766777038574215, |
| "step": 350 |
| }, |
| { |
| "epoch": 4.457142857142857, |
| "eval_logits/chosen": -4256054.349206349, |
| "eval_logits/rejected": -6266110.337662337, |
| "eval_logps/chosen": -15.28530544704861, |
| "eval_logps/rejected": -73.9473924512987, |
| "eval_loss": 0.22176755964756012, |
| "eval_rewards/chosen": 0.057345905001201326, |
| "eval_rewards/margins": 5.517134041669221, |
| "eval_rewards/rejected": -5.45978813666802, |
| "eval_runtime": 28.8957, |
| "eval_samples_per_second": 4.845, |
| "eval_steps_per_second": 2.423, |
| "kl": 0.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 4.584126984126984, |
| "grad_norm": 1.0705475807189941, |
| "kl": 0.0, |
| "learning_rate": 3.2597628939356174e-06, |
| "logits/chosen": -5245969.777777778, |
| "logits/rejected": -6771841.454545454, |
| "logps/chosen": -16.555372450086807, |
| "logps/rejected": -78.77862548828125, |
| "loss": 0.2249, |
| "rewards/chosen": -0.15620039569007027, |
| "rewards/margins": 5.747189274942032, |
| "rewards/rejected": -5.9033896706321025, |
| "step": 360 |
| }, |
| { |
| "epoch": 4.711111111111111, |
| "grad_norm": 1.4375556707382202, |
| "kl": 0.0, |
| "learning_rate": 3.1524493813575936e-06, |
| "logits/chosen": -4801529.518987342, |
| "logits/rejected": -6302219.0617283955, |
| "logps/chosen": -13.753037851068038, |
| "logps/rejected": -85.18824749228395, |
| "loss": 0.181, |
| "rewards/chosen": 0.5262709508968305, |
| "rewards/margins": 7.078058904214732, |
| "rewards/rejected": -6.551787953317901, |
| "step": 370 |
| }, |
| { |
| "epoch": 4.838095238095238, |
| "grad_norm": 2.527923822402954, |
| "kl": 0.0, |
| "learning_rate": 3.043829397515419e-06, |
| "logits/chosen": -4641179.368421053, |
| "logits/rejected": -6583240.380952381, |
| "logps/chosen": -15.901164807771382, |
| "logps/rejected": -83.90581984747024, |
| "loss": 0.2053, |
| "rewards/chosen": 0.2012840572156404, |
| "rewards/margins": 6.545129969604034, |
| "rewards/rejected": -6.343845912388393, |
| "step": 380 |
| }, |
| { |
| "epoch": 4.965079365079365, |
| "grad_norm": 2.1507320404052734, |
| "kl": 1.0980682373046875, |
| "learning_rate": 2.9341204441673267e-06, |
| "logits/chosen": -4626031.816091954, |
| "logits/rejected": -5921111.671232876, |
| "logps/chosen": -18.242044383081897, |
| "logps/rejected": -79.91437687285959, |
| "loss": 0.2487, |
| "rewards/chosen": 0.032672991697815644, |
| "rewards/margins": 5.970230682461942, |
| "rewards/rejected": -5.937557690764127, |
| "step": 390 |
| }, |
| { |
| "epoch": 5.095238095238095, |
| "grad_norm": 0.8530715107917786, |
| "kl": 0.0, |
| "learning_rate": 2.8235422036351384e-06, |
| "logits/chosen": -3311293.263157895, |
| "logits/rejected": -6519166.476190476, |
| "logps/chosen": -10.424122860557155, |
| "logps/rejected": -86.8595958891369, |
| "loss": 0.168, |
| "rewards/chosen": 0.7402381395038805, |
| "rewards/margins": 7.611847781895993, |
| "rewards/rejected": -6.871609642392113, |
| "step": 400 |
| }, |
| { |
| "epoch": 5.095238095238095, |
| "eval_logits/chosen": -4093047.365079365, |
| "eval_logits/rejected": -6134765.714285715, |
| "eval_logps/chosen": -16.138509114583332, |
| "eval_logps/rejected": -79.63079596185065, |
| "eval_loss": 0.22183124721050262, |
| "eval_rewards/chosen": -0.027974476889958456, |
| "eval_rewards/margins": 6.000154010833256, |
| "eval_rewards/rejected": -6.028128487723214, |
| "eval_runtime": 28.9036, |
| "eval_samples_per_second": 4.844, |
| "eval_steps_per_second": 2.422, |
| "kl": 0.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 5.222222222222222, |
| "grad_norm": 0.8624681830406189, |
| "kl": 5.43211555480957, |
| "learning_rate": 2.7123160989101623e-06, |
| "logits/chosen": -3609233.095890411, |
| "logits/rejected": -7120320.0, |
| "logps/chosen": -15.152233385059931, |
| "logps/rejected": -86.3641006645115, |
| "loss": 0.1736, |
| "rewards/chosen": 0.36869326029738336, |
| "rewards/margins": 7.0947001299094525, |
| "rewards/rejected": -6.726006869612069, |
| "step": 410 |
| }, |
| { |
| "epoch": 5.349206349206349, |
| "grad_norm": 0.0985303744673729, |
| "kl": 0.0, |
| "learning_rate": 2.6006648502735384e-06, |
| "logits/chosen": -4783202.666666667, |
| "logits/rejected": -5948822.736842105, |
| "logps/chosen": -11.800664992559524, |
| "logps/rejected": -89.84853001644737, |
| "loss": 0.1953, |
| "rewards/chosen": 0.5908058257330031, |
| "rewards/margins": 7.559863410796737, |
| "rewards/rejected": -6.969057585063734, |
| "step": 420 |
| }, |
| { |
| "epoch": 5.476190476190476, |
| "grad_norm": 1.2574081420898438, |
| "kl": 0.0, |
| "learning_rate": 2.4888120293188915e-06, |
| "logits/chosen": -4590456.746666667, |
| "logits/rejected": -6820007.152941177, |
| "logps/chosen": -17.311272786458332, |
| "logps/rejected": -84.2857479319853, |
| "loss": 0.2004, |
| "rewards/chosen": 0.06162989298502604, |
| "rewards/margins": 6.595223642985026, |
| "rewards/rejected": -6.53359375, |
| "step": 430 |
| }, |
| { |
| "epoch": 5.603174603174603, |
| "grad_norm": 0.8560766577720642, |
| "kl": 0.0, |
| "learning_rate": 2.376981611270305e-06, |
| "logits/chosen": -5536871.48051948, |
| "logits/rejected": -5756388.626506024, |
| "logps/chosen": -14.159840769581981, |
| "logps/rejected": -91.81920651355422, |
| "loss": 0.1843, |
| "rewards/chosen": 0.1429041949185458, |
| "rewards/margins": 7.223366914939254, |
| "rewards/rejected": -7.080462720020708, |
| "step": 440 |
| }, |
| { |
| "epoch": 5.73015873015873, |
| "grad_norm": 0.9085766673088074, |
| "kl": 0.0, |
| "learning_rate": 2.265397526492052e-06, |
| "logits/chosen": -3770334.7848101268, |
| "logits/rejected": -6611462.320987654, |
| "logps/chosen": -16.884657461431964, |
| "logps/rejected": -86.77527006172839, |
| "loss": 0.2268, |
| "rewards/chosen": -0.03714804106120822, |
| "rewards/margins": 6.53030560236086, |
| "rewards/rejected": -6.567453643422068, |
| "step": 450 |
| }, |
| { |
| "epoch": 5.73015873015873, |
| "eval_logits/chosen": -4156519.111111111, |
| "eval_logits/rejected": -6205292.883116883, |
| "eval_logps/chosen": -17.346346416170636, |
| "eval_logps/rejected": -87.04986810064935, |
| "eval_loss": 0.2162507325410843, |
| "eval_rewards/chosen": -0.14875823732406374, |
| "eval_rewards/margins": 6.6212763545489075, |
| "eval_rewards/rejected": -6.770034591872971, |
| "eval_runtime": 28.93, |
| "eval_samples_per_second": 4.839, |
| "eval_steps_per_second": 2.42, |
| "kl": 0.0, |
| "step": 450 |
| }, |
| { |
| "epoch": 5.857142857142857, |
| "grad_norm": 0.10580164194107056, |
| "kl": 0.0, |
| "learning_rate": 2.154283212088168e-06, |
| "logits/chosen": -5665110.476190476, |
| "logits/rejected": -7303102.315789473, |
| "logps/chosen": -18.501912434895832, |
| "logps/rejected": -97.78473221628289, |
| "loss": 0.1947, |
| "rewards/chosen": -0.05397450356256394, |
| "rewards/margins": 7.66953945100158, |
| "rewards/rejected": -7.723513954564145, |
| "step": 460 |
| }, |
| { |
| "epoch": 5.984126984126984, |
| "grad_norm": 11.131406784057617, |
| "kl": 0.0, |
| "learning_rate": 2.0438611644897186e-06, |
| "logits/chosen": -5625277.9130434785, |
| "logits/rejected": -5779500.235294118, |
| "logps/chosen": -12.774372930112092, |
| "logps/rejected": -83.66164263556985, |
| "loss": 0.2255, |
| "rewards/chosen": 0.533203995746115, |
| "rewards/margins": 6.9899062842054445, |
| "rewards/rejected": -6.456702288459329, |
| "step": 470 |
| }, |
| { |
| "epoch": 6.114285714285714, |
| "grad_norm": 1.4914944171905518, |
| "kl": 0.0, |
| "learning_rate": 1.934352493925695e-06, |
| "logits/chosen": -4793293.268292683, |
| "logits/rejected": -6536903.384615385, |
| "logps/chosen": -11.555212997808688, |
| "logps/rejected": -92.60106169871794, |
| "loss": 0.1766, |
| "rewards/chosen": 0.5902807654404059, |
| "rewards/margins": 7.9360464906006625, |
| "rewards/rejected": -7.345765725160256, |
| "step": 480 |
| }, |
| { |
| "epoch": 6.241269841269841, |
| "grad_norm": 0.13757291436195374, |
| "kl": 0.0, |
| "learning_rate": 1.8259764816696413e-06, |
| "logits/chosen": -3754589.5384615385, |
| "logits/rejected": -7616852.292682927, |
| "logps/chosen": -11.699952736879007, |
| "logps/rejected": -88.94584246379573, |
| "loss": 0.1854, |
| "rewards/chosen": 0.593261474218124, |
| "rewards/margins": 7.3913400577261275, |
| "rewards/rejected": -6.798078583508003, |
| "step": 490 |
| }, |
| { |
| "epoch": 6.368253968253969, |
| "grad_norm": 0.5682312846183777, |
| "kl": 0.0, |
| "learning_rate": 1.7189501409486061e-06, |
| "logits/chosen": -5201961.558441559, |
| "logits/rejected": -6742080.7710843375, |
| "logps/chosen": -16.54386921672078, |
| "logps/rejected": -90.30962914156626, |
| "loss": 0.1915, |
| "rewards/chosen": 0.06833470332158076, |
| "rewards/margins": 6.907243071167966, |
| "rewards/rejected": -6.838908367846385, |
| "step": 500 |
| }, |
| { |
| "epoch": 6.368253968253969, |
| "eval_logits/chosen": -4369097.650793651, |
| "eval_logits/rejected": -6383271.064935065, |
| "eval_logps/chosen": -16.69223942832341, |
| "eval_logps/rejected": -84.2469434862013, |
| "eval_loss": 0.21938754618167877, |
| "eval_rewards/chosen": -0.08334738110739087, |
| "eval_rewards/margins": 6.406394749320775, |
| "eval_rewards/rejected": -6.489742130428166, |
| "eval_runtime": 28.9437, |
| "eval_samples_per_second": 4.837, |
| "eval_steps_per_second": 2.418, |
| "kl": 0.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 6.495238095238095, |
| "grad_norm": 1.1735241413116455, |
| "kl": 0.0, |
| "learning_rate": 1.613487782393661e-06, |
| "logits/chosen": -3442194.8235294116, |
| "logits/rejected": -6263149.653333333, |
| "logps/chosen": -10.886692181755516, |
| "logps/rejected": -93.58182291666667, |
| "loss": 0.176, |
| "rewards/chosen": 0.7260306414435892, |
| "rewards/margins": 8.333493206547756, |
| "rewards/rejected": -7.607462565104167, |
| "step": 510 |
| }, |
| { |
| "epoch": 6.622222222222222, |
| "grad_norm": 0.7961702942848206, |
| "kl": 0.0, |
| "learning_rate": 1.509800584902108e-06, |
| "logits/chosen": -3291153.3333333335, |
| "logits/rejected": -6859345.454545454, |
| "logps/chosen": -17.832650078667534, |
| "logps/rejected": -86.62845680930398, |
| "loss": 0.2017, |
| "rewards/chosen": -0.013874345355563693, |
| "rewards/margins": 6.7318663717520355, |
| "rewards/rejected": -6.7457407171076, |
| "step": 520 |
| }, |
| { |
| "epoch": 6.749206349206349, |
| "grad_norm": 0.05785604566335678, |
| "kl": 0.0, |
| "learning_rate": 1.4080961727707185e-06, |
| "logits/chosen": -5060515.084337349, |
| "logits/rejected": -5728966.64935065, |
| "logps/chosen": -16.07660426863705, |
| "logps/rejected": -87.74072899756493, |
| "loss": 0.2131, |
| "rewards/chosen": 0.19671148277190795, |
| "rewards/margins": 6.894206980438304, |
| "rewards/rejected": -6.697495497666396, |
| "step": 530 |
| }, |
| { |
| "epoch": 6.876190476190477, |
| "grad_norm": 0.3619508147239685, |
| "kl": 0.0, |
| "learning_rate": 1.3085781999467303e-06, |
| "logits/chosen": -5035710.608695652, |
| "logits/rejected": -7935128.470588235, |
| "logps/chosen": -9.16219827403193, |
| "logps/rejected": -96.11182358685662, |
| "loss": 0.1709, |
| "rewards/chosen": 0.8181397811226223, |
| "rewards/margins": 8.408293194783008, |
| "rewards/rejected": -7.590153413660386, |
| "step": 540 |
| }, |
| { |
| "epoch": 7.006349206349206, |
| "grad_norm": 0.5851492285728455, |
| "kl": 0.0, |
| "learning_rate": 1.2114459422291205e-06, |
| "logits/chosen": -4834725.052631579, |
| "logits/rejected": -6980896.761904762, |
| "logps/chosen": -20.62944914165296, |
| "logps/rejected": -96.27944800967262, |
| "loss": 0.201, |
| "rewards/chosen": -0.46881359501888875, |
| "rewards/margins": 7.173670797419728, |
| "rewards/rejected": -7.642484392438616, |
| "step": 550 |
| }, |
| { |
| "epoch": 7.006349206349206, |
| "eval_logits/chosen": -4304407.365079365, |
| "eval_logits/rejected": -6343543.688311689, |
| "eval_logps/chosen": -17.056785946800595, |
| "eval_logps/rejected": -86.64208350243507, |
| "eval_loss": 0.21892935037612915, |
| "eval_rewards/chosen": -0.11980220249720983, |
| "eval_rewards/margins": 6.6094553563501925, |
| "eval_rewards/rejected": -6.729257558847403, |
| "eval_runtime": 28.9259, |
| "eval_samples_per_second": 4.84, |
| "eval_steps_per_second": 2.42, |
| "kl": 0.0, |
| "step": 550 |
| }, |
| { |
| "epoch": 7.133333333333334, |
| "grad_norm": 0.6484764814376831, |
| "kl": 0.0, |
| "learning_rate": 1.1168938982367162e-06, |
| "logits/chosen": -4232963.555555556, |
| "logits/rejected": -8014400.0, |
| "logps/chosen": -18.053126205632715, |
| "logps/rejected": -94.37042622626582, |
| "loss": 0.1951, |
| "rewards/chosen": 0.06175481537241995, |
| "rewards/margins": 7.504841463661581, |
| "rewards/rejected": -7.443086648289161, |
| "step": 560 |
| }, |
| { |
| "epoch": 7.26031746031746, |
| "grad_norm": 1.4253727197647095, |
| "kl": 0.0, |
| "learning_rate": 1.0251113999421936e-06, |
| "logits/chosen": -5810530.157303371, |
| "logits/rejected": -5309918.647887324, |
| "logps/chosen": -14.917400917310394, |
| "logps/rejected": -89.89147777288733, |
| "loss": 0.2343, |
| "rewards/chosen": 0.2091124030981171, |
| "rewards/margins": 7.287517649026814, |
| "rewards/rejected": -7.078405245928697, |
| "step": 570 |
| }, |
| { |
| "epoch": 7.387301587301588, |
| "grad_norm": 0.9377846121788025, |
| "kl": 0.0, |
| "learning_rate": 9.362822335518062e-07, |
| "logits/chosen": -4028119.8048780486, |
| "logits/rejected": -6628607.58974359, |
| "logps/chosen": -8.946031523913872, |
| "logps/rejected": -93.52982271634616, |
| "loss": 0.1676, |
| "rewards/chosen": 0.678685444157298, |
| "rewards/margins": 8.047686783800728, |
| "rewards/rejected": -7.36900133964343, |
| "step": 580 |
| }, |
| { |
| "epoch": 7.514285714285714, |
| "grad_norm": 0.4060008227825165, |
| "kl": 0.0, |
| "learning_rate": 8.505842714900298e-07, |
| "logits/chosen": -5100800.831168831, |
| "logits/rejected": -7023735.518072289, |
| "logps/chosen": -14.822768174208603, |
| "logps/rejected": -92.95827842620481, |
| "loss": 0.1884, |
| "rewards/chosen": 0.11542927135120738, |
| "rewards/margins": 7.41691573478306, |
| "rewards/rejected": -7.301486463431853, |
| "step": 590 |
| }, |
| { |
| "epoch": 7.641269841269842, |
| "grad_norm": 0.44603800773620605, |
| "kl": 0.0, |
| "learning_rate": 7.681891162260016e-07, |
| "logits/chosen": -4478451.358024691, |
| "logits/rejected": -7117348.455696203, |
| "logps/chosen": -18.77003460165895, |
| "logps/rejected": -97.46780434137658, |
| "loss": 0.1961, |
| "rewards/chosen": 0.0032680240678198543, |
| "rewards/margins": 7.678102070250573, |
| "rewards/rejected": -7.674834046182753, |
| "step": 600 |
| }, |
| { |
| "epoch": 7.641269841269842, |
| "eval_logits/chosen": -4324965.079365079, |
| "eval_logits/rejected": -6328156.675324676, |
| "eval_logps/chosen": -16.93195258246528, |
| "eval_logps/rejected": -86.6986924208604, |
| "eval_loss": 0.2157014012336731, |
| "eval_rewards/chosen": -0.10731880248539032, |
| "eval_rewards/margins": 6.627598379941558, |
| "eval_rewards/rejected": -6.7349171824269485, |
| "eval_runtime": 28.9184, |
| "eval_samples_per_second": 4.841, |
| "eval_steps_per_second": 2.421, |
| "kl": 0.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 7.768253968253968, |
| "grad_norm": 0.1919259876012802, |
| "kl": 0.0, |
| "learning_rate": 6.892617566550044e-07, |
| "logits/chosen": -4999124.8, |
| "logits/rejected": -7450809.6, |
| "logps/chosen": -15.9777099609375, |
| "logps/rejected": -94.30171508789063, |
| "loss": 0.1997, |
| "rewards/chosen": 0.05527666211128235, |
| "rewards/margins": 7.558959370851516, |
| "rewards/rejected": -7.503682708740234, |
| "step": 610 |
| }, |
| { |
| "epoch": 7.895238095238096, |
| "grad_norm": 0.4005780518054962, |
| "kl": 0.0, |
| "learning_rate": 6.139602377230247e-07, |
| "logits/chosen": -5132258.835443038, |
| "logits/rejected": -5855081.086419753, |
| "logps/chosen": -9.797328514388845, |
| "logps/rejected": -91.02250916280865, |
| "loss": 0.1697, |
| "rewards/chosen": 0.7228279113769531, |
| "rewards/margins": 7.915330533628111, |
| "rewards/rejected": -7.192502622251157, |
| "step": 620 |
| }, |
| { |
| "epoch": 8.025396825396825, |
| "grad_norm": 0.36264705657958984, |
| "kl": 0.0, |
| "learning_rate": 5.424353439559446e-07, |
| "logits/chosen": -5221032.8, |
| "logits/rejected": -7366360.0, |
| "logps/chosen": -14.4659912109375, |
| "logps/rejected": -95.17356567382812, |
| "loss": 0.1683, |
| "rewards/chosen": 0.5725198745727539, |
| "rewards/margins": 7.98663272857666, |
| "rewards/rejected": -7.414112854003906, |
| "step": 630 |
| }, |
| { |
| "epoch": 8.152380952380952, |
| "grad_norm": 0.14060212671756744, |
| "kl": 0.0, |
| "learning_rate": 4.748302975270838e-07, |
| "logits/chosen": -5158584.98630137, |
| "logits/rejected": -7203751.724137931, |
| "logps/chosen": -8.887396825502996, |
| "logps/rejected": -91.2306707974138, |
| "loss": 0.1526, |
| "rewards/chosen": 0.7418512318232288, |
| "rewards/margins": 7.8209843642316335, |
| "rewards/rejected": -7.079133132408405, |
| "step": 640 |
| }, |
| { |
| "epoch": 8.27936507936508, |
| "grad_norm": 1.8452895879745483, |
| "kl": 6.364190101623535, |
| "learning_rate": 4.1128047146765936e-07, |
| "logits/chosen": -4475600.963855422, |
| "logits/rejected": -6167757.714285715, |
| "logps/chosen": -10.917626070689005, |
| "logps/rejected": -88.98798954951299, |
| "loss": 0.1721, |
| "rewards/chosen": 0.7498773781650038, |
| "rewards/margins": 7.733158501338705, |
| "rewards/rejected": -6.983281123173701, |
| "step": 650 |
| }, |
| { |
| "epoch": 8.27936507936508, |
| "eval_logits/chosen": -4329249.523809524, |
| "eval_logits/rejected": -6320159.584415585, |
| "eval_logps/chosen": -17.140528118799605, |
| "eval_logps/rejected": -87.93011870941558, |
| "eval_loss": 0.21569015085697174, |
| "eval_rewards/chosen": -0.12817655290876115, |
| "eval_rewards/margins": 6.729884209570947, |
| "eval_rewards/rejected": -6.858060762479708, |
| "eval_runtime": 28.9092, |
| "eval_samples_per_second": 4.843, |
| "eval_steps_per_second": 2.421, |
| "kl": 0.0, |
| "step": 650 |
| }, |
| { |
| "epoch": 8.406349206349207, |
| "grad_norm": 0.17593051493167877, |
| "kl": 0.0, |
| "learning_rate": 3.51913118594458e-07, |
| "logits/chosen": -4085644.8, |
| "logits/rejected": -7389671.314285714, |
| "logps/chosen": -19.100984700520833, |
| "logps/rejected": -98.90892159598214, |
| "loss": 0.2138, |
| "rewards/chosen": 0.05359976026746962, |
| "rewards/margins": 7.967655284821041, |
| "rewards/rejected": -7.9140555245535715, |
| "step": 660 |
| }, |
| { |
| "epoch": 8.533333333333333, |
| "grad_norm": 0.11416521668434143, |
| "kl": 0.0, |
| "learning_rate": 2.9684711669750313e-07, |
| "logits/chosen": -5479745.471264368, |
| "logits/rejected": -7563712.0, |
| "logps/chosen": -26.79979907507184, |
| "logps/rejected": -94.35269959332192, |
| "loss": 0.2584, |
| "rewards/chosen": -0.9998831913389009, |
| "rewards/margins": 6.4843028505061335, |
| "rewards/rejected": -7.484186041845034, |
| "step": 670 |
| }, |
| { |
| "epoch": 8.66031746031746, |
| "grad_norm": 17.24502182006836, |
| "kl": 0.0, |
| "learning_rate": 2.4619273049796e-07, |
| "logits/chosen": -4943775.555555556, |
| "logits/rejected": -6408256.0, |
| "logps/chosen": -10.501305474175346, |
| "logps/rejected": -97.72674005681819, |
| "loss": 0.1406, |
| "rewards/chosen": 0.6181484858194987, |
| "rewards/margins": 8.29336793494947, |
| "rewards/rejected": -7.675219449129972, |
| "step": 680 |
| }, |
| { |
| "epoch": 8.787301587301588, |
| "grad_norm": 2.4637882709503174, |
| "kl": 0.0, |
| "learning_rate": 2.0005139085293945e-07, |
| "logits/chosen": -5370018.285714285, |
| "logits/rejected": -4552856.0, |
| "logps/chosen": -12.256514776320685, |
| "logps/rejected": -95.3783023231908, |
| "loss": 0.19, |
| "rewards/chosen": 0.5139439446585519, |
| "rewards/margins": 8.120755468096052, |
| "rewards/rejected": -7.6068115234375, |
| "step": 690 |
| }, |
| { |
| "epoch": 8.914285714285715, |
| "grad_norm": 5.328949928283691, |
| "kl": 0.0, |
| "learning_rate": 1.5851549164932118e-07, |
| "logits/chosen": -4298147.7402597405, |
| "logits/rejected": -6892323.469879518, |
| "logps/chosen": -12.497201894784903, |
| "logps/rejected": -95.17148672816265, |
| "loss": 0.1879, |
| "rewards/chosen": 0.4705321572043679, |
| "rewards/margins": 7.979313866071085, |
| "rewards/rejected": -7.508781708866717, |
| "step": 700 |
| }, |
| { |
| "epoch": 8.914285714285715, |
| "eval_logits/chosen": -4337002.666666667, |
| "eval_logits/rejected": -6325528.935064935, |
| "eval_logps/chosen": -17.09599376860119, |
| "eval_logps/rejected": -87.86666751217533, |
| "eval_loss": 0.21406607329845428, |
| "eval_rewards/chosen": -0.123722961970738, |
| "eval_rewards/margins": 6.7279925222520705, |
| "eval_rewards/rejected": -6.851715484222808, |
| "eval_runtime": 28.9604, |
| "eval_samples_per_second": 4.834, |
| "eval_steps_per_second": 2.417, |
| "kl": 0.0, |
| "step": 700 |
| }, |
| { |
| "epoch": 9.044444444444444, |
| "grad_norm": 1.3945693969726562, |
| "kl": 0.0, |
| "learning_rate": 1.2166820479329572e-07, |
| "logits/chosen": -4539691.317073171, |
| "logits/rejected": -7435501.128205128, |
| "logps/chosen": -17.02510480182927, |
| "logps/rejected": -93.79669345953526, |
| "loss": 0.2091, |
| "rewards/chosen": 0.06722603774652249, |
| "rewards/margins": 7.387677823103092, |
| "rewards/rejected": -7.32045178535657, |
| "step": 710 |
| }, |
| { |
| "epoch": 9.17142857142857, |
| "grad_norm": 1.6716283559799194, |
| "kl": 0.0, |
| "learning_rate": 8.958331366609424e-08, |
| "logits/chosen": -4157219.2, |
| "logits/rejected": -6966880.711111112, |
| "logps/chosen": -10.706447056361608, |
| "logps/rejected": -92.81272786458334, |
| "loss": 0.1464, |
| "rewards/chosen": 0.6557066781180245, |
| "rewards/margins": 8.015752386668371, |
| "rewards/rejected": -7.360045708550347, |
| "step": 720 |
| }, |
| { |
| "epoch": 9.2984126984127, |
| "grad_norm": 0.8815398812294006, |
| "kl": 0.0, |
| "learning_rate": 6.232506537939942e-08, |
| "logits/chosen": -3989393.734939759, |
| "logits/rejected": -5874582.441558441, |
| "logps/chosen": -15.733169004141565, |
| "logps/rejected": -99.3365145596591, |
| "loss": 0.1782, |
| "rewards/chosen": 0.12129955981151168, |
| "rewards/margins": 8.070443006696658, |
| "rewards/rejected": -7.949143446885146, |
| "step": 730 |
| }, |
| { |
| "epoch": 9.425396825396826, |
| "grad_norm": 0.6676042079925537, |
| "kl": 0.0, |
| "learning_rate": 3.994804212627462e-08, |
| "logits/chosen": -4523181.714285715, |
| "logits/rejected": -7066150.554216867, |
| "logps/chosen": -17.300652838372564, |
| "logps/rejected": -97.79651614269578, |
| "loss": 0.1915, |
| "rewards/chosen": -0.0630725266097428, |
| "rewards/margins": 7.753394023183179, |
| "rewards/rejected": -7.8164665497929215, |
| "step": 740 |
| }, |
| { |
| "epoch": 9.552380952380952, |
| "grad_norm": 0.48196038603782654, |
| "kl": 0.0, |
| "learning_rate": 2.2497051885228825e-08, |
| "logits/chosen": -4720270.769230769, |
| "logits/rejected": -6136928.463768116, |
| "logps/chosen": -19.533128004807693, |
| "logps/rejected": -87.8185009057971, |
| "loss": 0.2335, |
| "rewards/chosen": -0.10939908289647364, |
| "rewards/margins": 6.794888946428708, |
| "rewards/rejected": -6.904288029325182, |
| "step": 750 |
| }, |
| { |
| "epoch": 9.552380952380952, |
| "eval_logits/chosen": -4329628.444444444, |
| "eval_logits/rejected": -6335641.35064935, |
| "eval_logps/chosen": -17.1407470703125, |
| "eval_logps/rejected": -87.96758319805195, |
| "eval_loss": 0.21577061712741852, |
| "eval_rewards/chosen": -0.12819823007734996, |
| "eval_rewards/margins": 6.733608664200247, |
| "eval_rewards/rejected": -6.861806894277597, |
| "eval_runtime": 28.9438, |
| "eval_samples_per_second": 4.837, |
| "eval_steps_per_second": 2.418, |
| "kl": 0.0, |
| "step": 750 |
| }, |
| { |
| "epoch": 9.679365079365079, |
| "grad_norm": 1.612648844718933, |
| "kl": 0.0, |
| "learning_rate": 1.0007038696262517e-08, |
| "logits/chosen": -5430106.046511628, |
| "logits/rejected": -6555316.324324325, |
| "logps/chosen": -11.707361265670421, |
| "logps/rejected": -99.71012589738176, |
| "loss": 0.1738, |
| "rewards/chosen": 0.5538684933684593, |
| "rewards/margins": 8.572764647823695, |
| "rewards/rejected": -8.018896154455236, |
| "step": 760 |
| }, |
| { |
| "epoch": 9.806349206349207, |
| "grad_norm": 0.2559433877468109, |
| "kl": 0.0, |
| "learning_rate": 2.5030126885694505e-09, |
| "logits/chosen": -4649945.142857143, |
| "logits/rejected": -6114650.52631579, |
| "logps/chosen": -11.616304670061384, |
| "logps/rejected": -92.70840614720395, |
| "loss": 0.1815, |
| "rewards/chosen": 0.6737716311500186, |
| "rewards/margins": 7.9078245903913835, |
| "rewards/rejected": -7.234052959241365, |
| "step": 770 |
| }, |
| { |
| "epoch": 9.933333333333334, |
| "grad_norm": 0.04368880018591881, |
| "kl": 0.0, |
| "learning_rate": 0.0, |
| "logits/chosen": -4589314.493506493, |
| "logits/rejected": -6493096.86746988, |
| "logps/chosen": -12.316443505225244, |
| "logps/rejected": -93.54054499246988, |
| "loss": 0.1779, |
| "rewards/chosen": 0.5961562317687196, |
| "rewards/margins": 8.000441340955467, |
| "rewards/rejected": -7.404285109186747, |
| "step": 780 |
| }, |
| { |
| "epoch": 9.933333333333334, |
| "step": 780, |
| "total_flos": 6.965271112148582e+16, |
| "train_loss": 0.26206854444283706, |
| "train_runtime": 4670.2135, |
| "train_samples_per_second": 2.698, |
| "train_steps_per_second": 0.167 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 780, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.965271112148582e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|