| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.3425393172228615, |
| "eval_steps": 500, |
| "global_step": 3500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0019179133103183737, |
| "grad_norm": 1.7578125, |
| "learning_rate": 2.468474965491109e-07, |
| "logits/chosen": -6.4269118309021, |
| "logits/rejected": -6.423974514007568, |
| "logps/chosen": -56.09489059448242, |
| "logps/rejected": -55.90266799926758, |
| "loss": 0.6935, |
| "rewards/accuracies": 0.27812498807907104, |
| "rewards/chosen": -4.531387094175443e-05, |
| "rewards/margins": -0.0008066934533417225, |
| "rewards/rejected": 0.0007613796042278409, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0038358266206367474, |
| "grad_norm": 2.125, |
| "learning_rate": 5.554068672354994e-07, |
| "logits/chosen": -6.386446952819824, |
| "logits/rejected": -6.378155708312988, |
| "logps/chosen": -57.592018127441406, |
| "logps/rejected": -56.58235549926758, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": 0.0007413614657707512, |
| "rewards/margins": 0.0003508913214318454, |
| "rewards/rejected": 0.00039047005702741444, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005753739930955121, |
| "grad_norm": 1.921875, |
| "learning_rate": 8.63966237921888e-07, |
| "logits/chosen": -6.385983467102051, |
| "logits/rejected": -6.420374393463135, |
| "logps/chosen": -60.8471565246582, |
| "logps/rejected": -59.70293045043945, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.4781250059604645, |
| "rewards/chosen": -0.0009136783191934228, |
| "rewards/margins": -0.0002154188259737566, |
| "rewards/rejected": -0.0006982595077715814, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.007671653241273495, |
| "grad_norm": 2.46875, |
| "learning_rate": 1.1725256086082765e-06, |
| "logits/chosen": -6.456655979156494, |
| "logits/rejected": -6.467028617858887, |
| "logps/chosen": -59.60247039794922, |
| "logps/rejected": -58.28697967529297, |
| "loss": 0.6934, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": 0.0006150425178930163, |
| "rewards/margins": -0.0006757881492376328, |
| "rewards/rejected": 0.0012908302014693618, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009589566551591868, |
| "grad_norm": 1.7421875, |
| "learning_rate": 1.4810849792946651e-06, |
| "logits/chosen": -6.433564186096191, |
| "logits/rejected": -6.4307708740234375, |
| "logps/chosen": -56.00537872314453, |
| "logps/rejected": -55.16205978393555, |
| "loss": 0.6938, |
| "rewards/accuracies": 0.4124999940395355, |
| "rewards/chosen": -0.0005184346809983253, |
| "rewards/margins": -0.0015976792201399803, |
| "rewards/rejected": 0.001079244539141655, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.011507479861910242, |
| "grad_norm": 1.9765625, |
| "learning_rate": 1.7896443499810537e-06, |
| "logits/chosen": -6.4431304931640625, |
| "logits/rejected": -6.4442572593688965, |
| "logps/chosen": -55.598548889160156, |
| "logps/rejected": -54.446998596191406, |
| "loss": 0.6934, |
| "rewards/accuracies": 0.503125011920929, |
| "rewards/chosen": -0.00016300799325108528, |
| "rewards/margins": -0.0005609053187072277, |
| "rewards/rejected": 0.00039789750007912517, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.013425393172228616, |
| "grad_norm": 2.203125, |
| "learning_rate": 2.0982037206674425e-06, |
| "logits/chosen": -6.4433274269104, |
| "logits/rejected": -6.450094699859619, |
| "logps/chosen": -59.896209716796875, |
| "logps/rejected": -57.89789581298828, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5093749761581421, |
| "rewards/chosen": 0.0006533617270179093, |
| "rewards/margins": 0.0006716603529639542, |
| "rewards/rejected": -1.8298602299182676e-05, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01534330648254699, |
| "grad_norm": 2.03125, |
| "learning_rate": 2.4067630913538307e-06, |
| "logits/chosen": -6.476667881011963, |
| "logits/rejected": -6.468405723571777, |
| "logps/chosen": -58.06646728515625, |
| "logps/rejected": -56.530914306640625, |
| "loss": 0.6934, |
| "rewards/accuracies": 0.4468750059604645, |
| "rewards/chosen": -0.00031386903719976544, |
| "rewards/margins": -0.0005673372070305049, |
| "rewards/rejected": 0.00025346819893456995, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01726121979286536, |
| "grad_norm": 2.296875, |
| "learning_rate": 2.7153224620402197e-06, |
| "logits/chosen": -6.508776664733887, |
| "logits/rejected": -6.486769676208496, |
| "logps/chosen": -54.24138259887695, |
| "logps/rejected": -53.89955520629883, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": 0.0002679133613128215, |
| "rewards/margins": 0.000910227361600846, |
| "rewards/rejected": -0.0006423138547688723, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.019179133103183737, |
| "grad_norm": 2.46875, |
| "learning_rate": 3.023881832726608e-06, |
| "logits/chosen": -6.429900169372559, |
| "logits/rejected": -6.4368720054626465, |
| "logps/chosen": -59.0792121887207, |
| "logps/rejected": -57.9002571105957, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": 0.001494865515269339, |
| "rewards/margins": 0.0021004711743444204, |
| "rewards/rejected": -0.0006056058336980641, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02109704641350211, |
| "grad_norm": 1.796875, |
| "learning_rate": 3.332441203412997e-06, |
| "logits/chosen": -6.480206489562988, |
| "logits/rejected": -6.4737958908081055, |
| "logps/chosen": -57.52396774291992, |
| "logps/rejected": -57.11433029174805, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.534375011920929, |
| "rewards/chosen": 0.0009556323057040572, |
| "rewards/margins": 0.002387548331171274, |
| "rewards/rejected": -0.0014319162582978606, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.023014959723820484, |
| "grad_norm": 2.25, |
| "learning_rate": 3.6410005740993855e-06, |
| "logits/chosen": -6.504572868347168, |
| "logits/rejected": -6.499182224273682, |
| "logps/chosen": -58.1120491027832, |
| "logps/rejected": -57.13053512573242, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.5562499761581421, |
| "rewards/chosen": 0.001851649023592472, |
| "rewards/margins": 0.005305788479745388, |
| "rewards/rejected": -0.0034541389904916286, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.024932873034138856, |
| "grad_norm": 2.0625, |
| "learning_rate": 3.9495599447857745e-06, |
| "logits/chosen": -6.491766452789307, |
| "logits/rejected": -6.455049991607666, |
| "logps/chosen": -57.039581298828125, |
| "logps/rejected": -55.7880859375, |
| "loss": 0.6908, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.0029739458113908768, |
| "rewards/margins": 0.0059506590478122234, |
| "rewards/rejected": -0.0029767132364213467, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.02685078634445723, |
| "grad_norm": 1.9453125, |
| "learning_rate": 4.258119315472162e-06, |
| "logits/chosen": -6.455752372741699, |
| "logits/rejected": -6.4692840576171875, |
| "logps/chosen": -53.70838165283203, |
| "logps/rejected": -54.29150390625, |
| "loss": 0.6904, |
| "rewards/accuracies": 0.6156250238418579, |
| "rewards/chosen": 0.003416051622480154, |
| "rewards/margins": 0.006988453213125467, |
| "rewards/rejected": -0.003572401124984026, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.028768699654775604, |
| "grad_norm": 2.25, |
| "learning_rate": 4.566678686158551e-06, |
| "logits/chosen": -6.434661865234375, |
| "logits/rejected": -6.4351091384887695, |
| "logps/chosen": -57.85565185546875, |
| "logps/rejected": -56.18524169921875, |
| "loss": 0.69, |
| "rewards/accuracies": 0.590624988079071, |
| "rewards/chosen": 0.0047136032953858376, |
| "rewards/margins": 0.008176136761903763, |
| "rewards/rejected": -0.0034625339321792126, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.03068661296509398, |
| "grad_norm": 2.15625, |
| "learning_rate": 4.875238056844939e-06, |
| "logits/chosen": -6.418464660644531, |
| "logits/rejected": -6.435811519622803, |
| "logps/chosen": -59.70001220703125, |
| "logps/rejected": -58.532691955566406, |
| "loss": 0.6882, |
| "rewards/accuracies": 0.6781250238418579, |
| "rewards/chosen": 0.007058107294142246, |
| "rewards/margins": 0.012781137600541115, |
| "rewards/rejected": -0.005723030772060156, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.032604526275412354, |
| "grad_norm": 1.7109375, |
| "learning_rate": 5.183797427531328e-06, |
| "logits/chosen": -6.434415340423584, |
| "logits/rejected": -6.414219856262207, |
| "logps/chosen": -56.356300354003906, |
| "logps/rejected": -55.965721130371094, |
| "loss": 0.6885, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.006996457930654287, |
| "rewards/margins": 0.011944364756345749, |
| "rewards/rejected": -0.004947904963046312, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.03452243958573072, |
| "grad_norm": 2.15625, |
| "learning_rate": 5.4923567982177174e-06, |
| "logits/chosen": -6.413939476013184, |
| "logits/rejected": -6.413136959075928, |
| "logps/chosen": -57.609031677246094, |
| "logps/rejected": -56.8945198059082, |
| "loss": 0.6848, |
| "rewards/accuracies": 0.706250011920929, |
| "rewards/chosen": 0.012066302821040154, |
| "rewards/margins": 0.021702740341424942, |
| "rewards/rejected": -0.009636437520384789, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0364403528960491, |
| "grad_norm": 1.8203125, |
| "learning_rate": 5.800916168904105e-06, |
| "logits/chosen": -6.4709014892578125, |
| "logits/rejected": -6.410592079162598, |
| "logps/chosen": -57.7935791015625, |
| "logps/rejected": -56.025917053222656, |
| "loss": 0.6847, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": 0.011256908066570759, |
| "rewards/margins": 0.022287515923380852, |
| "rewards/rejected": -0.011030609719455242, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.038358266206367474, |
| "grad_norm": 2.015625, |
| "learning_rate": 6.109475539590495e-06, |
| "logits/chosen": -6.4798479080200195, |
| "logits/rejected": -6.4916839599609375, |
| "logps/chosen": -58.99842071533203, |
| "logps/rejected": -58.10333251953125, |
| "loss": 0.6842, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.0121616804972291, |
| "rewards/margins": 0.023794159293174744, |
| "rewards/rejected": -0.011632479727268219, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04027617951668585, |
| "grad_norm": 1.9296875, |
| "learning_rate": 6.418034910276882e-06, |
| "logits/chosen": -6.448805332183838, |
| "logits/rejected": -6.4762372970581055, |
| "logps/chosen": -58.24238967895508, |
| "logps/rejected": -57.392173767089844, |
| "loss": 0.6822, |
| "rewards/accuracies": 0.6812499761581421, |
| "rewards/chosen": 0.016423719003796577, |
| "rewards/margins": 0.029310371726751328, |
| "rewards/rejected": -0.0128866508603096, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.04219409282700422, |
| "grad_norm": 2.203125, |
| "learning_rate": 6.726594280963271e-06, |
| "logits/chosen": -6.426924228668213, |
| "logits/rejected": -6.42023229598999, |
| "logps/chosen": -56.317108154296875, |
| "logps/rejected": -56.5660400390625, |
| "loss": 0.6817, |
| "rewards/accuracies": 0.715624988079071, |
| "rewards/chosen": 0.015550317242741585, |
| "rewards/margins": 0.031005825847387314, |
| "rewards/rejected": -0.015455508604645729, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04411200613732259, |
| "grad_norm": 1.9453125, |
| "learning_rate": 7.03515365164966e-06, |
| "logits/chosen": -6.488696098327637, |
| "logits/rejected": -6.494551658630371, |
| "logps/chosen": -54.70813751220703, |
| "logps/rejected": -55.6793098449707, |
| "loss": 0.6792, |
| "rewards/accuracies": 0.746874988079071, |
| "rewards/chosen": 0.01944402977824211, |
| "rewards/margins": 0.03836524114012718, |
| "rewards/rejected": -0.01892121136188507, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.04602991944764097, |
| "grad_norm": 1.9609375, |
| "learning_rate": 7.343713022336048e-06, |
| "logits/chosen": -6.468048095703125, |
| "logits/rejected": -6.445672512054443, |
| "logps/chosen": -55.66649627685547, |
| "logps/rejected": -56.139678955078125, |
| "loss": 0.6761, |
| "rewards/accuracies": 0.7406250238418579, |
| "rewards/chosen": 0.021984096616506577, |
| "rewards/margins": 0.047887708991765976, |
| "rewards/rejected": -0.025903616100549698, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04794783275795934, |
| "grad_norm": 1.6640625, |
| "learning_rate": 7.652272393022438e-06, |
| "logits/chosen": -6.430941581726074, |
| "logits/rejected": -6.431049346923828, |
| "logps/chosen": -58.177818298339844, |
| "logps/rejected": -59.124046325683594, |
| "loss": 0.6745, |
| "rewards/accuracies": 0.8031250238418579, |
| "rewards/chosen": 0.023997317999601364, |
| "rewards/margins": 0.052540235221385956, |
| "rewards/rejected": -0.028542915359139442, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.04986574606827771, |
| "grad_norm": 1.7578125, |
| "learning_rate": 7.960831763708825e-06, |
| "logits/chosen": -6.422308921813965, |
| "logits/rejected": -6.405043601989746, |
| "logps/chosen": -54.78827667236328, |
| "logps/rejected": -53.91057205200195, |
| "loss": 0.6757, |
| "rewards/accuracies": 0.7593749761581421, |
| "rewards/chosen": 0.023111283779144287, |
| "rewards/margins": 0.050740379840135574, |
| "rewards/rejected": -0.027629096060991287, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05178365937859609, |
| "grad_norm": 1.4765625, |
| "learning_rate": 8.269391134395214e-06, |
| "logits/chosen": -6.452818393707275, |
| "logits/rejected": -6.4319610595703125, |
| "logps/chosen": -56.06396484375, |
| "logps/rejected": -55.55432891845703, |
| "loss": 0.6725, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.028292512521147728, |
| "rewards/margins": 0.0630292147397995, |
| "rewards/rejected": -0.03473670035600662, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.05370157268891446, |
| "grad_norm": 1.671875, |
| "learning_rate": 8.577950505081603e-06, |
| "logits/chosen": -6.453330993652344, |
| "logits/rejected": -6.442866325378418, |
| "logps/chosen": -58.166526794433594, |
| "logps/rejected": -57.297515869140625, |
| "loss": 0.6677, |
| "rewards/accuracies": 0.7718750238418579, |
| "rewards/chosen": 0.03513844683766365, |
| "rewards/margins": 0.0794719010591507, |
| "rewards/rejected": -0.04433346539735794, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05561948599923283, |
| "grad_norm": 1.6640625, |
| "learning_rate": 8.886509875767991e-06, |
| "logits/chosen": -6.464579105377197, |
| "logits/rejected": -6.458718776702881, |
| "logps/chosen": -53.89129638671875, |
| "logps/rejected": -54.87738037109375, |
| "loss": 0.6712, |
| "rewards/accuracies": 0.7906249761581421, |
| "rewards/chosen": 0.030111517757177353, |
| "rewards/margins": 0.06265858560800552, |
| "rewards/rejected": -0.03254706412553787, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.05753739930955121, |
| "grad_norm": 1.6171875, |
| "learning_rate": 9.19506924645438e-06, |
| "logits/chosen": -6.414637565612793, |
| "logits/rejected": -6.429790496826172, |
| "logps/chosen": -59.0002326965332, |
| "logps/rejected": -58.41865921020508, |
| "loss": 0.6654, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 0.039772503077983856, |
| "rewards/margins": 0.08216916769742966, |
| "rewards/rejected": -0.0423966720700264, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.05945531261986958, |
| "grad_norm": 1.4609375, |
| "learning_rate": 9.503628617140768e-06, |
| "logits/chosen": -6.430724143981934, |
| "logits/rejected": -6.439866542816162, |
| "logps/chosen": -56.56650924682617, |
| "logps/rejected": -56.43073654174805, |
| "loss": 0.6646, |
| "rewards/accuracies": 0.784375011920929, |
| "rewards/chosen": 0.04069245606660843, |
| "rewards/margins": 0.08599359542131424, |
| "rewards/rejected": -0.04530114680528641, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.06137322593018796, |
| "grad_norm": 1.625, |
| "learning_rate": 9.812187987827157e-06, |
| "logits/chosen": -6.413358211517334, |
| "logits/rejected": -6.445931434631348, |
| "logps/chosen": -56.371116638183594, |
| "logps/rejected": -57.74314498901367, |
| "loss": 0.6645, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 0.040298428386449814, |
| "rewards/margins": 0.0885874405503273, |
| "rewards/rejected": -0.04828901216387749, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06329113924050633, |
| "grad_norm": 1.625, |
| "learning_rate": 1.0120747358513545e-05, |
| "logits/chosen": -6.423710823059082, |
| "logits/rejected": -6.452187538146973, |
| "logps/chosen": -57.91943359375, |
| "logps/rejected": -58.41001510620117, |
| "loss": 0.6609, |
| "rewards/accuracies": 0.8343750238418579, |
| "rewards/chosen": 0.04707075282931328, |
| "rewards/margins": 0.10583791881799698, |
| "rewards/rejected": -0.058767169713974, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.06520905255082471, |
| "grad_norm": 1.6328125, |
| "learning_rate": 1.0429306729199934e-05, |
| "logits/chosen": -6.453238010406494, |
| "logits/rejected": -6.488819122314453, |
| "logps/chosen": -56.78459548950195, |
| "logps/rejected": -57.84309005737305, |
| "loss": 0.6561, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.054359547793865204, |
| "rewards/margins": 0.11485730111598969, |
| "rewards/rejected": -0.06049775332212448, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06712696586114307, |
| "grad_norm": 1.6484375, |
| "learning_rate": 1.0737866099886323e-05, |
| "logits/chosen": -6.370804786682129, |
| "logits/rejected": -6.389939308166504, |
| "logps/chosen": -56.378761291503906, |
| "logps/rejected": -57.62419509887695, |
| "loss": 0.6549, |
| "rewards/accuracies": 0.8343750238418579, |
| "rewards/chosen": 0.046738989651203156, |
| "rewards/margins": 0.12162216007709503, |
| "rewards/rejected": -0.07488318532705307, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06904487917146145, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.1046425470572711e-05, |
| "logits/chosen": -6.432094573974609, |
| "logits/rejected": -6.485767364501953, |
| "logps/chosen": -55.33378982543945, |
| "logps/rejected": -55.24385452270508, |
| "loss": 0.653, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.058635931462049484, |
| "rewards/margins": 0.12551145255565643, |
| "rewards/rejected": -0.06687554717063904, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07096279248177982, |
| "grad_norm": 1.5390625, |
| "learning_rate": 1.13549848412591e-05, |
| "logits/chosen": -6.448824405670166, |
| "logits/rejected": -6.453608512878418, |
| "logps/chosen": -56.46734619140625, |
| "logps/rejected": -56.84504318237305, |
| "loss": 0.6488, |
| "rewards/accuracies": 0.871874988079071, |
| "rewards/chosen": 0.06262876093387604, |
| "rewards/margins": 0.14282599091529846, |
| "rewards/rejected": -0.08019722998142242, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.0728807057920982, |
| "grad_norm": 1.6796875, |
| "learning_rate": 1.1663544211945488e-05, |
| "logits/chosen": -6.431854248046875, |
| "logits/rejected": -6.417555332183838, |
| "logps/chosen": -52.951072692871094, |
| "logps/rejected": -53.4705924987793, |
| "loss": 0.6489, |
| "rewards/accuracies": 0.846875011920929, |
| "rewards/chosen": 0.06671543419361115, |
| "rewards/margins": 0.1369214504957199, |
| "rewards/rejected": -0.07020601630210876, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07479861910241657, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.1972103582631877e-05, |
| "logits/chosen": -6.399285316467285, |
| "logits/rejected": -6.416836738586426, |
| "logps/chosen": -59.31772994995117, |
| "logps/rejected": -59.72453689575195, |
| "loss": 0.643, |
| "rewards/accuracies": 0.8843749761581421, |
| "rewards/chosen": 0.07494629919528961, |
| "rewards/margins": 0.15886636078357697, |
| "rewards/rejected": -0.08392004668712616, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.07671653241273495, |
| "grad_norm": 1.5546875, |
| "learning_rate": 1.2280662953318267e-05, |
| "logits/chosen": -6.442248344421387, |
| "logits/rejected": -6.403599739074707, |
| "logps/chosen": -56.35515213012695, |
| "logps/rejected": -57.01416778564453, |
| "loss": 0.6479, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.06972001492977142, |
| "rewards/margins": 0.14177976548671722, |
| "rewards/rejected": -0.0720597356557846, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07863444572305332, |
| "grad_norm": 1.5546875, |
| "learning_rate": 1.2589222324004653e-05, |
| "logits/chosen": -6.4987688064575195, |
| "logits/rejected": -6.481993198394775, |
| "logps/chosen": -56.06902313232422, |
| "logps/rejected": -57.0550537109375, |
| "loss": 0.6452, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 0.06869082152843475, |
| "rewards/margins": 0.14782066643238068, |
| "rewards/rejected": -0.07912982255220413, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.0805523590333717, |
| "grad_norm": 1.734375, |
| "learning_rate": 1.2897781694691043e-05, |
| "logits/chosen": -6.460019588470459, |
| "logits/rejected": -6.460816383361816, |
| "logps/chosen": -57.56937789916992, |
| "logps/rejected": -58.26279830932617, |
| "loss": 0.641, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.07537061721086502, |
| "rewards/margins": 0.17141608893871307, |
| "rewards/rejected": -0.09604547917842865, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08247027234369006, |
| "grad_norm": 1.578125, |
| "learning_rate": 1.3206341065377431e-05, |
| "logits/chosen": -6.431567192077637, |
| "logits/rejected": -6.415804862976074, |
| "logps/chosen": -56.94245529174805, |
| "logps/rejected": -58.52058029174805, |
| "loss": 0.631, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 0.08770474791526794, |
| "rewards/margins": 0.2049822360277176, |
| "rewards/rejected": -0.11727748811244965, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.08438818565400844, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.3514900436063818e-05, |
| "logits/chosen": -6.531057834625244, |
| "logits/rejected": -6.522589206695557, |
| "logps/chosen": -56.293739318847656, |
| "logps/rejected": -56.6605224609375, |
| "loss": 0.6301, |
| "rewards/accuracies": 0.878125011920929, |
| "rewards/chosen": 0.09262285381555557, |
| "rewards/margins": 0.19946810603141785, |
| "rewards/rejected": -0.10684527456760406, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08630609896432681, |
| "grad_norm": 1.7734375, |
| "learning_rate": 1.3823459806750209e-05, |
| "logits/chosen": -6.420934200286865, |
| "logits/rejected": -6.435433864593506, |
| "logps/chosen": -58.42344284057617, |
| "logps/rejected": -58.990745544433594, |
| "loss": 0.6275, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": 0.08511321991682053, |
| "rewards/margins": 0.2056717574596405, |
| "rewards/rejected": -0.12055854499340057, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.08822401227464519, |
| "grad_norm": 1.6484375, |
| "learning_rate": 1.4132019177436597e-05, |
| "logits/chosen": -6.4476728439331055, |
| "logits/rejected": -6.445916175842285, |
| "logps/chosen": -57.582061767578125, |
| "logps/rejected": -58.44621658325195, |
| "loss": 0.6224, |
| "rewards/accuracies": 0.840624988079071, |
| "rewards/chosen": 0.09987424314022064, |
| "rewards/margins": 0.24059633910655975, |
| "rewards/rejected": -0.14072208106517792, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.09014192558496356, |
| "grad_norm": 1.6953125, |
| "learning_rate": 1.4440578548122987e-05, |
| "logits/chosen": -6.518248558044434, |
| "logits/rejected": -6.512531280517578, |
| "logps/chosen": -56.6078987121582, |
| "logps/rejected": -58.96805953979492, |
| "loss": 0.6151, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": 0.11163216829299927, |
| "rewards/margins": 0.271917462348938, |
| "rewards/rejected": -0.16028529405593872, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.09205983889528194, |
| "grad_norm": 1.609375, |
| "learning_rate": 1.4749137918809374e-05, |
| "logits/chosen": -6.455610752105713, |
| "logits/rejected": -6.456049919128418, |
| "logps/chosen": -58.586448669433594, |
| "logps/rejected": -59.85078048706055, |
| "loss": 0.6179, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.09219549596309662, |
| "rewards/margins": 0.255667120218277, |
| "rewards/rejected": -0.16347160935401917, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09397775220560031, |
| "grad_norm": 1.6875, |
| "learning_rate": 1.5057697289495763e-05, |
| "logits/chosen": -6.4275312423706055, |
| "logits/rejected": -6.468707084655762, |
| "logps/chosen": -55.558929443359375, |
| "logps/rejected": -57.90214157104492, |
| "loss": 0.6167, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 0.10123320668935776, |
| "rewards/margins": 0.26037126779556274, |
| "rewards/rejected": -0.15913811326026917, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.09589566551591867, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.5366256660182153e-05, |
| "logits/chosen": -6.539719581604004, |
| "logits/rejected": -6.5594964027404785, |
| "logps/chosen": -56.036964416503906, |
| "logps/rejected": -57.6839485168457, |
| "loss": 0.6076, |
| "rewards/accuracies": 0.903124988079071, |
| "rewards/chosen": 0.10764100402593613, |
| "rewards/margins": 0.28984779119491577, |
| "rewards/rejected": -0.18220680952072144, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09781357882623705, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.567481603086854e-05, |
| "logits/chosen": -6.509421348571777, |
| "logits/rejected": -6.502968788146973, |
| "logps/chosen": -57.25380325317383, |
| "logps/rejected": -58.84259796142578, |
| "loss": 0.6109, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": 0.11171796172857285, |
| "rewards/margins": 0.27925556898117065, |
| "rewards/rejected": -0.1675376147031784, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.09973149213655542, |
| "grad_norm": 1.6328125, |
| "learning_rate": 1.5983375401554927e-05, |
| "logits/chosen": -6.527918815612793, |
| "logits/rejected": -6.5247602462768555, |
| "logps/chosen": -54.80146408081055, |
| "logps/rejected": -56.07502365112305, |
| "loss": 0.6188, |
| "rewards/accuracies": 0.8531249761581421, |
| "rewards/chosen": 0.09086655080318451, |
| "rewards/margins": 0.25640976428985596, |
| "rewards/rejected": -0.16554318368434906, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1016494054468738, |
| "grad_norm": 1.4921875, |
| "learning_rate": 1.629193477224132e-05, |
| "logits/chosen": -6.579885005950928, |
| "logits/rejected": -6.561237335205078, |
| "logps/chosen": -56.587547302246094, |
| "logps/rejected": -59.09083938598633, |
| "loss": 0.5984, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.12236499786376953, |
| "rewards/margins": 0.32771119475364685, |
| "rewards/rejected": -0.20534619688987732, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.10356731875719218, |
| "grad_norm": 1.4921875, |
| "learning_rate": 1.6600494142927704e-05, |
| "logits/chosen": -6.541518211364746, |
| "logits/rejected": -6.5189385414123535, |
| "logps/chosen": -56.556121826171875, |
| "logps/rejected": -58.61604690551758, |
| "loss": 0.5879, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.1293206363916397, |
| "rewards/margins": 0.376719206571579, |
| "rewards/rejected": -0.24739857017993927, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10548523206751055, |
| "grad_norm": 1.734375, |
| "learning_rate": 1.6909053513614093e-05, |
| "logits/chosen": -6.512734889984131, |
| "logits/rejected": -6.509590148925781, |
| "logps/chosen": -58.57611083984375, |
| "logps/rejected": -59.823936462402344, |
| "loss": 0.5972, |
| "rewards/accuracies": 0.8843749761581421, |
| "rewards/chosen": 0.11397837102413177, |
| "rewards/margins": 0.3285695016384125, |
| "rewards/rejected": -0.2145911455154419, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.10740314537782893, |
| "grad_norm": 1.25, |
| "learning_rate": 1.7217612884300485e-05, |
| "logits/chosen": -6.592258453369141, |
| "logits/rejected": -6.548396110534668, |
| "logps/chosen": -57.656585693359375, |
| "logps/rejected": -59.94633865356445, |
| "loss": 0.581, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 0.1426272690296173, |
| "rewards/margins": 0.40652403235435486, |
| "rewards/rejected": -0.26389676332473755, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1093210586881473, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.7526172254986873e-05, |
| "logits/chosen": -6.576052188873291, |
| "logits/rejected": -6.558463096618652, |
| "logps/chosen": -53.556358337402344, |
| "logps/rejected": -57.3957633972168, |
| "loss": 0.5937, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 0.10910562425851822, |
| "rewards/margins": 0.348541259765625, |
| "rewards/rejected": -0.23943564295768738, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.11123897199846566, |
| "grad_norm": 1.5546875, |
| "learning_rate": 1.783473162567326e-05, |
| "logits/chosen": -6.541784763336182, |
| "logits/rejected": -6.5498552322387695, |
| "logps/chosen": -56.6390495300293, |
| "logps/rejected": -60.371116638183594, |
| "loss": 0.5724, |
| "rewards/accuracies": 0.8843749761581421, |
| "rewards/chosen": 0.13584202527999878, |
| "rewards/margins": 0.45434489846229553, |
| "rewards/rejected": -0.31850284337997437, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11315688530878404, |
| "grad_norm": 1.6953125, |
| "learning_rate": 1.814329099635965e-05, |
| "logits/chosen": -6.512528419494629, |
| "logits/rejected": -6.557837009429932, |
| "logps/chosen": -54.72956466674805, |
| "logps/rejected": -56.99449920654297, |
| "loss": 0.579, |
| "rewards/accuracies": 0.871874988079071, |
| "rewards/chosen": 0.12395240366458893, |
| "rewards/margins": 0.4107394218444824, |
| "rewards/rejected": -0.2867870330810547, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.11507479861910241, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.845185036704604e-05, |
| "logits/chosen": -6.576136589050293, |
| "logits/rejected": -6.607443332672119, |
| "logps/chosen": -54.656707763671875, |
| "logps/rejected": -58.98443603515625, |
| "loss": 0.579, |
| "rewards/accuracies": 0.8656250238418579, |
| "rewards/chosen": 0.1109938770532608, |
| "rewards/margins": 0.4175935387611389, |
| "rewards/rejected": -0.3065996766090393, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11699271192942079, |
| "grad_norm": 1.4921875, |
| "learning_rate": 1.8760409737732424e-05, |
| "logits/chosen": -6.587884426116943, |
| "logits/rejected": -6.619060516357422, |
| "logps/chosen": -58.541290283203125, |
| "logps/rejected": -63.607521057128906, |
| "loss": 0.5625, |
| "rewards/accuracies": 0.871874988079071, |
| "rewards/chosen": 0.1273353099822998, |
| "rewards/margins": 0.5049296021461487, |
| "rewards/rejected": -0.3775942921638489, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.11891062523973916, |
| "grad_norm": 1.59375, |
| "learning_rate": 1.9068969108418816e-05, |
| "logits/chosen": -6.658132076263428, |
| "logits/rejected": -6.647538185119629, |
| "logps/chosen": -55.23577880859375, |
| "logps/rejected": -58.95073699951172, |
| "loss": 0.5774, |
| "rewards/accuracies": 0.878125011920929, |
| "rewards/chosen": 0.09004654735326767, |
| "rewards/margins": 0.43088483810424805, |
| "rewards/rejected": -0.3408382833003998, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12082853855005754, |
| "grad_norm": 1.421875, |
| "learning_rate": 1.9377528479105205e-05, |
| "logits/chosen": -6.616456508636475, |
| "logits/rejected": -6.5966973304748535, |
| "logps/chosen": -52.970489501953125, |
| "logps/rejected": -57.08417510986328, |
| "loss": 0.5793, |
| "rewards/accuracies": 0.878125011920929, |
| "rewards/chosen": 0.08462224155664444, |
| "rewards/margins": 0.41819873452186584, |
| "rewards/rejected": -0.333576500415802, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.12274645186037592, |
| "grad_norm": 1.5234375, |
| "learning_rate": 1.968608784979159e-05, |
| "logits/chosen": -6.626871585845947, |
| "logits/rejected": -6.633614540100098, |
| "logps/chosen": -51.43700408935547, |
| "logps/rejected": -56.098548889160156, |
| "loss": 0.5566, |
| "rewards/accuracies": 0.893750011920929, |
| "rewards/chosen": 0.11493581533432007, |
| "rewards/margins": 0.5229749083518982, |
| "rewards/rejected": -0.4080390930175781, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12466436517069429, |
| "grad_norm": 1.78125, |
| "learning_rate": 1.999464722047798e-05, |
| "logits/chosen": -6.619255065917969, |
| "logits/rejected": -6.633837699890137, |
| "logps/chosen": -56.89793014526367, |
| "logps/rejected": -63.043235778808594, |
| "loss": 0.5433, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": 0.10207729041576385, |
| "rewards/margins": 0.6024643778800964, |
| "rewards/rejected": -0.5003870725631714, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.12658227848101267, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.030320659116437e-05, |
| "logits/chosen": -6.6238603591918945, |
| "logits/rejected": -6.6286211013793945, |
| "logps/chosen": -53.7779655456543, |
| "logps/rejected": -59.46379852294922, |
| "loss": 0.542, |
| "rewards/accuracies": 0.903124988079071, |
| "rewards/chosen": 0.09198246896266937, |
| "rewards/margins": 0.6294280290603638, |
| "rewards/rejected": -0.5374454855918884, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.12850019179133104, |
| "grad_norm": 2.40625, |
| "learning_rate": 2.0611765961850756e-05, |
| "logits/chosen": -6.664628028869629, |
| "logits/rejected": -6.719674110412598, |
| "logps/chosen": -52.02434539794922, |
| "logps/rejected": -58.489112854003906, |
| "loss": 0.5397, |
| "rewards/accuracies": 0.909375011920929, |
| "rewards/chosen": 0.04687722399830818, |
| "rewards/margins": 0.6034437417984009, |
| "rewards/rejected": -0.5565665364265442, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.13041810510164942, |
| "grad_norm": 2.515625, |
| "learning_rate": 2.0920325332537144e-05, |
| "logits/chosen": -6.698832035064697, |
| "logits/rejected": -6.721909999847412, |
| "logps/chosen": -54.917030334472656, |
| "logps/rejected": -60.1119384765625, |
| "loss": 0.5401, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.039418745785951614, |
| "rewards/margins": 0.629611611366272, |
| "rewards/rejected": -0.6690303683280945, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1323360184119678, |
| "grad_norm": 2.421875, |
| "learning_rate": 2.1228884703223536e-05, |
| "logits/chosen": -6.65041971206665, |
| "logits/rejected": -6.657183647155762, |
| "logps/chosen": -60.004638671875, |
| "logps/rejected": -65.77970123291016, |
| "loss": 0.5344, |
| "rewards/accuracies": 0.893750011920929, |
| "rewards/chosen": -0.22128959000110626, |
| "rewards/margins": 0.7207987904548645, |
| "rewards/rejected": -0.9420884251594543, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.13425393172228614, |
| "grad_norm": 2.140625, |
| "learning_rate": 2.1537444073909925e-05, |
| "logits/chosen": -6.71240234375, |
| "logits/rejected": -6.70929479598999, |
| "logps/chosen": -60.435508728027344, |
| "logps/rejected": -68.03807830810547, |
| "loss": 0.5136, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.13982044160366058, |
| "rewards/margins": 0.8055053949356079, |
| "rewards/rejected": -0.9453258514404297, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13617184503260452, |
| "grad_norm": 1.71875, |
| "learning_rate": 2.184600344459631e-05, |
| "logits/chosen": -6.7060980796813965, |
| "logits/rejected": -6.739293575286865, |
| "logps/chosen": -58.657188415527344, |
| "logps/rejected": -66.50128936767578, |
| "loss": 0.5188, |
| "rewards/accuracies": 0.8843749761581421, |
| "rewards/chosen": -0.16930516064167023, |
| "rewards/margins": 0.7925722002983093, |
| "rewards/rejected": -0.9618774652481079, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.1380897583429229, |
| "grad_norm": 2.078125, |
| "learning_rate": 2.2154562815282702e-05, |
| "logits/chosen": -6.688414096832275, |
| "logits/rejected": -6.681405067443848, |
| "logps/chosen": -58.76293182373047, |
| "logps/rejected": -66.48202514648438, |
| "loss": 0.5135, |
| "rewards/accuracies": 0.893750011920929, |
| "rewards/chosen": -0.20896108448505402, |
| "rewards/margins": 0.8072658777236938, |
| "rewards/rejected": -1.0162270069122314, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.14000767165324127, |
| "grad_norm": 1.890625, |
| "learning_rate": 2.246312218596909e-05, |
| "logits/chosen": -6.696159362792969, |
| "logits/rejected": -6.75206995010376, |
| "logps/chosen": -56.35298538208008, |
| "logps/rejected": -63.650901794433594, |
| "loss": 0.5076, |
| "rewards/accuracies": 0.9156249761581421, |
| "rewards/chosen": -0.2087055742740631, |
| "rewards/margins": 0.8270822763442993, |
| "rewards/rejected": -1.03578782081604, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.14192558496355964, |
| "grad_norm": 1.8203125, |
| "learning_rate": 2.2771681556655476e-05, |
| "logits/chosen": -6.695774078369141, |
| "logits/rejected": -6.776731967926025, |
| "logps/chosen": -55.71824264526367, |
| "logps/rejected": -63.604736328125, |
| "loss": 0.5042, |
| "rewards/accuracies": 0.903124988079071, |
| "rewards/chosen": -0.22564025223255157, |
| "rewards/margins": 0.8389409780502319, |
| "rewards/rejected": -1.0645811557769775, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.14384349827387802, |
| "grad_norm": 1.953125, |
| "learning_rate": 2.3080240927341865e-05, |
| "logits/chosen": -6.816876411437988, |
| "logits/rejected": -6.805089473724365, |
| "logps/chosen": -57.54021453857422, |
| "logps/rejected": -65.3156509399414, |
| "loss": 0.5065, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -0.22473874688148499, |
| "rewards/margins": 0.8491897583007812, |
| "rewards/rejected": -1.0739285945892334, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.1457614115841964, |
| "grad_norm": 1.9140625, |
| "learning_rate": 2.3388800298028257e-05, |
| "logits/chosen": -6.714625358581543, |
| "logits/rejected": -6.712975978851318, |
| "logps/chosen": -58.104759216308594, |
| "logps/rejected": -65.9175033569336, |
| "loss": 0.4919, |
| "rewards/accuracies": 0.9281250238418579, |
| "rewards/chosen": -0.3026445806026459, |
| "rewards/margins": 0.9062640070915222, |
| "rewards/rejected": -1.2089087963104248, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.14767932489451477, |
| "grad_norm": 2.859375, |
| "learning_rate": 2.3697359668714642e-05, |
| "logits/chosen": -6.743095397949219, |
| "logits/rejected": -6.772109031677246, |
| "logps/chosen": -59.0074577331543, |
| "logps/rejected": -69.05128479003906, |
| "loss": 0.4822, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.37660008668899536, |
| "rewards/margins": 0.9904800653457642, |
| "rewards/rejected": -1.3670800924301147, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.14959723820483314, |
| "grad_norm": 3.125, |
| "learning_rate": 2.400591903940103e-05, |
| "logits/chosen": -6.754584312438965, |
| "logits/rejected": -6.786032676696777, |
| "logps/chosen": -62.139381408691406, |
| "logps/rejected": -71.784912109375, |
| "loss": 0.4866, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.5153251886367798, |
| "rewards/margins": 0.9975606799125671, |
| "rewards/rejected": -1.5128860473632812, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15151515151515152, |
| "grad_norm": 2.28125, |
| "learning_rate": 2.431447841008742e-05, |
| "logits/chosen": -6.77377986907959, |
| "logits/rejected": -6.7619805335998535, |
| "logps/chosen": -62.71953201293945, |
| "logps/rejected": -71.13737487792969, |
| "loss": 0.4978, |
| "rewards/accuracies": 0.8843749761581421, |
| "rewards/chosen": -0.5491987466812134, |
| "rewards/margins": 0.8964099884033203, |
| "rewards/rejected": -1.4456088542938232, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.1534330648254699, |
| "grad_norm": 2.125, |
| "learning_rate": 2.4623037780773808e-05, |
| "logits/chosen": -6.71270751953125, |
| "logits/rejected": -6.765826225280762, |
| "logps/chosen": -63.85282516479492, |
| "logps/rejected": -72.63124084472656, |
| "loss": 0.4761, |
| "rewards/accuracies": 0.903124988079071, |
| "rewards/chosen": -0.5679726600646973, |
| "rewards/margins": 0.9939053654670715, |
| "rewards/rejected": -1.5618780851364136, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15535097813578827, |
| "grad_norm": 2.390625, |
| "learning_rate": 2.4931597151460196e-05, |
| "logits/chosen": -6.759522914886475, |
| "logits/rejected": -6.8248186111450195, |
| "logps/chosen": -63.705726623535156, |
| "logps/rejected": -74.55303955078125, |
| "loss": 0.4626, |
| "rewards/accuracies": 0.9281250238418579, |
| "rewards/chosen": -0.6370080709457397, |
| "rewards/margins": 1.109112024307251, |
| "rewards/rejected": -1.7461200952529907, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.15726889144610665, |
| "grad_norm": 2.984375, |
| "learning_rate": 2.5240156522146588e-05, |
| "logits/chosen": -6.789227485656738, |
| "logits/rejected": -6.754014492034912, |
| "logps/chosen": -58.8348503112793, |
| "logps/rejected": -70.08036804199219, |
| "loss": 0.466, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.4667413830757141, |
| "rewards/margins": 1.0789754390716553, |
| "rewards/rejected": -1.5457168817520142, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.15918680475642502, |
| "grad_norm": 4.5, |
| "learning_rate": 2.5548715892832977e-05, |
| "logits/chosen": -6.742713928222656, |
| "logits/rejected": -6.790808200836182, |
| "logps/chosen": -59.2355842590332, |
| "logps/rejected": -69.20877075195312, |
| "loss": 0.4641, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.35354870557785034, |
| "rewards/margins": 1.1075929403305054, |
| "rewards/rejected": -1.46114182472229, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.1611047180667434, |
| "grad_norm": 2.875, |
| "learning_rate": 2.5857275263519365e-05, |
| "logits/chosen": -6.692159175872803, |
| "logits/rejected": -6.748051643371582, |
| "logps/chosen": -64.951416015625, |
| "logps/rejected": -74.98480224609375, |
| "loss": 0.4567, |
| "rewards/accuracies": 0.940625011920929, |
| "rewards/chosen": -0.5960937738418579, |
| "rewards/margins": 1.1240198612213135, |
| "rewards/rejected": -1.7201133966445923, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.16302263137706174, |
| "grad_norm": 2.0625, |
| "learning_rate": 2.6165834634205754e-05, |
| "logits/chosen": -6.632455348968506, |
| "logits/rejected": -6.703897953033447, |
| "logps/chosen": -64.6573486328125, |
| "logps/rejected": -75.8453140258789, |
| "loss": 0.4431, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.6735265851020813, |
| "rewards/margins": 1.2322088479995728, |
| "rewards/rejected": -1.9057356119155884, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.16494054468738012, |
| "grad_norm": 2.875, |
| "learning_rate": 2.647439400489214e-05, |
| "logits/chosen": -6.690484523773193, |
| "logits/rejected": -6.69437313079834, |
| "logps/chosen": -62.8514404296875, |
| "logps/rejected": -72.82466888427734, |
| "loss": 0.4438, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.622053861618042, |
| "rewards/margins": 1.215821385383606, |
| "rewards/rejected": -1.8378751277923584, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1668584579976985, |
| "grad_norm": 3.984375, |
| "learning_rate": 2.6782953375578528e-05, |
| "logits/chosen": -6.712440490722656, |
| "logits/rejected": -6.714225769042969, |
| "logps/chosen": -65.16419219970703, |
| "logps/rejected": -77.28175354003906, |
| "loss": 0.4356, |
| "rewards/accuracies": 0.940625011920929, |
| "rewards/chosen": -0.7391894459724426, |
| "rewards/margins": 1.278839349746704, |
| "rewards/rejected": -2.018028736114502, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.16877637130801687, |
| "grad_norm": 3.34375, |
| "learning_rate": 2.7091512746264916e-05, |
| "logits/chosen": -6.68596887588501, |
| "logits/rejected": -6.724273681640625, |
| "logps/chosen": -64.77339172363281, |
| "logps/rejected": -76.25152587890625, |
| "loss": 0.4358, |
| "rewards/accuracies": 0.9468749761581421, |
| "rewards/chosen": -0.7368718385696411, |
| "rewards/margins": 1.236840009689331, |
| "rewards/rejected": -1.973711609840393, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.17069428461833525, |
| "grad_norm": 2.46875, |
| "learning_rate": 2.740007211695131e-05, |
| "logits/chosen": -6.6403093338012695, |
| "logits/rejected": -6.6900529861450195, |
| "logps/chosen": -64.81290435791016, |
| "logps/rejected": -77.3272476196289, |
| "loss": 0.4259, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.5889671444892883, |
| "rewards/margins": 1.3001738786697388, |
| "rewards/rejected": -1.8891408443450928, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.17261219792865362, |
| "grad_norm": 3.828125, |
| "learning_rate": 2.7708631487637697e-05, |
| "logits/chosen": -6.612371921539307, |
| "logits/rejected": -6.659645080566406, |
| "logps/chosen": -61.89921951293945, |
| "logps/rejected": -74.57048034667969, |
| "loss": 0.4253, |
| "rewards/accuracies": 0.9593750238418579, |
| "rewards/chosen": -0.665620744228363, |
| "rewards/margins": 1.361120343208313, |
| "rewards/rejected": -2.0267410278320312, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.174530111238972, |
| "grad_norm": 4.15625, |
| "learning_rate": 2.8017190858324086e-05, |
| "logits/chosen": -6.555197238922119, |
| "logits/rejected": -6.548722743988037, |
| "logps/chosen": -67.78192901611328, |
| "logps/rejected": -80.3170394897461, |
| "loss": 0.4139, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -0.8611640930175781, |
| "rewards/margins": 1.415268063545227, |
| "rewards/rejected": -2.2764322757720947, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.17644802454929037, |
| "grad_norm": 2.625, |
| "learning_rate": 2.832575022901047e-05, |
| "logits/chosen": -6.598601341247559, |
| "logits/rejected": -6.614532470703125, |
| "logps/chosen": -64.5110092163086, |
| "logps/rejected": -76.21466827392578, |
| "loss": 0.4438, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.7278603315353394, |
| "rewards/margins": 1.279840111732483, |
| "rewards/rejected": -2.0077004432678223, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.17836593785960875, |
| "grad_norm": 3.046875, |
| "learning_rate": 2.863430959969686e-05, |
| "logits/chosen": -6.689515113830566, |
| "logits/rejected": -6.717551231384277, |
| "logps/chosen": -62.034454345703125, |
| "logps/rejected": -74.14480590820312, |
| "loss": 0.4283, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.5690684914588928, |
| "rewards/margins": 1.3283374309539795, |
| "rewards/rejected": -1.8974058628082275, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.18028385116992712, |
| "grad_norm": 2.71875, |
| "learning_rate": 2.8942868970383248e-05, |
| "logits/chosen": -6.623570442199707, |
| "logits/rejected": -6.677834987640381, |
| "logps/chosen": -64.6675033569336, |
| "logps/rejected": -76.27220153808594, |
| "loss": 0.4101, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -0.6819969415664673, |
| "rewards/margins": 1.3743338584899902, |
| "rewards/rejected": -2.056330680847168, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1822017644802455, |
| "grad_norm": 12.8125, |
| "learning_rate": 2.9251428341069636e-05, |
| "logits/chosen": -6.536647796630859, |
| "logits/rejected": -6.574659824371338, |
| "logps/chosen": -67.92814636230469, |
| "logps/rejected": -81.60063934326172, |
| "loss": 0.3935, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -1.0204700231552124, |
| "rewards/margins": 1.5342200994491577, |
| "rewards/rejected": -2.554690361022949, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.18411967779056387, |
| "grad_norm": 3.125, |
| "learning_rate": 2.955998771175603e-05, |
| "logits/chosen": -6.626161098480225, |
| "logits/rejected": -6.705386161804199, |
| "logps/chosen": -62.2401008605957, |
| "logps/rejected": -77.29728698730469, |
| "loss": 0.4065, |
| "rewards/accuracies": 0.956250011920929, |
| "rewards/chosen": -0.7265374064445496, |
| "rewards/margins": 1.4822876453399658, |
| "rewards/rejected": -2.208824634552002, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.18603759110088225, |
| "grad_norm": 2.015625, |
| "learning_rate": 2.9868547082442417e-05, |
| "logits/chosen": -6.6063337326049805, |
| "logits/rejected": -6.613149166107178, |
| "logps/chosen": -69.5428237915039, |
| "logps/rejected": -83.10125732421875, |
| "loss": 0.3982, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -0.9311842918395996, |
| "rewards/margins": 1.4642444849014282, |
| "rewards/rejected": -2.3954286575317383, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.18795550441120062, |
| "grad_norm": 6.71875, |
| "learning_rate": 3.0177106453128806e-05, |
| "logits/chosen": -6.5636162757873535, |
| "logits/rejected": -6.586276054382324, |
| "logps/chosen": -64.01178741455078, |
| "logps/rejected": -77.87983703613281, |
| "loss": 0.3923, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -0.789256751537323, |
| "rewards/margins": 1.5717377662658691, |
| "rewards/rejected": -2.360994577407837, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.189873417721519, |
| "grad_norm": 2.28125, |
| "learning_rate": 3.048566582381519e-05, |
| "logits/chosen": -6.557529449462891, |
| "logits/rejected": -6.565741539001465, |
| "logps/chosen": -67.82279968261719, |
| "logps/rejected": -82.83336639404297, |
| "loss": 0.3884, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -0.9904484748840332, |
| "rewards/margins": 1.550180196762085, |
| "rewards/rejected": -2.540628671646118, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.19179133103183735, |
| "grad_norm": 5.625, |
| "learning_rate": 3.079422519450158e-05, |
| "logits/chosen": -6.607028961181641, |
| "logits/rejected": -6.654728889465332, |
| "logps/chosen": -60.82036209106445, |
| "logps/rejected": -76.42386627197266, |
| "loss": 0.4073, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.6843277215957642, |
| "rewards/margins": 1.5797810554504395, |
| "rewards/rejected": -2.2641091346740723, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19179133103183735, |
| "eval_logits/chosen": -6.560471057891846, |
| "eval_logits/rejected": -6.7035040855407715, |
| "eval_logps/chosen": -64.2278823852539, |
| "eval_logps/rejected": -80.88381958007812, |
| "eval_loss": 0.3989410698413849, |
| "eval_rewards/accuracies": 0.9549999833106995, |
| "eval_rewards/chosen": -0.877504289150238, |
| "eval_rewards/margins": 1.5815826654434204, |
| "eval_rewards/rejected": -2.4590871334075928, |
| "eval_runtime": 5.3821, |
| "eval_samples_per_second": 37.16, |
| "eval_steps_per_second": 37.16, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.19370924434215572, |
| "grad_norm": 3.765625, |
| "learning_rate": 3.110278456518797e-05, |
| "logits/chosen": -6.609318733215332, |
| "logits/rejected": -6.6263556480407715, |
| "logps/chosen": -71.6617202758789, |
| "logps/rejected": -85.51215362548828, |
| "loss": 0.3906, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.9766203165054321, |
| "rewards/margins": 1.5865925550460815, |
| "rewards/rejected": -2.5632128715515137, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.1956271576524741, |
| "grad_norm": 3.625, |
| "learning_rate": 3.141134393587436e-05, |
| "logits/chosen": -6.633000373840332, |
| "logits/rejected": -6.646389961242676, |
| "logps/chosen": -69.64385986328125, |
| "logps/rejected": -86.03681945800781, |
| "loss": 0.3837, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -0.9670968055725098, |
| "rewards/margins": 1.6623718738555908, |
| "rewards/rejected": -2.6294689178466797, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.19754507096279247, |
| "grad_norm": 4.78125, |
| "learning_rate": 3.171990330656075e-05, |
| "logits/chosen": -6.478829860687256, |
| "logits/rejected": -6.520793914794922, |
| "logps/chosen": -70.28801727294922, |
| "logps/rejected": -86.75557708740234, |
| "loss": 0.3871, |
| "rewards/accuracies": 0.971875011920929, |
| "rewards/chosen": -1.1147288084030151, |
| "rewards/margins": 1.6480423212051392, |
| "rewards/rejected": -2.7627711296081543, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.19946298427311085, |
| "grad_norm": 3.734375, |
| "learning_rate": 3.202846267724714e-05, |
| "logits/chosen": -6.553919792175293, |
| "logits/rejected": -6.541110038757324, |
| "logps/chosen": -62.589447021484375, |
| "logps/rejected": -77.74308776855469, |
| "loss": 0.3918, |
| "rewards/accuracies": 0.965624988079071, |
| "rewards/chosen": -0.7898917198181152, |
| "rewards/margins": 1.6853268146514893, |
| "rewards/rejected": -2.4752185344696045, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.20138089758342922, |
| "grad_norm": 2.109375, |
| "learning_rate": 3.233702204793352e-05, |
| "logits/chosen": -6.471434593200684, |
| "logits/rejected": -6.537613868713379, |
| "logps/chosen": -62.585060119628906, |
| "logps/rejected": -79.62939453125, |
| "loss": 0.3703, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.7669985890388489, |
| "rewards/margins": 1.7265609502792358, |
| "rewards/rejected": -2.4935598373413086, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.2032988108937476, |
| "grad_norm": 3.25, |
| "learning_rate": 3.264558141861991e-05, |
| "logits/chosen": -6.616142272949219, |
| "logits/rejected": -6.6340813636779785, |
| "logps/chosen": -63.94733810424805, |
| "logps/rejected": -81.17515563964844, |
| "loss": 0.374, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -0.783534586429596, |
| "rewards/margins": 1.6740057468414307, |
| "rewards/rejected": -2.4575400352478027, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.20521672420406598, |
| "grad_norm": 2.859375, |
| "learning_rate": 3.29541407893063e-05, |
| "logits/chosen": -6.532637119293213, |
| "logits/rejected": -6.545807838439941, |
| "logps/chosen": -69.32952117919922, |
| "logps/rejected": -85.47936248779297, |
| "loss": 0.3789, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -0.9866589307785034, |
| "rewards/margins": 1.7148005962371826, |
| "rewards/rejected": -2.7014594078063965, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.20713463751438435, |
| "grad_norm": 1.359375, |
| "learning_rate": 3.326270015999269e-05, |
| "logits/chosen": -6.423262596130371, |
| "logits/rejected": -6.4812421798706055, |
| "logps/chosen": -63.51520919799805, |
| "logps/rejected": -80.93110656738281, |
| "loss": 0.3684, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -0.7824930548667908, |
| "rewards/margins": 1.7671178579330444, |
| "rewards/rejected": -2.5496110916137695, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.20905255082470273, |
| "grad_norm": 2.28125, |
| "learning_rate": 3.357125953067908e-05, |
| "logits/chosen": -6.493915557861328, |
| "logits/rejected": -6.586592197418213, |
| "logps/chosen": -64.7423095703125, |
| "logps/rejected": -81.45304870605469, |
| "loss": 0.359, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.8664926290512085, |
| "rewards/margins": 1.804779052734375, |
| "rewards/rejected": -2.671271800994873, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.2109704641350211, |
| "grad_norm": 1.59375, |
| "learning_rate": 3.3879818901365465e-05, |
| "logits/chosen": -6.5301971435546875, |
| "logits/rejected": -6.591165065765381, |
| "logps/chosen": -63.6221809387207, |
| "logps/rejected": -82.46671295166016, |
| "loss": 0.3567, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -0.8773013353347778, |
| "rewards/margins": 1.8641706705093384, |
| "rewards/rejected": -2.741472005844116, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21288837744533948, |
| "grad_norm": 3.359375, |
| "learning_rate": 3.418837827205186e-05, |
| "logits/chosen": -6.451764106750488, |
| "logits/rejected": -6.493180274963379, |
| "logps/chosen": -66.15047454833984, |
| "logps/rejected": -83.76017761230469, |
| "loss": 0.3553, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.870043933391571, |
| "rewards/margins": 1.9231412410736084, |
| "rewards/rejected": -2.793184995651245, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.21480629075565785, |
| "grad_norm": 1.75, |
| "learning_rate": 3.449693764273824e-05, |
| "logits/chosen": -6.4344892501831055, |
| "logits/rejected": -6.489321708679199, |
| "logps/chosen": -70.24534606933594, |
| "logps/rejected": -87.98912048339844, |
| "loss": 0.3585, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -1.126381278038025, |
| "rewards/margins": 1.8942753076553345, |
| "rewards/rejected": -3.0206568241119385, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.21672420406597623, |
| "grad_norm": 6.25, |
| "learning_rate": 3.4805497013424635e-05, |
| "logits/chosen": -6.488032341003418, |
| "logits/rejected": -6.582372188568115, |
| "logps/chosen": -66.94841766357422, |
| "logps/rejected": -84.56275177001953, |
| "loss": 0.3636, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -0.9088627696037292, |
| "rewards/margins": 1.8437904119491577, |
| "rewards/rejected": -2.7526535987854004, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.2186421173762946, |
| "grad_norm": 1.6640625, |
| "learning_rate": 3.511405638411102e-05, |
| "logits/chosen": -6.510122776031494, |
| "logits/rejected": -6.623865604400635, |
| "logps/chosen": -68.36225128173828, |
| "logps/rejected": -87.87826538085938, |
| "loss": 0.3495, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -1.0378071069717407, |
| "rewards/margins": 1.9340025186538696, |
| "rewards/rejected": -2.9718098640441895, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.22056003068661298, |
| "grad_norm": 1.8125, |
| "learning_rate": 3.542261575479741e-05, |
| "logits/chosen": -6.497043609619141, |
| "logits/rejected": -6.4754462242126465, |
| "logps/chosen": -66.7702407836914, |
| "logps/rejected": -84.40606689453125, |
| "loss": 0.3512, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.9813149571418762, |
| "rewards/margins": 1.8828579187393188, |
| "rewards/rejected": -2.86417293548584, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.22247794399693133, |
| "grad_norm": 0.9140625, |
| "learning_rate": 3.57311751254838e-05, |
| "logits/chosen": -6.491125583648682, |
| "logits/rejected": -6.545646667480469, |
| "logps/chosen": -65.3485336303711, |
| "logps/rejected": -84.4004135131836, |
| "loss": 0.3476, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.8092457056045532, |
| "rewards/margins": 1.9012104272842407, |
| "rewards/rejected": -2.710456371307373, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.2243958573072497, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.603973449617019e-05, |
| "logits/chosen": -6.469923496246338, |
| "logits/rejected": -6.535366058349609, |
| "logps/chosen": -66.1703872680664, |
| "logps/rejected": -84.5575180053711, |
| "loss": 0.3404, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8879055976867676, |
| "rewards/margins": 2.0098865032196045, |
| "rewards/rejected": -2.897792100906372, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.22631377061756808, |
| "grad_norm": 7.125, |
| "learning_rate": 3.6348293866856574e-05, |
| "logits/chosen": -6.473888397216797, |
| "logits/rejected": -6.5238189697265625, |
| "logps/chosen": -67.31280517578125, |
| "logps/rejected": -84.41548156738281, |
| "loss": 0.3516, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.9976273775100708, |
| "rewards/margins": 1.9460136890411377, |
| "rewards/rejected": -2.943641185760498, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.22823168392788645, |
| "grad_norm": 2.625, |
| "learning_rate": 3.665685323754296e-05, |
| "logits/chosen": -6.507689476013184, |
| "logits/rejected": -6.5280866622924805, |
| "logps/chosen": -67.81330108642578, |
| "logps/rejected": -87.51911163330078, |
| "loss": 0.3408, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9383264780044556, |
| "rewards/margins": 2.0480716228485107, |
| "rewards/rejected": -2.9863979816436768, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.23014959723820483, |
| "grad_norm": 0.5703125, |
| "learning_rate": 3.696541260822935e-05, |
| "logits/chosen": -6.59119176864624, |
| "logits/rejected": -6.690882682800293, |
| "logps/chosen": -66.38182067871094, |
| "logps/rejected": -84.89421844482422, |
| "loss": 0.3384, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8638726472854614, |
| "rewards/margins": 2.045499324798584, |
| "rewards/rejected": -2.909372568130493, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2320675105485232, |
| "grad_norm": 4.03125, |
| "learning_rate": 3.727397197891574e-05, |
| "logits/chosen": -6.517537593841553, |
| "logits/rejected": -6.529317378997803, |
| "logps/chosen": -71.48027038574219, |
| "logps/rejected": -92.27767944335938, |
| "loss": 0.341, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -1.1382275819778442, |
| "rewards/margins": 2.070920705795288, |
| "rewards/rejected": -3.209148406982422, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.23398542385884158, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.758253134960213e-05, |
| "logits/chosen": -6.474556922912598, |
| "logits/rejected": -6.567405700683594, |
| "logps/chosen": -71.85388946533203, |
| "logps/rejected": -92.21666717529297, |
| "loss": 0.3391, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -1.1598782539367676, |
| "rewards/margins": 2.081530809402466, |
| "rewards/rejected": -3.2414088249206543, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.23590333716915995, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.789109072028852e-05, |
| "logits/chosen": -6.5861101150512695, |
| "logits/rejected": -6.617328643798828, |
| "logps/chosen": -64.79547119140625, |
| "logps/rejected": -86.27708435058594, |
| "loss": 0.337, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9149611592292786, |
| "rewards/margins": 2.0861799716949463, |
| "rewards/rejected": -3.001140832901001, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.23782125047947833, |
| "grad_norm": 0.7109375, |
| "learning_rate": 3.819965009097491e-05, |
| "logits/chosen": -6.546975612640381, |
| "logits/rejected": -6.6057891845703125, |
| "logps/chosen": -71.35441589355469, |
| "logps/rejected": -91.72935485839844, |
| "loss": 0.3345, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0706521272659302, |
| "rewards/margins": 2.114802837371826, |
| "rewards/rejected": -3.185454845428467, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2397391637897967, |
| "grad_norm": 1.765625, |
| "learning_rate": 3.850820946166129e-05, |
| "logits/chosen": -6.566898345947266, |
| "logits/rejected": -6.611680507659912, |
| "logps/chosen": -64.45490264892578, |
| "logps/rejected": -84.6996841430664, |
| "loss": 0.3393, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.8809040784835815, |
| "rewards/margins": 2.0736887454986572, |
| "rewards/rejected": -2.9545929431915283, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.24165707710011508, |
| "grad_norm": 4.1875, |
| "learning_rate": 3.881676883234768e-05, |
| "logits/chosen": -6.510687351226807, |
| "logits/rejected": -6.579364776611328, |
| "logps/chosen": -69.7341537475586, |
| "logps/rejected": -89.81876373291016, |
| "loss": 0.3369, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -1.0892380475997925, |
| "rewards/margins": 2.1302390098571777, |
| "rewards/rejected": -3.2194771766662598, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.24357499041043346, |
| "grad_norm": 1.3828125, |
| "learning_rate": 3.9125328203034075e-05, |
| "logits/chosen": -6.530572414398193, |
| "logits/rejected": -6.635471343994141, |
| "logps/chosen": -63.116294860839844, |
| "logps/rejected": -82.94602966308594, |
| "loss": 0.336, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8233559727668762, |
| "rewards/margins": 2.076512575149536, |
| "rewards/rejected": -2.8998684883117676, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.24549290372075183, |
| "grad_norm": 0.703125, |
| "learning_rate": 3.943388757372046e-05, |
| "logits/chosen": -6.4983673095703125, |
| "logits/rejected": -6.571094512939453, |
| "logps/chosen": -67.33332824707031, |
| "logps/rejected": -87.63394927978516, |
| "loss": 0.333, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9015899896621704, |
| "rewards/margins": 2.184861660003662, |
| "rewards/rejected": -3.086452007293701, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2474108170310702, |
| "grad_norm": 0.80859375, |
| "learning_rate": 3.974244694440685e-05, |
| "logits/chosen": -6.597878932952881, |
| "logits/rejected": -6.640139579772949, |
| "logps/chosen": -65.09263610839844, |
| "logps/rejected": -85.24698638916016, |
| "loss": 0.3356, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8725703954696655, |
| "rewards/margins": 2.157386302947998, |
| "rewards/rejected": -3.029956817626953, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.24932873034138858, |
| "grad_norm": 0.78125, |
| "learning_rate": 4.005100631509324e-05, |
| "logits/chosen": -6.532644748687744, |
| "logits/rejected": -6.656785488128662, |
| "logps/chosen": -61.70920944213867, |
| "logps/rejected": -83.08709716796875, |
| "loss": 0.3481, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -0.7342279553413391, |
| "rewards/margins": 2.1510305404663086, |
| "rewards/rejected": -2.885258197784424, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.25124664365170696, |
| "grad_norm": 2.796875, |
| "learning_rate": 4.035956568577963e-05, |
| "logits/chosen": -6.683371067047119, |
| "logits/rejected": -6.702568054199219, |
| "logps/chosen": -60.66215896606445, |
| "logps/rejected": -81.75194549560547, |
| "loss": 0.334, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7626537680625916, |
| "rewards/margins": 2.157379627227783, |
| "rewards/rejected": -2.9200336933135986, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.25316455696202533, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.0668125056466014e-05, |
| "logits/chosen": -6.600775718688965, |
| "logits/rejected": -6.707993507385254, |
| "logps/chosen": -63.812767028808594, |
| "logps/rejected": -84.95133209228516, |
| "loss": 0.3337, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.8133424520492554, |
| "rewards/margins": 2.163587808609009, |
| "rewards/rejected": -2.9769299030303955, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2550824702723437, |
| "grad_norm": 4.875, |
| "learning_rate": 4.0976684427152406e-05, |
| "logits/chosen": -6.641859531402588, |
| "logits/rejected": -6.716789245605469, |
| "logps/chosen": -61.890541076660156, |
| "logps/rejected": -82.82421875, |
| "loss": 0.3364, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.5735510587692261, |
| "rewards/margins": 2.1314024925231934, |
| "rewards/rejected": -2.704953908920288, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.2570003835826621, |
| "grad_norm": 0.734375, |
| "learning_rate": 4.128524379783879e-05, |
| "logits/chosen": -6.6130547523498535, |
| "logits/rejected": -6.662297248840332, |
| "logps/chosen": -63.92805862426758, |
| "logps/rejected": -85.00175476074219, |
| "loss": 0.3333, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7270557880401611, |
| "rewards/margins": 2.136582612991333, |
| "rewards/rejected": -2.863638401031494, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.25891829689298046, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.1593803168525184e-05, |
| "logits/chosen": -6.675729274749756, |
| "logits/rejected": -6.806771755218506, |
| "logps/chosen": -63.05427169799805, |
| "logps/rejected": -84.34765625, |
| "loss": 0.3301, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6259730458259583, |
| "rewards/margins": 2.159316062927246, |
| "rewards/rejected": -2.7852890491485596, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.26083621020329883, |
| "grad_norm": 3.859375, |
| "learning_rate": 4.190236253921157e-05, |
| "logits/chosen": -6.6718950271606445, |
| "logits/rejected": -6.7197771072387695, |
| "logps/chosen": -59.5505485534668, |
| "logps/rejected": -79.26701354980469, |
| "loss": 0.3416, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -0.5589812994003296, |
| "rewards/margins": 2.1066300868988037, |
| "rewards/rejected": -2.6656112670898438, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.2627541235136172, |
| "grad_norm": 1.78125, |
| "learning_rate": 4.221092190989796e-05, |
| "logits/chosen": -6.631156921386719, |
| "logits/rejected": -6.725895881652832, |
| "logps/chosen": -64.82130432128906, |
| "logps/rejected": -85.05762481689453, |
| "loss": 0.3312, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7829705476760864, |
| "rewards/margins": 2.1255249977111816, |
| "rewards/rejected": -2.9084956645965576, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.2646720368239356, |
| "grad_norm": 0.765625, |
| "learning_rate": 4.2519481280584346e-05, |
| "logits/chosen": -6.656712532043457, |
| "logits/rejected": -6.712060451507568, |
| "logps/chosen": -64.23103332519531, |
| "logps/rejected": -85.4850845336914, |
| "loss": 0.3392, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -0.6182041168212891, |
| "rewards/margins": 2.193781852722168, |
| "rewards/rejected": -2.811986207962036, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2665899501342539, |
| "grad_norm": 8.3125, |
| "learning_rate": 4.282804065127073e-05, |
| "logits/chosen": -6.607521057128906, |
| "logits/rejected": -6.724272727966309, |
| "logps/chosen": -62.71710205078125, |
| "logps/rejected": -83.67607879638672, |
| "loss": 0.3385, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.5952963829040527, |
| "rewards/margins": 2.1687827110290527, |
| "rewards/rejected": -2.7640790939331055, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.2685078634445723, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.313660002195712e-05, |
| "logits/chosen": -6.672816276550293, |
| "logits/rejected": -6.760655403137207, |
| "logps/chosen": -61.5716667175293, |
| "logps/rejected": -82.35960388183594, |
| "loss": 0.3339, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.5193628668785095, |
| "rewards/margins": 2.136096715927124, |
| "rewards/rejected": -2.6554598808288574, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.27042577675489066, |
| "grad_norm": 6.90625, |
| "learning_rate": 4.3445159392643515e-05, |
| "logits/chosen": -6.69228458404541, |
| "logits/rejected": -6.7472124099731445, |
| "logps/chosen": -70.73759460449219, |
| "logps/rejected": -91.33528900146484, |
| "loss": 0.3363, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.826391339302063, |
| "rewards/margins": 2.150942802429199, |
| "rewards/rejected": -2.9773337841033936, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.27234369006520903, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.37537187633299e-05, |
| "logits/chosen": -6.696518898010254, |
| "logits/rejected": -6.7846574783325195, |
| "logps/chosen": -61.52152633666992, |
| "logps/rejected": -82.66785430908203, |
| "loss": 0.3322, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5135270357131958, |
| "rewards/margins": 2.1553821563720703, |
| "rewards/rejected": -2.6689088344573975, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2742616033755274, |
| "grad_norm": 0.828125, |
| "learning_rate": 4.406227813401629e-05, |
| "logits/chosen": -6.7538933753967285, |
| "logits/rejected": -6.772116661071777, |
| "logps/chosen": -62.2570686340332, |
| "logps/rejected": -83.71648406982422, |
| "loss": 0.3309, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6405705213546753, |
| "rewards/margins": 2.1925716400146484, |
| "rewards/rejected": -2.8331425189971924, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.2761795166858458, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.4370837504702684e-05, |
| "logits/chosen": -6.731971740722656, |
| "logits/rejected": -6.773854732513428, |
| "logps/chosen": -65.49516296386719, |
| "logps/rejected": -86.7280502319336, |
| "loss": 0.3311, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7476341724395752, |
| "rewards/margins": 2.2033274173736572, |
| "rewards/rejected": -2.9509613513946533, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.27809742999616416, |
| "grad_norm": 0.60546875, |
| "learning_rate": 4.467939687538906e-05, |
| "logits/chosen": -6.718592166900635, |
| "logits/rejected": -6.75787878036499, |
| "logps/chosen": -60.866233825683594, |
| "logps/rejected": -82.61892700195312, |
| "loss": 0.33, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5662195682525635, |
| "rewards/margins": 2.2021169662475586, |
| "rewards/rejected": -2.768336534500122, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.28001534330648253, |
| "grad_norm": 5.9375, |
| "learning_rate": 4.4987956246075455e-05, |
| "logits/chosen": -6.6921539306640625, |
| "logits/rejected": -6.746490478515625, |
| "logps/chosen": -61.933387756347656, |
| "logps/rejected": -83.12504577636719, |
| "loss": 0.3304, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.6692690253257751, |
| "rewards/margins": 2.167955160140991, |
| "rewards/rejected": -2.8372244834899902, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2819332566168009, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.529651561676185e-05, |
| "logits/chosen": -6.7022576332092285, |
| "logits/rejected": -6.752945899963379, |
| "logps/chosen": -64.28565216064453, |
| "logps/rejected": -86.43739318847656, |
| "loss": 0.3303, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7322890162467957, |
| "rewards/margins": 2.194568157196045, |
| "rewards/rejected": -2.9268569946289062, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.2838511699271193, |
| "grad_norm": 0.45703125, |
| "learning_rate": 4.560507498744823e-05, |
| "logits/chosen": -6.755038261413574, |
| "logits/rejected": -6.783468723297119, |
| "logps/chosen": -66.19497680664062, |
| "logps/rejected": -86.26811218261719, |
| "loss": 0.3334, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.7362627387046814, |
| "rewards/margins": 2.1654791831970215, |
| "rewards/rejected": -2.9017419815063477, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.28576908323743766, |
| "grad_norm": 0.54296875, |
| "learning_rate": 4.5913634358134624e-05, |
| "logits/chosen": -6.751487731933594, |
| "logits/rejected": -6.81637716293335, |
| "logps/chosen": -63.06780242919922, |
| "logps/rejected": -84.47251892089844, |
| "loss": 0.3296, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5739712119102478, |
| "rewards/margins": 2.154513120651245, |
| "rewards/rejected": -2.7284841537475586, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.28768699654775604, |
| "grad_norm": 0.609375, |
| "learning_rate": 4.622219372882101e-05, |
| "logits/chosen": -6.6978349685668945, |
| "logits/rejected": -6.745043754577637, |
| "logps/chosen": -63.39635467529297, |
| "logps/rejected": -82.64900207519531, |
| "loss": 0.3317, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.5827551484107971, |
| "rewards/margins": 2.2173619270324707, |
| "rewards/rejected": -2.800117015838623, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2896049098580744, |
| "grad_norm": 0.84765625, |
| "learning_rate": 4.6530753099507394e-05, |
| "logits/chosen": -6.7169013023376465, |
| "logits/rejected": -6.784239768981934, |
| "logps/chosen": -63.23963165283203, |
| "logps/rejected": -84.25068664550781, |
| "loss": 0.3285, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.643856406211853, |
| "rewards/margins": 2.1869945526123047, |
| "rewards/rejected": -2.8308510780334473, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.2915228231683928, |
| "grad_norm": 0.53515625, |
| "learning_rate": 4.6839312470193786e-05, |
| "logits/chosen": -6.725077152252197, |
| "logits/rejected": -6.785587310791016, |
| "logps/chosen": -65.50286102294922, |
| "logps/rejected": -87.09868621826172, |
| "loss": 0.3333, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.8208054304122925, |
| "rewards/margins": 2.1953988075256348, |
| "rewards/rejected": -3.016204357147217, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.29344073647871116, |
| "grad_norm": 0.71875, |
| "learning_rate": 4.714787184088018e-05, |
| "logits/chosen": -6.824525356292725, |
| "logits/rejected": -6.858616828918457, |
| "logps/chosen": -60.581390380859375, |
| "logps/rejected": -81.6998062133789, |
| "loss": 0.3279, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5427888631820679, |
| "rewards/margins": 2.190028190612793, |
| "rewards/rejected": -2.732816696166992, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.29535864978902954, |
| "grad_norm": 1.953125, |
| "learning_rate": 4.7456431211566563e-05, |
| "logits/chosen": -6.723073482513428, |
| "logits/rejected": -6.727642059326172, |
| "logps/chosen": -64.07035827636719, |
| "logps/rejected": -85.05512237548828, |
| "loss": 0.3304, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6974167823791504, |
| "rewards/margins": 2.1716017723083496, |
| "rewards/rejected": -2.8690185546875, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2972765630993479, |
| "grad_norm": 0.349609375, |
| "learning_rate": 4.7764990582252955e-05, |
| "logits/chosen": -6.807308197021484, |
| "logits/rejected": -6.894169807434082, |
| "logps/chosen": -59.81858444213867, |
| "logps/rejected": -80.54625701904297, |
| "loss": 0.3289, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5121219754219055, |
| "rewards/margins": 2.172778606414795, |
| "rewards/rejected": -2.6849007606506348, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.2991944764096663, |
| "grad_norm": 0.68359375, |
| "learning_rate": 4.807354995293934e-05, |
| "logits/chosen": -6.723723411560059, |
| "logits/rejected": -6.834429740905762, |
| "logps/chosen": -60.23179244995117, |
| "logps/rejected": -81.87191772460938, |
| "loss": 0.329, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4345468580722809, |
| "rewards/margins": 2.197113275527954, |
| "rewards/rejected": -2.631659984588623, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.30111238971998466, |
| "grad_norm": 0.47265625, |
| "learning_rate": 4.832039506658243e-05, |
| "logits/chosen": -6.752009391784668, |
| "logits/rejected": -6.826085567474365, |
| "logps/chosen": -64.78627014160156, |
| "logps/rejected": -86.14216613769531, |
| "loss": 0.3291, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7065446972846985, |
| "rewards/margins": 2.1886019706726074, |
| "rewards/rejected": -2.8951468467712402, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.30303030303030304, |
| "grad_norm": 0.431640625, |
| "learning_rate": 4.832031166492162e-05, |
| "logits/chosen": -6.7785844802856445, |
| "logits/rejected": -6.8263349533081055, |
| "logps/chosen": -66.33650207519531, |
| "logps/rejected": -88.02735900878906, |
| "loss": 0.3273, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6940505504608154, |
| "rewards/margins": 2.166769504547119, |
| "rewards/rejected": -2.8608198165893555, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3049482163406214, |
| "grad_norm": 0.458984375, |
| "learning_rate": 4.8320109118434643e-05, |
| "logits/chosen": -6.783939361572266, |
| "logits/rejected": -6.81668758392334, |
| "logps/chosen": -59.873023986816406, |
| "logps/rejected": -80.97574615478516, |
| "loss": 0.3292, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.49882563948631287, |
| "rewards/margins": 2.2213451862335205, |
| "rewards/rejected": -2.720170736312866, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.3068661296509398, |
| "grad_norm": 0.72265625, |
| "learning_rate": 4.8319787428130454e-05, |
| "logits/chosen": -6.754337310791016, |
| "logits/rejected": -6.8234124183654785, |
| "logps/chosen": -63.73439407348633, |
| "logps/rejected": -85.82870483398438, |
| "loss": 0.3271, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6053725481033325, |
| "rewards/margins": 2.2051076889038086, |
| "rewards/rejected": -2.8104803562164307, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.30878404296125816, |
| "grad_norm": 0.609375, |
| "learning_rate": 4.8319346595611474e-05, |
| "logits/chosen": -6.786177158355713, |
| "logits/rejected": -6.8356428146362305, |
| "logps/chosen": -66.29222106933594, |
| "logps/rejected": -87.39335632324219, |
| "loss": 0.3287, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7608428001403809, |
| "rewards/margins": 2.2171874046325684, |
| "rewards/rejected": -2.978030204772949, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.31070195627157654, |
| "grad_norm": 0.373046875, |
| "learning_rate": 4.8318786623073625e-05, |
| "logits/chosen": -6.763392448425293, |
| "logits/rejected": -6.827507019042969, |
| "logps/chosen": -64.34239196777344, |
| "logps/rejected": -85.38350677490234, |
| "loss": 0.3284, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6473758220672607, |
| "rewards/margins": 2.150489091873169, |
| "rewards/rejected": -2.797865390777588, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.3126198695818949, |
| "grad_norm": 0.408203125, |
| "learning_rate": 4.831810751330627e-05, |
| "logits/chosen": -6.711167812347412, |
| "logits/rejected": -6.766715049743652, |
| "logps/chosen": -62.98204803466797, |
| "logps/rejected": -84.40223693847656, |
| "loss": 0.3275, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5519531965255737, |
| "rewards/margins": 2.2026820182800293, |
| "rewards/rejected": -2.7546346187591553, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.3145377828922133, |
| "grad_norm": 0.478515625, |
| "learning_rate": 4.8317309269692265e-05, |
| "logits/chosen": -6.660008430480957, |
| "logits/rejected": -6.69751501083374, |
| "logps/chosen": -63.44572830200195, |
| "logps/rejected": -85.4597396850586, |
| "loss": 0.3272, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6921919584274292, |
| "rewards/margins": 2.1986453533172607, |
| "rewards/rejected": -2.8908371925354004, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.31645569620253167, |
| "grad_norm": 0.158203125, |
| "learning_rate": 4.831639189620787e-05, |
| "logits/chosen": -6.792325019836426, |
| "logits/rejected": -6.890578269958496, |
| "logps/chosen": -61.67216873168945, |
| "logps/rejected": -82.24214935302734, |
| "loss": 0.3265, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4790772497653961, |
| "rewards/margins": 2.210103750228882, |
| "rewards/rejected": -2.689181089401245, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.31837360951285004, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 4.831535539742279e-05, |
| "logits/chosen": -6.7359466552734375, |
| "logits/rejected": -6.817502498626709, |
| "logps/chosen": -66.23463439941406, |
| "logps/rejected": -87.56221008300781, |
| "loss": 0.326, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6128313541412354, |
| "rewards/margins": 2.193281650543213, |
| "rewards/rejected": -2.8061134815216064, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3202915228231684, |
| "grad_norm": 0.33203125, |
| "learning_rate": 4.831419977850011e-05, |
| "logits/chosen": -6.762195587158203, |
| "logits/rejected": -6.8095879554748535, |
| "logps/chosen": -61.4901237487793, |
| "logps/rejected": -82.7410888671875, |
| "loss": 0.3275, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5622529983520508, |
| "rewards/margins": 2.2180874347686768, |
| "rewards/rejected": -2.7803401947021484, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.3222094361334868, |
| "grad_norm": 0.4453125, |
| "learning_rate": 4.8312925045196294e-05, |
| "logits/chosen": -6.715124607086182, |
| "logits/rejected": -6.803443908691406, |
| "logps/chosen": -62.32609176635742, |
| "logps/rejected": -82.6832275390625, |
| "loss": 0.3273, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5614801645278931, |
| "rewards/margins": 2.209786891937256, |
| "rewards/rejected": -2.7712676525115967, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3241273494438051, |
| "grad_norm": 0.2099609375, |
| "learning_rate": 4.8311531203861164e-05, |
| "logits/chosen": -6.737015724182129, |
| "logits/rejected": -6.792696952819824, |
| "logps/chosen": -63.43701934814453, |
| "logps/rejected": -85.36714935302734, |
| "loss": 0.3266, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6197028756141663, |
| "rewards/margins": 2.200639009475708, |
| "rewards/rejected": -2.8203418254852295, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3260452627541235, |
| "grad_norm": 0.283203125, |
| "learning_rate": 4.83100182614378e-05, |
| "logits/chosen": -6.828474521636963, |
| "logits/rejected": -6.907891273498535, |
| "logps/chosen": -62.228851318359375, |
| "logps/rejected": -84.7411117553711, |
| "loss": 0.3261, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.565654456615448, |
| "rewards/margins": 2.194936752319336, |
| "rewards/rejected": -2.7605910301208496, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.32796317606444186, |
| "grad_norm": 0.12158203125, |
| "learning_rate": 4.830838622546262e-05, |
| "logits/chosen": -6.825277805328369, |
| "logits/rejected": -6.85458517074585, |
| "logps/chosen": -61.36362838745117, |
| "logps/rejected": -83.68403625488281, |
| "loss": 0.3263, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5196749567985535, |
| "rewards/margins": 2.208179235458374, |
| "rewards/rejected": -2.7278542518615723, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.32988108937476024, |
| "grad_norm": 0.2060546875, |
| "learning_rate": 4.830663510406525e-05, |
| "logits/chosen": -6.76092004776001, |
| "logits/rejected": -6.817212104797363, |
| "logps/chosen": -60.813751220703125, |
| "logps/rejected": -82.04981994628906, |
| "loss": 0.326, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5437586903572083, |
| "rewards/margins": 2.1881277561187744, |
| "rewards/rejected": -2.731886386871338, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.3317990026850786, |
| "grad_norm": 0.208984375, |
| "learning_rate": 4.8304764905968524e-05, |
| "logits/chosen": -6.780856132507324, |
| "logits/rejected": -6.783792018890381, |
| "logps/chosen": -59.90398025512695, |
| "logps/rejected": -81.0885009765625, |
| "loss": 0.3264, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5090753436088562, |
| "rewards/margins": 2.1934762001037598, |
| "rewards/rejected": -2.70255184173584, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.333716915995397, |
| "grad_norm": 0.162109375, |
| "learning_rate": 4.830277564048841e-05, |
| "logits/chosen": -6.7684478759765625, |
| "logits/rejected": -6.799691677093506, |
| "logps/chosen": -61.917884826660156, |
| "logps/rejected": -82.83894348144531, |
| "loss": 0.3256, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.520404040813446, |
| "rewards/margins": 2.187943696975708, |
| "rewards/rejected": -2.708347797393799, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.33563482930571537, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 4.8300667317533996e-05, |
| "logits/chosen": -6.776177883148193, |
| "logits/rejected": -6.815954685211182, |
| "logps/chosen": -65.00544738769531, |
| "logps/rejected": -86.48484802246094, |
| "loss": 0.3255, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6944643259048462, |
| "rewards/margins": 2.1923465728759766, |
| "rewards/rejected": -2.8868112564086914, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.33755274261603374, |
| "grad_norm": 0.12255859375, |
| "learning_rate": 4.829843994760745e-05, |
| "logits/chosen": -6.812263488769531, |
| "logits/rejected": -6.835313320159912, |
| "logps/chosen": -57.126991271972656, |
| "logps/rejected": -79.21980285644531, |
| "loss": 0.3255, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4502105712890625, |
| "rewards/margins": 2.1968555450439453, |
| "rewards/rejected": -2.647066116333008, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3394706559263521, |
| "grad_norm": 0.123046875, |
| "learning_rate": 4.829609354180392e-05, |
| "logits/chosen": -6.792092323303223, |
| "logits/rejected": -6.8232903480529785, |
| "logps/chosen": -62.5068359375, |
| "logps/rejected": -82.26536560058594, |
| "loss": 0.3255, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4687983989715576, |
| "rewards/margins": 2.2032909393310547, |
| "rewards/rejected": -2.6720895767211914, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.3413885692366705, |
| "grad_norm": 0.08837890625, |
| "learning_rate": 4.8293628111811505e-05, |
| "logits/chosen": -6.755088806152344, |
| "logits/rejected": -6.835995674133301, |
| "logps/chosen": -63.63855743408203, |
| "logps/rejected": -85.75823211669922, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6732860803604126, |
| "rewards/margins": 2.1958742141723633, |
| "rewards/rejected": -2.8691601753234863, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.34330648254698887, |
| "grad_norm": 0.1865234375, |
| "learning_rate": 4.829104366991121e-05, |
| "logits/chosen": -6.732597351074219, |
| "logits/rejected": -6.824942588806152, |
| "logps/chosen": -62.95014572143555, |
| "logps/rejected": -84.47950744628906, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.569419264793396, |
| "rewards/margins": 2.2020137310028076, |
| "rewards/rejected": -2.7714333534240723, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.34522439585730724, |
| "grad_norm": 0.11767578125, |
| "learning_rate": 4.8288340228976864e-05, |
| "logits/chosen": -6.789787292480469, |
| "logits/rejected": -6.844014644622803, |
| "logps/chosen": -64.68815612792969, |
| "logps/rejected": -85.33354949951172, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.555423378944397, |
| "rewards/margins": 2.1973979473114014, |
| "rewards/rejected": -2.752821445465088, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3471423091676256, |
| "grad_norm": 0.0703125, |
| "learning_rate": 4.828551780247507e-05, |
| "logits/chosen": -6.739270210266113, |
| "logits/rejected": -6.838677406311035, |
| "logps/chosen": -63.15021514892578, |
| "logps/rejected": -85.18204498291016, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6110334992408752, |
| "rewards/margins": 2.194159507751465, |
| "rewards/rejected": -2.8051929473876953, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.349060222477944, |
| "grad_norm": 0.1259765625, |
| "learning_rate": 4.8282576404465136e-05, |
| "logits/chosen": -6.813556671142578, |
| "logits/rejected": -6.86798620223999, |
| "logps/chosen": -61.781394958496094, |
| "logps/rejected": -83.4185562133789, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4605577886104584, |
| "rewards/margins": 2.199518918991089, |
| "rewards/rejected": -2.66007661819458, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.35097813578826237, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 4.8279516049598964e-05, |
| "logits/chosen": -6.776249885559082, |
| "logits/rejected": -6.871140956878662, |
| "logps/chosen": -61.13481521606445, |
| "logps/rejected": -82.49009704589844, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.41385921835899353, |
| "rewards/margins": 2.1962554454803467, |
| "rewards/rejected": -2.6101150512695312, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.35289604909858074, |
| "grad_norm": 0.10107421875, |
| "learning_rate": 4.827633675312108e-05, |
| "logits/chosen": -6.725433349609375, |
| "logits/rejected": -6.784366607666016, |
| "logps/chosen": -61.72522735595703, |
| "logps/rejected": -81.87347412109375, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.49139589071273804, |
| "rewards/margins": 2.188589572906494, |
| "rewards/rejected": -2.679985523223877, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3548139624088991, |
| "grad_norm": 0.123046875, |
| "learning_rate": 4.827303853086843e-05, |
| "logits/chosen": -6.812512397766113, |
| "logits/rejected": -6.855481147766113, |
| "logps/chosen": -58.620384216308594, |
| "logps/rejected": -80.5108642578125, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.48473721742630005, |
| "rewards/margins": 2.198575258255005, |
| "rewards/rejected": -2.68331241607666, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3567318757192175, |
| "grad_norm": 0.0771484375, |
| "learning_rate": 4.826962139927038e-05, |
| "logits/chosen": -6.829216003417969, |
| "logits/rejected": -6.905343532562256, |
| "logps/chosen": -63.4243278503418, |
| "logps/rejected": -84.9180908203125, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5410398244857788, |
| "rewards/margins": 2.1947712898254395, |
| "rewards/rejected": -2.7358109951019287, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.35864978902953587, |
| "grad_norm": 0.0673828125, |
| "learning_rate": 4.826608537534864e-05, |
| "logits/chosen": -6.665771484375, |
| "logits/rejected": -6.781673431396484, |
| "logps/chosen": -61.3337516784668, |
| "logps/rejected": -83.3028335571289, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5410109758377075, |
| "rewards/margins": 2.192946195602417, |
| "rewards/rejected": -2.733957290649414, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.36056770233985425, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 4.826243047671713e-05, |
| "logits/chosen": -6.8091139793396, |
| "logits/rejected": -6.85195255279541, |
| "logps/chosen": -63.64402389526367, |
| "logps/rejected": -85.47203063964844, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.638896107673645, |
| "rewards/margins": 2.196115255355835, |
| "rewards/rejected": -2.8350117206573486, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.3624856156501726, |
| "grad_norm": 0.103515625, |
| "learning_rate": 4.825865672158193e-05, |
| "logits/chosen": -6.7124199867248535, |
| "logits/rejected": -6.7912397384643555, |
| "logps/chosen": -62.58756637573242, |
| "logps/rejected": -83.9194107055664, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6223362684249878, |
| "rewards/margins": 2.1918787956237793, |
| "rewards/rejected": -2.8142151832580566, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.364403528960491, |
| "grad_norm": 0.06787109375, |
| "learning_rate": 4.825476412874119e-05, |
| "logits/chosen": -6.7734575271606445, |
| "logits/rejected": -6.823586463928223, |
| "logps/chosen": -62.79764938354492, |
| "logps/rejected": -84.3304672241211, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5803407430648804, |
| "rewards/margins": 2.1943869590759277, |
| "rewards/rejected": -2.774728298187256, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.36632144227080937, |
| "grad_norm": 0.06591796875, |
| "learning_rate": 4.8250752717584965e-05, |
| "logits/chosen": -6.757647514343262, |
| "logits/rejected": -6.80877161026001, |
| "logps/chosen": -66.12776947021484, |
| "logps/rejected": -87.11688232421875, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6834413409233093, |
| "rewards/margins": 2.198050022125244, |
| "rewards/rejected": -2.8814916610717773, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.36823935558112775, |
| "grad_norm": 0.0771484375, |
| "learning_rate": 4.8246622508095275e-05, |
| "logits/chosen": -6.766076564788818, |
| "logits/rejected": -6.856036186218262, |
| "logps/chosen": -68.24916076660156, |
| "logps/rejected": -88.81385040283203, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7616792917251587, |
| "rewards/margins": 2.1991758346557617, |
| "rewards/rejected": -2.960855007171631, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3701572688914461, |
| "grad_norm": 0.08056640625, |
| "learning_rate": 4.824237352084582e-05, |
| "logits/chosen": -6.84201192855835, |
| "logits/rejected": -6.853090763092041, |
| "logps/chosen": -63.752159118652344, |
| "logps/rejected": -85.63033294677734, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6369014978408813, |
| "rewards/margins": 2.1962616443634033, |
| "rewards/rejected": -2.833163261413574, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.3720751822017645, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 4.8238005777002006e-05, |
| "logits/chosen": -6.842381477355957, |
| "logits/rejected": -6.8631696701049805, |
| "logps/chosen": -62.98088455200195, |
| "logps/rejected": -84.48819732666016, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5686295032501221, |
| "rewards/margins": 2.1978487968444824, |
| "rewards/rejected": -2.7664780616760254, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3739930955120829, |
| "grad_norm": 0.06005859375, |
| "learning_rate": 4.82335192983208e-05, |
| "logits/chosen": -6.801692008972168, |
| "logits/rejected": -6.871775150299072, |
| "logps/chosen": -65.15239715576172, |
| "logps/rejected": -85.4663314819336, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.561257004737854, |
| "rewards/margins": 2.1907687187194824, |
| "rewards/rejected": -2.752026081085205, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.37591100882240125, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 4.822891410715061e-05, |
| "logits/chosen": -6.735692024230957, |
| "logits/rejected": -6.789946556091309, |
| "logps/chosen": -59.1342887878418, |
| "logps/rejected": -80.65605926513672, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.43425020575523376, |
| "rewards/margins": 2.1964149475097656, |
| "rewards/rejected": -2.6306653022766113, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3778289221327196, |
| "grad_norm": 0.04443359375, |
| "learning_rate": 4.822419022643119e-05, |
| "logits/chosen": -6.6819329261779785, |
| "logits/rejected": -6.77020263671875, |
| "logps/chosen": -65.46723937988281, |
| "logps/rejected": -85.34056091308594, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5593077540397644, |
| "rewards/margins": 2.1933341026306152, |
| "rewards/rejected": -2.7526416778564453, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.379746835443038, |
| "grad_norm": 0.06201171875, |
| "learning_rate": 4.821934767969352e-05, |
| "logits/chosen": -6.758429527282715, |
| "logits/rejected": -6.803321838378906, |
| "logps/chosen": -58.53925323486328, |
| "logps/rejected": -80.94148254394531, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5433183908462524, |
| "rewards/margins": 2.1945109367370605, |
| "rewards/rejected": -2.7378292083740234, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3816647487533564, |
| "grad_norm": 0.05126953125, |
| "learning_rate": 4.8214386491059666e-05, |
| "logits/chosen": -6.7455244064331055, |
| "logits/rejected": -6.8056206703186035, |
| "logps/chosen": -66.12544250488281, |
| "logps/rejected": -86.32331848144531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6424646377563477, |
| "rewards/margins": 2.1983883380889893, |
| "rewards/rejected": -2.840852737426758, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.3835826620636747, |
| "grad_norm": 0.06982421875, |
| "learning_rate": 4.820930668524273e-05, |
| "logits/chosen": -6.71005392074585, |
| "logits/rejected": -6.745247840881348, |
| "logps/chosen": -66.3671646118164, |
| "logps/rejected": -87.68939208984375, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6825047731399536, |
| "rewards/margins": 2.1931846141815186, |
| "rewards/rejected": -2.8756892681121826, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3835826620636747, |
| "eval_logits/chosen": -6.65899658203125, |
| "eval_logits/rejected": -6.858875751495361, |
| "eval_logps/chosen": -61.29154968261719, |
| "eval_logps/rejected": -84.09736633300781, |
| "eval_loss": 0.3251773715019226, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -0.5838707089424133, |
| "eval_rewards/margins": 2.196570634841919, |
| "eval_rewards/rejected": -2.7804412841796875, |
| "eval_runtime": 5.2595, |
| "eval_samples_per_second": 38.026, |
| "eval_steps_per_second": 38.026, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.38550057537399307, |
| "grad_norm": 0.0966796875, |
| "learning_rate": 4.8204108287546635e-05, |
| "logits/chosen": -6.718655586242676, |
| "logits/rejected": -6.77310037612915, |
| "logps/chosen": -61.59770584106445, |
| "logps/rejected": -82.8775863647461, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5203923583030701, |
| "rewards/margins": 2.196401596069336, |
| "rewards/rejected": -2.716794013977051, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.38741848868431145, |
| "grad_norm": 0.0546875, |
| "learning_rate": 4.8198791323866075e-05, |
| "logits/chosen": -6.782384395599365, |
| "logits/rejected": -6.805130958557129, |
| "logps/chosen": -66.81037139892578, |
| "logps/rejected": -87.42963409423828, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6235989928245544, |
| "rewards/margins": 2.195854663848877, |
| "rewards/rejected": -2.819453716278076, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3893364019946298, |
| "grad_norm": 0.0654296875, |
| "learning_rate": 4.819335582068633e-05, |
| "logits/chosen": -6.907446384429932, |
| "logits/rejected": -6.924661159515381, |
| "logps/chosen": -60.52661895751953, |
| "logps/rejected": -81.73246765136719, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5020779371261597, |
| "rewards/margins": 2.1980807781219482, |
| "rewards/rejected": -2.7001590728759766, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.3912543153049482, |
| "grad_norm": 0.04833984375, |
| "learning_rate": 4.818780180508317e-05, |
| "logits/chosen": -6.781402587890625, |
| "logits/rejected": -6.855971336364746, |
| "logps/chosen": -62.72636795043945, |
| "logps/rejected": -83.95735931396484, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5438644886016846, |
| "rewards/margins": 2.1982574462890625, |
| "rewards/rejected": -2.742121934890747, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3931722286152666, |
| "grad_norm": 0.06201171875, |
| "learning_rate": 4.818212930472272e-05, |
| "logits/chosen": -6.8052659034729, |
| "logits/rejected": -6.898565769195557, |
| "logps/chosen": -62.053138732910156, |
| "logps/rejected": -83.63028717041016, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5735294222831726, |
| "rewards/margins": 2.195286750793457, |
| "rewards/rejected": -2.7688159942626953, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.39509014192558495, |
| "grad_norm": 0.049072265625, |
| "learning_rate": 4.817633834786127e-05, |
| "logits/chosen": -6.7843756675720215, |
| "logits/rejected": -6.809134006500244, |
| "logps/chosen": -64.53950500488281, |
| "logps/rejected": -85.89805603027344, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5933750867843628, |
| "rewards/margins": 2.1912827491760254, |
| "rewards/rejected": -2.7846579551696777, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3970080552359033, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 4.8170428963345233e-05, |
| "logits/chosen": -6.7891716957092285, |
| "logits/rejected": -6.815330505371094, |
| "logps/chosen": -66.38642883300781, |
| "logps/rejected": -87.5756607055664, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6958648562431335, |
| "rewards/margins": 2.1894466876983643, |
| "rewards/rejected": -2.8853116035461426, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.3989259685462217, |
| "grad_norm": 0.07763671875, |
| "learning_rate": 4.81644011806109e-05, |
| "logits/chosen": -6.720099449157715, |
| "logits/rejected": -6.809721946716309, |
| "logps/chosen": -61.158897399902344, |
| "logps/rejected": -81.80070495605469, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.520710825920105, |
| "rewards/margins": 2.191119432449341, |
| "rewards/rejected": -2.7118301391601562, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.4008438818565401, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 4.8158255029684364e-05, |
| "logits/chosen": -6.766349792480469, |
| "logits/rejected": -6.818787574768066, |
| "logps/chosen": -63.04213333129883, |
| "logps/rejected": -85.06871032714844, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6426771283149719, |
| "rewards/margins": 2.19403076171875, |
| "rewards/rejected": -2.836707592010498, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.40276179516685845, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 4.815199054118132e-05, |
| "logits/chosen": -6.773943901062012, |
| "logits/rejected": -6.834680080413818, |
| "logps/chosen": -62.253082275390625, |
| "logps/rejected": -83.83917236328125, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4995577931404114, |
| "rewards/margins": 2.1934943199157715, |
| "rewards/rejected": -2.693052053451538, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4046797084771768, |
| "grad_norm": 0.045166015625, |
| "learning_rate": 4.8145607746306934e-05, |
| "logits/chosen": -6.594931602478027, |
| "logits/rejected": -6.683431148529053, |
| "logps/chosen": -66.52024841308594, |
| "logps/rejected": -87.12366485595703, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6391206979751587, |
| "rewards/margins": 2.1987528800964355, |
| "rewards/rejected": -2.8378734588623047, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.4065976217874952, |
| "grad_norm": 0.0458984375, |
| "learning_rate": 4.8139106676855725e-05, |
| "logits/chosen": -6.760615348815918, |
| "logits/rejected": -6.792774200439453, |
| "logps/chosen": -63.51324462890625, |
| "logps/rejected": -85.3805923461914, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5777386426925659, |
| "rewards/margins": 2.20072865486145, |
| "rewards/rejected": -2.7784667015075684, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.4085155350978136, |
| "grad_norm": 0.052734375, |
| "learning_rate": 4.813248736521134e-05, |
| "logits/chosen": -6.811188697814941, |
| "logits/rejected": -6.867379665374756, |
| "logps/chosen": -67.62599182128906, |
| "logps/rejected": -87.9825668334961, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7734149098396301, |
| "rewards/margins": 2.195405960083008, |
| "rewards/rejected": -2.9688210487365723, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.41043344840813195, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 4.812574984434643e-05, |
| "logits/chosen": -6.798884391784668, |
| "logits/rejected": -6.8902130126953125, |
| "logps/chosen": -60.3090705871582, |
| "logps/rejected": -81.30734252929688, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5456782579421997, |
| "rewards/margins": 2.198199987411499, |
| "rewards/rejected": -2.7438783645629883, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4123513617184503, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 4.811889414782247e-05, |
| "logits/chosen": -6.775029182434082, |
| "logits/rejected": -6.823780059814453, |
| "logps/chosen": -62.48878860473633, |
| "logps/rejected": -83.3646011352539, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5705373883247375, |
| "rewards/margins": 2.1929373741149902, |
| "rewards/rejected": -2.763474702835083, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4142692750287687, |
| "grad_norm": 0.04833984375, |
| "learning_rate": 4.811192030978961e-05, |
| "logits/chosen": -6.747941017150879, |
| "logits/rejected": -6.830478668212891, |
| "logps/chosen": -66.92538452148438, |
| "logps/rejected": -88.23207092285156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7639543414115906, |
| "rewards/margins": 2.1968750953674316, |
| "rewards/rejected": -2.960829734802246, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.4161871883390871, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 4.810482836498652e-05, |
| "logits/chosen": -6.778285980224609, |
| "logits/rejected": -6.8560028076171875, |
| "logps/chosen": -64.7900161743164, |
| "logps/rejected": -85.25300598144531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5712297558784485, |
| "rewards/margins": 2.196739912033081, |
| "rewards/rejected": -2.767969846725464, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.41810510164940545, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 4.809761834874016e-05, |
| "logits/chosen": -6.8289031982421875, |
| "logits/rejected": -6.893160820007324, |
| "logps/chosen": -59.30530548095703, |
| "logps/rejected": -81.9992446899414, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.48122939467430115, |
| "rewards/margins": 2.1956143379211426, |
| "rewards/rejected": -2.6768441200256348, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.42002301495972383, |
| "grad_norm": 0.048583984375, |
| "learning_rate": 4.809029029696565e-05, |
| "logits/chosen": -6.742526054382324, |
| "logits/rejected": -6.7710466384887695, |
| "logps/chosen": -70.74293518066406, |
| "logps/rejected": -91.24531555175781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7574164867401123, |
| "rewards/margins": 2.1936662197113037, |
| "rewards/rejected": -2.951082706451416, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.4219409282700422, |
| "grad_norm": 0.03955078125, |
| "learning_rate": 4.8082844246166064e-05, |
| "logits/chosen": -6.797771453857422, |
| "logits/rejected": -6.8490142822265625, |
| "logps/chosen": -58.99123001098633, |
| "logps/rejected": -79.65338134765625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4870396554470062, |
| "rewards/margins": 2.196023464202881, |
| "rewards/rejected": -2.68306303024292, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4238588415803606, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 4.8075280233432274e-05, |
| "logits/chosen": -6.773340702056885, |
| "logits/rejected": -6.859117031097412, |
| "logps/chosen": -64.4073486328125, |
| "logps/rejected": -85.19906616210938, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6140931248664856, |
| "rewards/margins": 2.196549654006958, |
| "rewards/rejected": -2.810642719268799, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.42577675489067895, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 4.806759829644277e-05, |
| "logits/chosen": -6.728902339935303, |
| "logits/rejected": -6.808338165283203, |
| "logps/chosen": -61.23081588745117, |
| "logps/rejected": -82.18063354492188, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4775692820549011, |
| "rewards/margins": 2.1966347694396973, |
| "rewards/rejected": -2.674203872680664, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.42769466820099733, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 4.805979847346342e-05, |
| "logits/chosen": -6.670124053955078, |
| "logits/rejected": -6.779568672180176, |
| "logps/chosen": -63.63054656982422, |
| "logps/rejected": -84.89369201660156, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5798670053482056, |
| "rewards/margins": 2.1969175338745117, |
| "rewards/rejected": -2.7767844200134277, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.4296125815113157, |
| "grad_norm": 0.04248046875, |
| "learning_rate": 4.805188080334735e-05, |
| "logits/chosen": -6.852222442626953, |
| "logits/rejected": -6.859063625335693, |
| "logps/chosen": -65.18397521972656, |
| "logps/rejected": -84.5948486328125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5618688464164734, |
| "rewards/margins": 2.198063373565674, |
| "rewards/rejected": -2.759932041168213, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4315304948216341, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 4.80438453255347e-05, |
| "logits/chosen": -6.80611515045166, |
| "logits/rejected": -6.918792724609375, |
| "logps/chosen": -60.48639678955078, |
| "logps/rejected": -82.47330474853516, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5442097187042236, |
| "rewards/margins": 2.1984286308288574, |
| "rewards/rejected": -2.742638111114502, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.43344840813195246, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 4.8035692080052436e-05, |
| "logits/chosen": -6.697965145111084, |
| "logits/rejected": -6.774870872497559, |
| "logps/chosen": -64.44297790527344, |
| "logps/rejected": -85.90459442138672, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7526843547821045, |
| "rewards/margins": 2.1978108882904053, |
| "rewards/rejected": -2.9504952430725098, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.43536632144227083, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 4.802742110751416e-05, |
| "logits/chosen": -6.803210258483887, |
| "logits/rejected": -6.829536437988281, |
| "logps/chosen": -60.73114776611328, |
| "logps/rejected": -82.74317169189453, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.44673386216163635, |
| "rewards/margins": 2.194598913192749, |
| "rewards/rejected": -2.6413326263427734, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.4372842347525892, |
| "grad_norm": 0.059326171875, |
| "learning_rate": 4.801903244911993e-05, |
| "logits/chosen": -6.766633033752441, |
| "logits/rejected": -6.864239692687988, |
| "logps/chosen": -63.66731643676758, |
| "logps/rejected": -82.88044738769531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5311779379844666, |
| "rewards/margins": 2.1935510635375977, |
| "rewards/rejected": -2.724729061126709, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4392021480629076, |
| "grad_norm": 0.044189453125, |
| "learning_rate": 4.8010526146656e-05, |
| "logits/chosen": -6.751435279846191, |
| "logits/rejected": -6.798174858093262, |
| "logps/chosen": -68.84373474121094, |
| "logps/rejected": -90.4491195678711, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7617834210395813, |
| "rewards/margins": 2.1939213275909424, |
| "rewards/rejected": -2.9557042121887207, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.44112006137322596, |
| "grad_norm": 0.04443359375, |
| "learning_rate": 4.800190224249464e-05, |
| "logits/chosen": -6.790631294250488, |
| "logits/rejected": -6.874907493591309, |
| "logps/chosen": -64.97017669677734, |
| "logps/rejected": -85.68257904052734, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5560901761054993, |
| "rewards/margins": 2.1980504989624023, |
| "rewards/rejected": -2.7541403770446777, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4430379746835443, |
| "grad_norm": 0.04296875, |
| "learning_rate": 4.799316077959397e-05, |
| "logits/chosen": -6.840447902679443, |
| "logits/rejected": -6.897547721862793, |
| "logps/chosen": -61.42547607421875, |
| "logps/rejected": -83.30725860595703, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5857731103897095, |
| "rewards/margins": 2.1990787982940674, |
| "rewards/rejected": -2.784851551055908, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.44495588799386265, |
| "grad_norm": 0.04052734375, |
| "learning_rate": 4.798430180149765e-05, |
| "logits/chosen": -6.734255313873291, |
| "logits/rejected": -6.802587032318115, |
| "logps/chosen": -63.668617248535156, |
| "logps/rejected": -86.23512268066406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6360193490982056, |
| "rewards/margins": 2.197523593902588, |
| "rewards/rejected": -2.833542823791504, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.44687380130418103, |
| "grad_norm": 0.046875, |
| "learning_rate": 4.797532535233475e-05, |
| "logits/chosen": -6.807023525238037, |
| "logits/rejected": -6.8437395095825195, |
| "logps/chosen": -61.99128341674805, |
| "logps/rejected": -82.16876220703125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5318325161933899, |
| "rewards/margins": 2.200625419616699, |
| "rewards/rejected": -2.7324578762054443, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.4487917146144994, |
| "grad_norm": 0.064453125, |
| "learning_rate": 4.7966231476819484e-05, |
| "logits/chosen": -6.760836124420166, |
| "logits/rejected": -6.797084808349609, |
| "logps/chosen": -60.612220764160156, |
| "logps/rejected": -81.6513442993164, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.574898362159729, |
| "rewards/margins": 2.1929774284362793, |
| "rewards/rejected": -2.7678756713867188, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.4507096279248178, |
| "grad_norm": 0.04150390625, |
| "learning_rate": 4.7957020220251006e-05, |
| "logits/chosen": -6.7507781982421875, |
| "logits/rejected": -6.816678524017334, |
| "logps/chosen": -59.12261199951172, |
| "logps/rejected": -81.34478759765625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4484129548072815, |
| "rewards/margins": 2.1968235969543457, |
| "rewards/rejected": -2.6452364921569824, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.45262754123513615, |
| "grad_norm": 0.040283203125, |
| "learning_rate": 4.7947691628513175e-05, |
| "logits/chosen": -6.8405938148498535, |
| "logits/rejected": -6.911283016204834, |
| "logps/chosen": -62.71659469604492, |
| "logps/rejected": -84.43605041503906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5480086207389832, |
| "rewards/margins": 2.1955459117889404, |
| "rewards/rejected": -2.7435545921325684, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 4.793824574807431e-05, |
| "logits/chosen": -6.7717742919921875, |
| "logits/rejected": -6.848321437835693, |
| "logps/chosen": -62.666900634765625, |
| "logps/rejected": -82.93745422363281, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6121290922164917, |
| "rewards/margins": 2.195594072341919, |
| "rewards/rejected": -2.8077232837677, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.4564633678557729, |
| "grad_norm": 0.06201171875, |
| "learning_rate": 4.7928682625987026e-05, |
| "logits/chosen": -6.753211975097656, |
| "logits/rejected": -6.8527021408081055, |
| "logps/chosen": -63.77888107299805, |
| "logps/rejected": -85.04472351074219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6296527981758118, |
| "rewards/margins": 2.1975722312927246, |
| "rewards/rejected": -2.8272249698638916, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.4583812811660913, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 4.79190023098879e-05, |
| "logits/chosen": -6.826653957366943, |
| "logits/rejected": -6.868197441101074, |
| "logps/chosen": -63.947975158691406, |
| "logps/rejected": -85.13566589355469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6934822797775269, |
| "rewards/margins": 2.195202350616455, |
| "rewards/rejected": -2.8886845111846924, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.46029919447640966, |
| "grad_norm": 0.0400390625, |
| "learning_rate": 4.7909204847997314e-05, |
| "logits/chosen": -6.788220405578613, |
| "logits/rejected": -6.831966400146484, |
| "logps/chosen": -60.81682205200195, |
| "logps/rejected": -81.76251220703125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5235145688056946, |
| "rewards/margins": 2.1972155570983887, |
| "rewards/rejected": -2.7207303047180176, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.46221710778672803, |
| "grad_norm": 0.044921875, |
| "learning_rate": 4.789929028911919e-05, |
| "logits/chosen": -6.754181861877441, |
| "logits/rejected": -6.800840854644775, |
| "logps/chosen": -64.92515563964844, |
| "logps/rejected": -86.32291412353516, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7156641483306885, |
| "rewards/margins": 2.1928858757019043, |
| "rewards/rejected": -2.9085500240325928, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.4641350210970464, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 4.7889258682640706e-05, |
| "logits/chosen": -6.868006229400635, |
| "logits/rejected": -6.896157264709473, |
| "logps/chosen": -60.29206085205078, |
| "logps/rejected": -81.28080749511719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3792288899421692, |
| "rewards/margins": 2.199084758758545, |
| "rewards/rejected": -2.5783140659332275, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4660529344073648, |
| "grad_norm": 0.037109375, |
| "learning_rate": 4.7879110078532146e-05, |
| "logits/chosen": -6.787661552429199, |
| "logits/rejected": -6.892359256744385, |
| "logps/chosen": -60.83192825317383, |
| "logps/rejected": -82.53497314453125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5688813924789429, |
| "rewards/margins": 2.2011029720306396, |
| "rewards/rejected": -2.7699837684631348, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.46797084771768316, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 4.7868844527346537e-05, |
| "logits/chosen": -6.738536834716797, |
| "logits/rejected": -6.749987602233887, |
| "logps/chosen": -62.675140380859375, |
| "logps/rejected": -84.42222595214844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7274782657623291, |
| "rewards/margins": 2.192878007888794, |
| "rewards/rejected": -2.920356035232544, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.46988876102800153, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 4.785846208021948e-05, |
| "logits/chosen": -6.793893337249756, |
| "logits/rejected": -6.861934661865234, |
| "logps/chosen": -61.73297119140625, |
| "logps/rejected": -83.82185363769531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5035967230796814, |
| "rewards/margins": 2.196446657180786, |
| "rewards/rejected": -2.700043201446533, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.4718066743383199, |
| "grad_norm": 0.05078125, |
| "learning_rate": 4.7847962788868864e-05, |
| "logits/chosen": -6.81466817855835, |
| "logits/rejected": -6.926538944244385, |
| "logps/chosen": -59.07057571411133, |
| "logps/rejected": -80.15919494628906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3567991554737091, |
| "rewards/margins": 2.197136163711548, |
| "rewards/rejected": -2.5539352893829346, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.4737245876486383, |
| "grad_norm": 0.058349609375, |
| "learning_rate": 4.783734670559462e-05, |
| "logits/chosen": -6.831311225891113, |
| "logits/rejected": -6.877817630767822, |
| "logps/chosen": -64.96932220458984, |
| "logps/rejected": -85.99290466308594, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6482155919075012, |
| "rewards/margins": 2.1953940391540527, |
| "rewards/rejected": -2.843609571456909, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.47564250095895666, |
| "grad_norm": 0.0390625, |
| "learning_rate": 4.7826613883278425e-05, |
| "logits/chosen": -6.820085048675537, |
| "logits/rejected": -6.860791206359863, |
| "logps/chosen": -59.95061111450195, |
| "logps/rejected": -81.42887878417969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5063437819480896, |
| "rewards/margins": 2.197067975997925, |
| "rewards/rejected": -2.703411817550659, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.47756041426927504, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 4.781576437538349e-05, |
| "logits/chosen": -6.722633361816406, |
| "logits/rejected": -6.769705772399902, |
| "logps/chosen": -62.50212860107422, |
| "logps/rejected": -83.28905487060547, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.486052930355072, |
| "rewards/margins": 2.1938464641571045, |
| "rewards/rejected": -2.6798996925354004, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.4794783275795934, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 4.780479823595428e-05, |
| "logits/chosen": -6.791558265686035, |
| "logits/rejected": -6.841597557067871, |
| "logps/chosen": -60.80095291137695, |
| "logps/rejected": -82.75154113769531, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5802160501480103, |
| "rewards/margins": 2.193262815475464, |
| "rewards/rejected": -2.7734789848327637, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4813962408899118, |
| "grad_norm": 0.05322265625, |
| "learning_rate": 4.7793715519616194e-05, |
| "logits/chosen": -6.77915096282959, |
| "logits/rejected": -6.826117515563965, |
| "logps/chosen": -61.505638122558594, |
| "logps/rejected": -81.8968505859375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.44169655442237854, |
| "rewards/margins": 2.194701671600342, |
| "rewards/rejected": -2.6363978385925293, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.48331415420023016, |
| "grad_norm": 0.047119140625, |
| "learning_rate": 4.778251628157537e-05, |
| "logits/chosen": -6.796322822570801, |
| "logits/rejected": -6.828359127044678, |
| "logps/chosen": -63.79240036010742, |
| "logps/rejected": -84.49711608886719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6860198378562927, |
| "rewards/margins": 2.1973636150360107, |
| "rewards/rejected": -2.883383274078369, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.48523206751054854, |
| "grad_norm": 0.035400390625, |
| "learning_rate": 4.777120057761836e-05, |
| "logits/chosen": -6.802372932434082, |
| "logits/rejected": -6.869441032409668, |
| "logps/chosen": -60.574119567871094, |
| "logps/rejected": -81.42657470703125, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4548783302307129, |
| "rewards/margins": 2.194202423095703, |
| "rewards/rejected": -2.649080753326416, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.4871499808208669, |
| "grad_norm": 0.048095703125, |
| "learning_rate": 4.7759768464111865e-05, |
| "logits/chosen": -6.760685920715332, |
| "logits/rejected": -6.879847049713135, |
| "logps/chosen": -65.76459503173828, |
| "logps/rejected": -85.82694244384766, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5539541244506836, |
| "rewards/margins": 2.1962039470672607, |
| "rewards/rejected": -2.7501578330993652, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4890678941311853, |
| "grad_norm": 0.049560546875, |
| "learning_rate": 4.774821999800244e-05, |
| "logits/chosen": -6.74704647064209, |
| "logits/rejected": -6.814779758453369, |
| "logps/chosen": -61.025238037109375, |
| "logps/rejected": -82.47376251220703, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5795901417732239, |
| "rewards/margins": 2.198065996170044, |
| "rewards/rejected": -2.777656078338623, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.49098580744150366, |
| "grad_norm": 0.044189453125, |
| "learning_rate": 4.773655523681627e-05, |
| "logits/chosen": -6.8133039474487305, |
| "logits/rejected": -6.857518672943115, |
| "logps/chosen": -59.81324005126953, |
| "logps/rejected": -81.82405853271484, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5019493103027344, |
| "rewards/margins": 2.1963508129119873, |
| "rewards/rejected": -2.6983001232147217, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.49290372075182204, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 4.7724774238658787e-05, |
| "logits/chosen": -6.734938144683838, |
| "logits/rejected": -6.824878692626953, |
| "logps/chosen": -64.06251525878906, |
| "logps/rejected": -85.48880767822266, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5863821506500244, |
| "rewards/margins": 2.197831630706787, |
| "rewards/rejected": -2.7842135429382324, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.4948216340621404, |
| "grad_norm": 0.048583984375, |
| "learning_rate": 4.7712877062214474e-05, |
| "logits/chosen": -6.768890380859375, |
| "logits/rejected": -6.9102044105529785, |
| "logps/chosen": -60.18365478515625, |
| "logps/rejected": -81.67802429199219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4272948205471039, |
| "rewards/margins": 2.1933791637420654, |
| "rewards/rejected": -2.620673656463623, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.4967395473724588, |
| "grad_norm": 0.044677734375, |
| "learning_rate": 4.7700863766746484e-05, |
| "logits/chosen": -6.8031463623046875, |
| "logits/rejected": -6.905440330505371, |
| "logps/chosen": -60.71598434448242, |
| "logps/rejected": -81.36261749267578, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4217904210090637, |
| "rewards/margins": 2.193682909011841, |
| "rewards/rejected": -2.6154732704162598, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.49865746068277716, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 4.768873441209644e-05, |
| "logits/chosen": -6.832518577575684, |
| "logits/rejected": -6.83789587020874, |
| "logps/chosen": -62.7836799621582, |
| "logps/rejected": -82.60746765136719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5582566857337952, |
| "rewards/margins": 2.1951565742492676, |
| "rewards/rejected": -2.753412961959839, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5005753739930955, |
| "grad_norm": 0.044189453125, |
| "learning_rate": 4.7676489058684055e-05, |
| "logits/chosen": -6.725746154785156, |
| "logits/rejected": -6.810865879058838, |
| "logps/chosen": -62.290992736816406, |
| "logps/rejected": -83.63255310058594, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4835914671421051, |
| "rewards/margins": 2.1948485374450684, |
| "rewards/rejected": -2.6784403324127197, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.5024932873034139, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 4.7664127767506884e-05, |
| "logits/chosen": -6.764736175537109, |
| "logits/rejected": -6.878215789794922, |
| "logps/chosen": -59.05699920654297, |
| "logps/rejected": -80.65967559814453, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5298063158988953, |
| "rewards/margins": 2.198462724685669, |
| "rewards/rejected": -2.72826886177063, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5044112006137322, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 4.765165060014e-05, |
| "logits/chosen": -6.81158447265625, |
| "logits/rejected": -6.888514041900635, |
| "logps/chosen": -56.54206466674805, |
| "logps/rejected": -76.92692565917969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.40915647149086, |
| "rewards/margins": 2.200356960296631, |
| "rewards/rejected": -2.609513521194458, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5063291139240507, |
| "grad_norm": 0.03369140625, |
| "learning_rate": 4.763905761873566e-05, |
| "logits/chosen": -6.763751983642578, |
| "logits/rejected": -6.8651227951049805, |
| "logps/chosen": -57.99541473388672, |
| "logps/rejected": -78.6756591796875, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4112626910209656, |
| "rewards/margins": 2.1965606212615967, |
| "rewards/rejected": -2.607823133468628, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.508247027234369, |
| "grad_norm": 0.048828125, |
| "learning_rate": 4.762634888602306e-05, |
| "logits/chosen": -6.782062530517578, |
| "logits/rejected": -6.847997188568115, |
| "logps/chosen": -64.24237823486328, |
| "logps/rejected": -85.33832550048828, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6039192080497742, |
| "rewards/margins": 2.1964235305786133, |
| "rewards/rejected": -2.8003430366516113, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.5101649405446874, |
| "grad_norm": 0.044921875, |
| "learning_rate": 4.761352446530797e-05, |
| "logits/chosen": -6.749083518981934, |
| "logits/rejected": -6.8179802894592285, |
| "logps/chosen": -59.93855667114258, |
| "logps/rejected": -81.1589126586914, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5835517048835754, |
| "rewards/margins": 2.194997549057007, |
| "rewards/rejected": -2.7785494327545166, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5120828538550057, |
| "grad_norm": 0.0380859375, |
| "learning_rate": 4.7600584420472416e-05, |
| "logits/chosen": -6.820675849914551, |
| "logits/rejected": -6.821556091308594, |
| "logps/chosen": -62.568939208984375, |
| "logps/rejected": -83.0500717163086, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6582069993019104, |
| "rewards/margins": 2.1977479457855225, |
| "rewards/rejected": -2.855954647064209, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.5140007671653242, |
| "grad_norm": 0.0400390625, |
| "learning_rate": 4.758752881597442e-05, |
| "logits/chosen": -6.764553070068359, |
| "logits/rejected": -6.8421125411987305, |
| "logps/chosen": -61.8564567565918, |
| "logps/rejected": -82.87976837158203, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5464135408401489, |
| "rewards/margins": 2.1998982429504395, |
| "rewards/rejected": -2.746311902999878, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5159186804756425, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 4.757435771684761e-05, |
| "logits/chosen": -6.775679111480713, |
| "logits/rejected": -6.910268306732178, |
| "logps/chosen": -62.16166305541992, |
| "logps/rejected": -85.0639419555664, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5519260764122009, |
| "rewards/margins": 2.1962175369262695, |
| "rewards/rejected": -2.7481436729431152, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.5178365937859609, |
| "grad_norm": 0.05078125, |
| "learning_rate": 4.756107118870093e-05, |
| "logits/chosen": -6.817084312438965, |
| "logits/rejected": -6.920017242431641, |
| "logps/chosen": -63.124549865722656, |
| "logps/rejected": -84.37528228759766, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5143846273422241, |
| "rewards/margins": 2.195559024810791, |
| "rewards/rejected": -2.7099435329437256, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5197545070962792, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 4.754766929771832e-05, |
| "logits/chosen": -6.7549333572387695, |
| "logits/rejected": -6.808642387390137, |
| "logps/chosen": -65.70404052734375, |
| "logps/rejected": -86.11283874511719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7226360440254211, |
| "rewards/margins": 2.1955502033233643, |
| "rewards/rejected": -2.9181859493255615, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.5216724204065977, |
| "grad_norm": 0.0299072265625, |
| "learning_rate": 4.7534152110658354e-05, |
| "logits/chosen": -6.86520528793335, |
| "logits/rejected": -6.91876220703125, |
| "logps/chosen": -63.727516174316406, |
| "logps/rejected": -84.4239273071289, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.576887845993042, |
| "rewards/margins": 2.19865345954895, |
| "rewards/rejected": -2.775541067123413, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.523590333716916, |
| "grad_norm": 0.035888671875, |
| "learning_rate": 4.752051969485394e-05, |
| "logits/chosen": -6.792748928070068, |
| "logits/rejected": -6.829402923583984, |
| "logps/chosen": -58.34467697143555, |
| "logps/rejected": -80.06613159179688, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4105150103569031, |
| "rewards/margins": 2.199594259262085, |
| "rewards/rejected": -2.610109329223633, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.5255082470272344, |
| "grad_norm": 0.040283203125, |
| "learning_rate": 4.7506772118211987e-05, |
| "logits/chosen": -6.792714595794678, |
| "logits/rejected": -6.8383941650390625, |
| "logps/chosen": -69.7136459350586, |
| "logps/rejected": -90.48081970214844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8187990188598633, |
| "rewards/margins": 2.194049596786499, |
| "rewards/rejected": -3.0128488540649414, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.5274261603375527, |
| "grad_norm": 0.036376953125, |
| "learning_rate": 4.749290944921303e-05, |
| "logits/chosen": -6.784049987792969, |
| "logits/rejected": -6.838326454162598, |
| "logps/chosen": -60.717193603515625, |
| "logps/rejected": -82.25090026855469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5634230375289917, |
| "rewards/margins": 2.1975927352905273, |
| "rewards/rejected": -2.7610154151916504, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.5293440736478712, |
| "grad_norm": 0.049560546875, |
| "learning_rate": 4.747893175691092e-05, |
| "logits/chosen": -6.78844690322876, |
| "logits/rejected": -6.837592124938965, |
| "logps/chosen": -62.469459533691406, |
| "logps/rejected": -83.67124938964844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6941007971763611, |
| "rewards/margins": 2.196812152862549, |
| "rewards/rejected": -2.890913248062134, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5312619869581895, |
| "grad_norm": 0.0546875, |
| "learning_rate": 4.7464839110932476e-05, |
| "logits/chosen": -6.742993354797363, |
| "logits/rejected": -6.785296440124512, |
| "logps/chosen": -69.74168395996094, |
| "logps/rejected": -91.88017272949219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.825350284576416, |
| "rewards/margins": 2.1931517124176025, |
| "rewards/rejected": -3.0185019969940186, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.5331799002685078, |
| "grad_norm": 0.0390625, |
| "learning_rate": 4.745063158147712e-05, |
| "logits/chosen": -6.85055685043335, |
| "logits/rejected": -6.899823188781738, |
| "logps/chosen": -61.71512985229492, |
| "logps/rejected": -83.54269409179688, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.472137987613678, |
| "rewards/margins": 2.195171356201172, |
| "rewards/rejected": -2.667309522628784, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5350978135788262, |
| "grad_norm": 0.0400390625, |
| "learning_rate": 4.743630923931655e-05, |
| "logits/chosen": -6.822695732116699, |
| "logits/rejected": -6.846883296966553, |
| "logps/chosen": -62.36680221557617, |
| "logps/rejected": -83.02012634277344, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5894892811775208, |
| "rewards/margins": 2.195992946624756, |
| "rewards/rejected": -2.785482168197632, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.5370157268891446, |
| "grad_norm": 0.048828125, |
| "learning_rate": 4.742187215579439e-05, |
| "logits/chosen": -6.81241512298584, |
| "logits/rejected": -6.8298821449279785, |
| "logps/chosen": -63.766090393066406, |
| "logps/rejected": -85.15402221679688, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6096689701080322, |
| "rewards/margins": 2.1970481872558594, |
| "rewards/rejected": -2.8067169189453125, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.538933640199463, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 4.740732040282581e-05, |
| "logits/chosen": -6.7713141441345215, |
| "logits/rejected": -6.855074882507324, |
| "logps/chosen": -63.4873161315918, |
| "logps/rejected": -85.61607360839844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.577864944934845, |
| "rewards/margins": 2.197863817214966, |
| "rewards/rejected": -2.775728464126587, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.5408515535097813, |
| "grad_norm": 0.036865234375, |
| "learning_rate": 4.739265405289716e-05, |
| "logits/chosen": -6.76360559463501, |
| "logits/rejected": -6.843583583831787, |
| "logps/chosen": -61.78276443481445, |
| "logps/rejected": -83.03706359863281, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.523431658744812, |
| "rewards/margins": 2.1963558197021484, |
| "rewards/rejected": -2.719787120819092, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5427694668200997, |
| "grad_norm": 0.046875, |
| "learning_rate": 4.737787317906568e-05, |
| "logits/chosen": -6.7519378662109375, |
| "logits/rejected": -6.820305824279785, |
| "logps/chosen": -63.89045333862305, |
| "logps/rejected": -85.07756042480469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5729597806930542, |
| "rewards/margins": 2.1944832801818848, |
| "rewards/rejected": -2.7674431800842285, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.5446873801304181, |
| "grad_norm": 0.0380859375, |
| "learning_rate": 4.736297785495903e-05, |
| "logits/chosen": -6.815577507019043, |
| "logits/rejected": -6.836461544036865, |
| "logps/chosen": -60.28411865234375, |
| "logps/rejected": -82.16222381591797, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6184202432632446, |
| "rewards/margins": 2.1970183849334717, |
| "rewards/rejected": -2.815438747406006, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5466052934407365, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 4.734796815477503e-05, |
| "logits/chosen": -6.764504909515381, |
| "logits/rejected": -6.821629524230957, |
| "logps/chosen": -61.06037139892578, |
| "logps/rejected": -82.24360656738281, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5765479207038879, |
| "rewards/margins": 2.1989150047302246, |
| "rewards/rejected": -2.775463104248047, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5485232067510548, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 4.73328441532812e-05, |
| "logits/chosen": -6.749651908874512, |
| "logits/rejected": -6.841190338134766, |
| "logps/chosen": -59.021446228027344, |
| "logps/rejected": -80.57456970214844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4866851270198822, |
| "rewards/margins": 2.1995737552642822, |
| "rewards/rejected": -2.6862587928771973, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5504411200613732, |
| "grad_norm": 0.049072265625, |
| "learning_rate": 4.731760592581445e-05, |
| "logits/chosen": -6.712447166442871, |
| "logits/rejected": -6.835790157318115, |
| "logps/chosen": -59.537620544433594, |
| "logps/rejected": -81.30517578125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4595329165458679, |
| "rewards/margins": 2.198075771331787, |
| "rewards/rejected": -2.6576085090637207, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.5523590333716916, |
| "grad_norm": 0.043212890625, |
| "learning_rate": 4.7302253548280674e-05, |
| "logits/chosen": -6.720251560211182, |
| "logits/rejected": -6.798407077789307, |
| "logps/chosen": -60.29015350341797, |
| "logps/rejected": -81.96891784667969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.56800776720047, |
| "rewards/margins": 2.1948490142822266, |
| "rewards/rejected": -2.762856960296631, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.55427694668201, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 4.728678709715438e-05, |
| "logits/chosen": -6.74158239364624, |
| "logits/rejected": -6.872788906097412, |
| "logps/chosen": -61.41450881958008, |
| "logps/rejected": -83.51676940917969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4987873136997223, |
| "rewards/margins": 2.1945414543151855, |
| "rewards/rejected": -2.693329095840454, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.5561948599923283, |
| "grad_norm": 0.043212890625, |
| "learning_rate": 4.72712066494783e-05, |
| "logits/chosen": -6.711419105529785, |
| "logits/rejected": -6.823484897613525, |
| "logps/chosen": -62.261627197265625, |
| "logps/rejected": -83.39556121826172, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5504243969917297, |
| "rewards/margins": 2.1979422569274902, |
| "rewards/rejected": -2.748366594314575, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5581127733026467, |
| "grad_norm": 0.0390625, |
| "learning_rate": 4.725551228286304e-05, |
| "logits/chosen": -6.845943450927734, |
| "logits/rejected": -6.932714939117432, |
| "logps/chosen": -65.34230041503906, |
| "logps/rejected": -86.31060791015625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5645556449890137, |
| "rewards/margins": 2.200577735900879, |
| "rewards/rejected": -2.7651333808898926, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.5600306866129651, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 4.723970407548663e-05, |
| "logits/chosen": -6.818108558654785, |
| "logits/rejected": -6.868282318115234, |
| "logps/chosen": -62.125328063964844, |
| "logps/rejected": -82.881591796875, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.45852231979370117, |
| "rewards/margins": 2.1955528259277344, |
| "rewards/rejected": -2.6540751457214355, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5619485999232835, |
| "grad_norm": 0.060546875, |
| "learning_rate": 4.7223782106094215e-05, |
| "logits/chosen": -6.684022426605225, |
| "logits/rejected": -6.757493495941162, |
| "logps/chosen": -64.17045593261719, |
| "logps/rejected": -85.75135803222656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5858643651008606, |
| "rewards/margins": 2.1960318088531494, |
| "rewards/rejected": -2.781895875930786, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5638665132336018, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 4.720774645399759e-05, |
| "logits/chosen": -6.799399375915527, |
| "logits/rejected": -6.8811469078063965, |
| "logps/chosen": -58.72871780395508, |
| "logps/rejected": -79.8555908203125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4386534094810486, |
| "rewards/margins": 2.194446086883545, |
| "rewards/rejected": -2.6330995559692383, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5657844265439202, |
| "grad_norm": 0.0306396484375, |
| "learning_rate": 4.719159719907484e-05, |
| "logits/chosen": -6.7151007652282715, |
| "logits/rejected": -6.806893348693848, |
| "logps/chosen": -59.9053840637207, |
| "logps/rejected": -80.97401428222656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5145848393440247, |
| "rewards/margins": 2.198782444000244, |
| "rewards/rejected": -2.713367223739624, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5677023398542386, |
| "grad_norm": 0.044189453125, |
| "learning_rate": 4.7175334421769954e-05, |
| "logits/chosen": -6.7644171714782715, |
| "logits/rejected": -6.841060638427734, |
| "logps/chosen": -60.12373733520508, |
| "logps/rejected": -80.3787612915039, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5275717973709106, |
| "rewards/margins": 2.1961476802825928, |
| "rewards/rejected": -2.723719596862793, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.569620253164557, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 4.715895820309239e-05, |
| "logits/chosen": -6.837026119232178, |
| "logits/rejected": -6.902268886566162, |
| "logps/chosen": -63.5143928527832, |
| "logps/rejected": -85.01191711425781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5784210562705994, |
| "rewards/margins": 2.199751377105713, |
| "rewards/rejected": -2.778172492980957, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.5715381664748753, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 4.71424686246167e-05, |
| "logits/chosen": -6.749986171722412, |
| "logits/rejected": -6.84371280670166, |
| "logps/chosen": -59.216644287109375, |
| "logps/rejected": -81.05298614501953, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.37248149514198303, |
| "rewards/margins": 2.1974806785583496, |
| "rewards/rejected": -2.5699620246887207, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5734560797851938, |
| "grad_norm": 0.045654296875, |
| "learning_rate": 4.7125865768482113e-05, |
| "logits/chosen": -6.700467586517334, |
| "logits/rejected": -6.8221635818481445, |
| "logps/chosen": -65.43324279785156, |
| "logps/rejected": -86.24195861816406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6096637845039368, |
| "rewards/margins": 2.195284366607666, |
| "rewards/rejected": -2.804947853088379, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5753739930955121, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 4.710914971739211e-05, |
| "logits/chosen": -6.764548301696777, |
| "logits/rejected": -6.841375827789307, |
| "logps/chosen": -64.81352233886719, |
| "logps/rejected": -86.60743713378906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6613066792488098, |
| "rewards/margins": 2.1976189613342285, |
| "rewards/rejected": -2.8589255809783936, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5753739930955121, |
| "eval_logits/chosen": -6.663508415222168, |
| "eval_logits/rejected": -6.8637237548828125, |
| "eval_logps/chosen": -61.2503776550293, |
| "eval_logps/rejected": -84.0605239868164, |
| "eval_loss": 0.32512322068214417, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -0.5797533988952637, |
| "eval_rewards/margins": 2.197005271911621, |
| "eval_rewards/rejected": -2.7767584323883057, |
| "eval_runtime": 5.3083, |
| "eval_samples_per_second": 37.677, |
| "eval_steps_per_second": 37.677, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5772919064058305, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 4.709232055461405e-05, |
| "logits/chosen": -6.675856113433838, |
| "logits/rejected": -6.763123512268066, |
| "logps/chosen": -61.63834762573242, |
| "logps/rejected": -83.7894515991211, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5152076482772827, |
| "rewards/margins": 2.196242570877075, |
| "rewards/rejected": -2.7114500999450684, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.5792098197161488, |
| "grad_norm": 0.0498046875, |
| "learning_rate": 4.707537836397872e-05, |
| "logits/chosen": -6.814902305603027, |
| "logits/rejected": -6.906038761138916, |
| "logps/chosen": -64.63713836669922, |
| "logps/rejected": -86.2259521484375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5156192779541016, |
| "rewards/margins": 2.196268320083618, |
| "rewards/rejected": -2.7118875980377197, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5811277330264673, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 4.705832322987994e-05, |
| "logits/chosen": -6.769433498382568, |
| "logits/rejected": -6.875026702880859, |
| "logps/chosen": -61.46131134033203, |
| "logps/rejected": -82.59616088867188, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5708250999450684, |
| "rewards/margins": 2.193892002105713, |
| "rewards/rejected": -2.7647171020507812, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5830456463367856, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 4.7041155237274105e-05, |
| "logits/chosen": -6.731886863708496, |
| "logits/rejected": -6.820645809173584, |
| "logps/chosen": -64.55342102050781, |
| "logps/rejected": -85.70281219482422, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6872868537902832, |
| "rewards/margins": 2.1945242881774902, |
| "rewards/rejected": -2.8818111419677734, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.584963559647104, |
| "grad_norm": 0.052490234375, |
| "learning_rate": 4.702387447167984e-05, |
| "logits/chosen": -6.7164740562438965, |
| "logits/rejected": -6.778362274169922, |
| "logps/chosen": -62.84291458129883, |
| "logps/rejected": -84.47712707519531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5710551142692566, |
| "rewards/margins": 2.1979777812957764, |
| "rewards/rejected": -2.7690329551696777, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.5868814729574223, |
| "grad_norm": 0.034912109375, |
| "learning_rate": 4.700648101917749e-05, |
| "logits/chosen": -6.749175071716309, |
| "logits/rejected": -6.815337181091309, |
| "logps/chosen": -63.7158088684082, |
| "logps/rejected": -84.00125885009766, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6128538846969604, |
| "rewards/margins": 2.1982455253601074, |
| "rewards/rejected": -2.8110997676849365, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5887993862677406, |
| "grad_norm": 0.033935546875, |
| "learning_rate": 4.698897496640871e-05, |
| "logits/chosen": -6.734133720397949, |
| "logits/rejected": -6.797808647155762, |
| "logps/chosen": -61.02180862426758, |
| "logps/rejected": -81.09629821777344, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5548717379570007, |
| "rewards/margins": 2.196436643600464, |
| "rewards/rejected": -2.7513084411621094, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.5907172995780591, |
| "grad_norm": 0.05078125, |
| "learning_rate": 4.697135640057609e-05, |
| "logits/chosen": -6.733152866363525, |
| "logits/rejected": -6.8301849365234375, |
| "logps/chosen": -61.2963752746582, |
| "logps/rejected": -81.04912567138672, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.41952013969421387, |
| "rewards/margins": 2.194058895111084, |
| "rewards/rejected": -2.613579034805298, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5926352128883774, |
| "grad_norm": 0.037353515625, |
| "learning_rate": 4.695362540944266e-05, |
| "logits/chosen": -6.73095178604126, |
| "logits/rejected": -6.822793006896973, |
| "logps/chosen": -63.28429412841797, |
| "logps/rejected": -83.17731475830078, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5410445928573608, |
| "rewards/margins": 2.1978955268859863, |
| "rewards/rejected": -2.7389400005340576, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.5945531261986958, |
| "grad_norm": 0.03564453125, |
| "learning_rate": 4.693578208133145e-05, |
| "logits/chosen": -6.678317070007324, |
| "logits/rejected": -6.7426886558532715, |
| "logps/chosen": -64.89047241210938, |
| "logps/rejected": -87.15535736083984, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7630675435066223, |
| "rewards/margins": 2.196265697479248, |
| "rewards/rejected": -2.9593334197998047, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5964710395090141, |
| "grad_norm": 0.03466796875, |
| "learning_rate": 4.691782650512511e-05, |
| "logits/chosen": -6.7095465660095215, |
| "logits/rejected": -6.760190010070801, |
| "logps/chosen": -67.8410415649414, |
| "logps/rejected": -89.27967834472656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8494323492050171, |
| "rewards/margins": 2.1951375007629395, |
| "rewards/rejected": -3.0445704460144043, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.5983889528193326, |
| "grad_norm": 0.036376953125, |
| "learning_rate": 4.6899758770265416e-05, |
| "logits/chosen": -6.807528495788574, |
| "logits/rejected": -6.854405403137207, |
| "logps/chosen": -64.88179016113281, |
| "logps/rejected": -85.54252624511719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5705582499504089, |
| "rewards/margins": 2.196807384490967, |
| "rewards/rejected": -2.7673659324645996, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6003068661296509, |
| "grad_norm": 0.05078125, |
| "learning_rate": 4.688157896675282e-05, |
| "logits/chosen": -6.67507791519165, |
| "logits/rejected": -6.780775547027588, |
| "logps/chosen": -63.21833419799805, |
| "logps/rejected": -83.4687728881836, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5824323296546936, |
| "rewards/margins": 2.2005057334899902, |
| "rewards/rejected": -2.782937526702881, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.6022247794399693, |
| "grad_norm": 0.03955078125, |
| "learning_rate": 4.686328718514604e-05, |
| "logits/chosen": -6.837525844573975, |
| "logits/rejected": -6.915156364440918, |
| "logps/chosen": -62.40456008911133, |
| "logps/rejected": -84.44121551513672, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5634955167770386, |
| "rewards/margins": 2.195712089538574, |
| "rewards/rejected": -2.7592074871063232, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6041426927502876, |
| "grad_norm": 0.0390625, |
| "learning_rate": 4.684488351656158e-05, |
| "logits/chosen": -6.743095397949219, |
| "logits/rejected": -6.8821210861206055, |
| "logps/chosen": -58.7745361328125, |
| "logps/rejected": -79.80195617675781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.47515901923179626, |
| "rewards/margins": 2.195002317428589, |
| "rewards/rejected": -2.670161485671997, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.6060606060606061, |
| "grad_norm": 0.04443359375, |
| "learning_rate": 4.6826368052673295e-05, |
| "logits/chosen": -6.782208442687988, |
| "logits/rejected": -6.839322090148926, |
| "logps/chosen": -58.1090202331543, |
| "logps/rejected": -79.67631530761719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5011711716651917, |
| "rewards/margins": 2.196396589279175, |
| "rewards/rejected": -2.6975674629211426, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6079785193709244, |
| "grad_norm": 0.0634765625, |
| "learning_rate": 4.68077408857119e-05, |
| "logits/chosen": -6.660303592681885, |
| "logits/rejected": -6.718382835388184, |
| "logps/chosen": -64.7911605834961, |
| "logps/rejected": -86.10942077636719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6913571357727051, |
| "rewards/margins": 2.1963887214660645, |
| "rewards/rejected": -2.8877463340759277, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.6098964326812428, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 4.678900210846456e-05, |
| "logits/chosen": -6.826661586761475, |
| "logits/rejected": -6.863768100738525, |
| "logps/chosen": -64.81199645996094, |
| "logps/rejected": -85.13519287109375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5662140846252441, |
| "rewards/margins": 2.198249340057373, |
| "rewards/rejected": -2.764463424682617, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.6118143459915611, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 4.677015181427439e-05, |
| "logits/chosen": -6.7675042152404785, |
| "logits/rejected": -6.877073764801025, |
| "logps/chosen": -61.42873001098633, |
| "logps/rejected": -83.35433197021484, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6646031141281128, |
| "rewards/margins": 2.1970815658569336, |
| "rewards/rejected": -2.861684799194336, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.6137322593018796, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 4.675119009704e-05, |
| "logits/chosen": -6.88427734375, |
| "logits/rejected": -6.932794094085693, |
| "logps/chosen": -61.87055206298828, |
| "logps/rejected": -82.3925552368164, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.46913057565689087, |
| "rewards/margins": 2.1977620124816895, |
| "rewards/rejected": -2.6668922901153564, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6156501726121979, |
| "grad_norm": 0.045166015625, |
| "learning_rate": 4.673211705121503e-05, |
| "logits/chosen": -6.758817195892334, |
| "logits/rejected": -6.858736991882324, |
| "logps/chosen": -61.0307502746582, |
| "logps/rejected": -82.81901550292969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.506413459777832, |
| "rewards/margins": 2.196242332458496, |
| "rewards/rejected": -2.7026560306549072, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.6175680859225163, |
| "grad_norm": 0.0380859375, |
| "learning_rate": 4.67129327718077e-05, |
| "logits/chosen": -6.817410945892334, |
| "logits/rejected": -6.829085350036621, |
| "logps/chosen": -62.83549118041992, |
| "logps/rejected": -83.76074981689453, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6362646818161011, |
| "rewards/margins": 2.196465015411377, |
| "rewards/rejected": -2.8327295780181885, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.6194859992328346, |
| "grad_norm": 0.053466796875, |
| "learning_rate": 4.669363735438028e-05, |
| "logits/chosen": -6.759008884429932, |
| "logits/rejected": -6.868435859680176, |
| "logps/chosen": -61.52397918701172, |
| "logps/rejected": -84.11946868896484, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5710245966911316, |
| "rewards/margins": 2.1951489448547363, |
| "rewards/rejected": -2.7661736011505127, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.6214039125431531, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 4.667423089504868e-05, |
| "logits/chosen": -6.804877281188965, |
| "logits/rejected": -6.851701259613037, |
| "logps/chosen": -61.42131423950195, |
| "logps/rejected": -82.5312728881836, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5742114186286926, |
| "rewards/margins": 2.1968421936035156, |
| "rewards/rejected": -2.7710535526275635, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6233218258534714, |
| "grad_norm": 0.03173828125, |
| "learning_rate": 4.665471349048191e-05, |
| "logits/chosen": -6.743138790130615, |
| "logits/rejected": -6.808810234069824, |
| "logps/chosen": -57.78135299682617, |
| "logps/rejected": -78.70478057861328, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4692755341529846, |
| "rewards/margins": 2.198038101196289, |
| "rewards/rejected": -2.667313814163208, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.6252397391637898, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 4.663508523790167e-05, |
| "logits/chosen": -6.797055244445801, |
| "logits/rejected": -6.851097106933594, |
| "logps/chosen": -63.55707550048828, |
| "logps/rejected": -84.19354248046875, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5817860960960388, |
| "rewards/margins": 2.198854923248291, |
| "rewards/rejected": -2.7806410789489746, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6271576524741082, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 4.661534623508179e-05, |
| "logits/chosen": -6.698374271392822, |
| "logits/rejected": -6.778656005859375, |
| "logps/chosen": -64.4042739868164, |
| "logps/rejected": -85.94600677490234, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5719730257987976, |
| "rewards/margins": 2.195774555206299, |
| "rewards/rejected": -2.767747402191162, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.6290755657844266, |
| "grad_norm": 0.0458984375, |
| "learning_rate": 4.65954965803478e-05, |
| "logits/chosen": -6.783711910247803, |
| "logits/rejected": -6.867341041564941, |
| "logps/chosen": -64.90428161621094, |
| "logps/rejected": -85.47926330566406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6290830969810486, |
| "rewards/margins": 2.197962522506714, |
| "rewards/rejected": -2.827045440673828, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6309934790947449, |
| "grad_norm": 0.049560546875, |
| "learning_rate": 4.657553637257641e-05, |
| "logits/chosen": -6.754020690917969, |
| "logits/rejected": -6.789788722991943, |
| "logps/chosen": -62.860679626464844, |
| "logps/rejected": -84.67469787597656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.58977210521698, |
| "rewards/margins": 2.19518780708313, |
| "rewards/rejected": -2.784959554672241, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.6329113924050633, |
| "grad_norm": 0.0712890625, |
| "learning_rate": 4.655546571119501e-05, |
| "logits/chosen": -6.775702476501465, |
| "logits/rejected": -6.795037746429443, |
| "logps/chosen": -63.498443603515625, |
| "logps/rejected": -84.87963104248047, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6724980473518372, |
| "rewards/margins": 2.198357105255127, |
| "rewards/rejected": -2.8708553314208984, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6348293057153817, |
| "grad_norm": 0.064453125, |
| "learning_rate": 4.653528469618122e-05, |
| "logits/chosen": -6.807392120361328, |
| "logits/rejected": -6.87429141998291, |
| "logps/chosen": -59.84100341796875, |
| "logps/rejected": -80.7729721069336, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5027590990066528, |
| "rewards/margins": 2.1962478160858154, |
| "rewards/rejected": -2.699007034301758, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.6367472190257001, |
| "grad_norm": 0.034423828125, |
| "learning_rate": 4.651499342806235e-05, |
| "logits/chosen": -6.737414360046387, |
| "logits/rejected": -6.8177337646484375, |
| "logps/chosen": -58.65924072265625, |
| "logps/rejected": -79.90467071533203, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4649263918399811, |
| "rewards/margins": 2.196115493774414, |
| "rewards/rejected": -2.661041736602783, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6386651323360184, |
| "grad_norm": 0.042236328125, |
| "learning_rate": 4.64945920079149e-05, |
| "logits/chosen": -6.836834907531738, |
| "logits/rejected": -6.920575141906738, |
| "logps/chosen": -60.32160568237305, |
| "logps/rejected": -82.49227905273438, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5061328411102295, |
| "rewards/margins": 2.1959569454193115, |
| "rewards/rejected": -2.70209002494812, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.6405830456463368, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 4.6474080537364086e-05, |
| "logits/chosen": -6.743792533874512, |
| "logits/rejected": -6.792672157287598, |
| "logps/chosen": -65.84016418457031, |
| "logps/rejected": -86.33671569824219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7148057818412781, |
| "rewards/margins": 2.1939172744750977, |
| "rewards/rejected": -2.9087231159210205, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6425009589566552, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 4.64534591185833e-05, |
| "logits/chosen": -6.721620082855225, |
| "logits/rejected": -6.873713493347168, |
| "logps/chosen": -59.97218704223633, |
| "logps/rejected": -82.32026672363281, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5144708752632141, |
| "rewards/margins": 2.1981799602508545, |
| "rewards/rejected": -2.712650775909424, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.6444188722669736, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 4.643272785429364e-05, |
| "logits/chosen": -6.694830894470215, |
| "logits/rejected": -6.757704734802246, |
| "logps/chosen": -62.828887939453125, |
| "logps/rejected": -85.16447448730469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6964989900588989, |
| "rewards/margins": 2.196366786956787, |
| "rewards/rejected": -2.8928656578063965, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6463367855772919, |
| "grad_norm": 0.06689453125, |
| "learning_rate": 4.6411886847763344e-05, |
| "logits/chosen": -6.788907051086426, |
| "logits/rejected": -6.820149898529053, |
| "logps/chosen": -64.8192367553711, |
| "logps/rejected": -86.21455383300781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7176898717880249, |
| "rewards/margins": 2.1988344192504883, |
| "rewards/rejected": -2.9165244102478027, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.6482546988876102, |
| "grad_norm": 0.038330078125, |
| "learning_rate": 4.6390936202807337e-05, |
| "logits/chosen": -6.768073081970215, |
| "logits/rejected": -6.80368709564209, |
| "logps/chosen": -69.4419174194336, |
| "logps/rejected": -89.60987854003906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7591196894645691, |
| "rewards/margins": 2.195878744125366, |
| "rewards/rejected": -2.95499849319458, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6501726121979287, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 4.636987602378666e-05, |
| "logits/chosen": -6.750307559967041, |
| "logits/rejected": -6.827138423919678, |
| "logps/chosen": -64.7966537475586, |
| "logps/rejected": -86.62849426269531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6886340379714966, |
| "rewards/margins": 2.195281505584717, |
| "rewards/rejected": -2.883915662765503, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.652090525508247, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 4.6348706415607987e-05, |
| "logits/chosen": -6.8090667724609375, |
| "logits/rejected": -6.860124111175537, |
| "logps/chosen": -59.753684997558594, |
| "logps/rejected": -81.40279388427734, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.585098147392273, |
| "rewards/margins": 2.1974575519561768, |
| "rewards/rejected": -2.78255558013916, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6540084388185654, |
| "grad_norm": 0.051513671875, |
| "learning_rate": 4.6327427483723095e-05, |
| "logits/chosen": -6.84018611907959, |
| "logits/rejected": -6.8761467933654785, |
| "logps/chosen": -64.61951446533203, |
| "logps/rejected": -85.91930389404297, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5915480852127075, |
| "rewards/margins": 2.1989455223083496, |
| "rewards/rejected": -2.7904937267303467, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.6559263521288837, |
| "grad_norm": 0.05712890625, |
| "learning_rate": 4.6306039334128314e-05, |
| "logits/chosen": -6.728167533874512, |
| "logits/rejected": -6.835136413574219, |
| "logps/chosen": -59.91040802001953, |
| "logps/rejected": -82.41685485839844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5621598362922668, |
| "rewards/margins": 2.199091672897339, |
| "rewards/rejected": -2.761251926422119, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6578442654392022, |
| "grad_norm": 0.0546875, |
| "learning_rate": 4.628454207336403e-05, |
| "logits/chosen": -6.8048810958862305, |
| "logits/rejected": -6.874848365783691, |
| "logps/chosen": -63.48942947387695, |
| "logps/rejected": -84.7466812133789, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5312366485595703, |
| "rewards/margins": 2.1963775157928467, |
| "rewards/rejected": -2.727614402770996, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.6597621787495205, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 4.6262935808514154e-05, |
| "logits/chosen": -6.784109592437744, |
| "logits/rejected": -6.887749671936035, |
| "logps/chosen": -57.665740966796875, |
| "logps/rejected": -79.55059814453125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.33379465341567993, |
| "rewards/margins": 2.198172092437744, |
| "rewards/rejected": -2.5319666862487793, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6616800920598389, |
| "grad_norm": 0.044677734375, |
| "learning_rate": 4.624122064720555e-05, |
| "logits/chosen": -6.82706356048584, |
| "logits/rejected": -6.852479457855225, |
| "logps/chosen": -61.39141845703125, |
| "logps/rejected": -82.49794006347656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5211236476898193, |
| "rewards/margins": 2.1956381797790527, |
| "rewards/rejected": -2.716762065887451, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.6635980053701572, |
| "grad_norm": 0.050048828125, |
| "learning_rate": 4.621939669760755e-05, |
| "logits/chosen": -6.765600681304932, |
| "logits/rejected": -6.7940497398376465, |
| "logps/chosen": -60.3200798034668, |
| "logps/rejected": -81.86724090576172, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6085076332092285, |
| "rewards/margins": 2.1932969093322754, |
| "rewards/rejected": -2.801804542541504, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6655159186804757, |
| "grad_norm": 0.054443359375, |
| "learning_rate": 4.6197464068431366e-05, |
| "logits/chosen": -6.755357265472412, |
| "logits/rejected": -6.797060966491699, |
| "logps/chosen": -58.92303466796875, |
| "logps/rejected": -81.09183502197266, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.49806681275367737, |
| "rewards/margins": 2.1958022117614746, |
| "rewards/rejected": -2.693869113922119, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.667433831990794, |
| "grad_norm": 0.039794921875, |
| "learning_rate": 4.6175422868929615e-05, |
| "logits/chosen": -6.759666442871094, |
| "logits/rejected": -6.852987766265869, |
| "logps/chosen": -61.73881149291992, |
| "logps/rejected": -82.39242553710938, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4188078045845032, |
| "rewards/margins": 2.197063446044922, |
| "rewards/rejected": -2.615870952606201, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6693517453011124, |
| "grad_norm": 0.0341796875, |
| "learning_rate": 4.615327320889568e-05, |
| "logits/chosen": -6.756011962890625, |
| "logits/rejected": -6.833889961242676, |
| "logps/chosen": -65.3582534790039, |
| "logps/rejected": -86.70683288574219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6901088356971741, |
| "rewards/margins": 2.198507070541382, |
| "rewards/rejected": -2.888615846633911, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.6712696586114307, |
| "grad_norm": 0.04443359375, |
| "learning_rate": 4.613101519866326e-05, |
| "logits/chosen": -6.754976749420166, |
| "logits/rejected": -6.827247619628906, |
| "logps/chosen": -66.11727905273438, |
| "logps/rejected": -87.72874450683594, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7430967092514038, |
| "rewards/margins": 2.1992220878601074, |
| "rewards/rejected": -2.9423186779022217, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6731875719217492, |
| "grad_norm": 0.0341796875, |
| "learning_rate": 4.6108648949105756e-05, |
| "logits/chosen": -6.783148765563965, |
| "logits/rejected": -6.81369686126709, |
| "logps/chosen": -65.33592224121094, |
| "logps/rejected": -84.68086242675781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6635004281997681, |
| "rewards/margins": 2.1974689960479736, |
| "rewards/rejected": -2.860969305038452, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.6751054852320675, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 4.608617457163573e-05, |
| "logits/chosen": -6.7608642578125, |
| "logits/rejected": -6.836709499359131, |
| "logps/chosen": -59.4000244140625, |
| "logps/rejected": -81.574951171875, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4980418086051941, |
| "rewards/margins": 2.1935529708862305, |
| "rewards/rejected": -2.6915950775146484, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6770233985423859, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 4.606359217820441e-05, |
| "logits/chosen": -6.732338905334473, |
| "logits/rejected": -6.816025733947754, |
| "logps/chosen": -65.36774444580078, |
| "logps/rejected": -85.60862731933594, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5363543629646301, |
| "rewards/margins": 2.19832706451416, |
| "rewards/rejected": -2.7346813678741455, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.6789413118527042, |
| "grad_norm": 0.051025390625, |
| "learning_rate": 4.6040901881301004e-05, |
| "logits/chosen": -6.83046817779541, |
| "logits/rejected": -6.893084526062012, |
| "logps/chosen": -60.8812141418457, |
| "logps/rejected": -81.68726348876953, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.41270560026168823, |
| "rewards/margins": 2.1950905323028564, |
| "rewards/rejected": -2.6077961921691895, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6808592251630227, |
| "grad_norm": 0.0361328125, |
| "learning_rate": 4.6018103793952287e-05, |
| "logits/chosen": -6.795252323150635, |
| "logits/rejected": -6.8598222732543945, |
| "logps/chosen": -59.79205322265625, |
| "logps/rejected": -80.0960693359375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.40817388892173767, |
| "rewards/margins": 2.1988348960876465, |
| "rewards/rejected": -2.607008695602417, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.682777138473341, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 4.599519802972192e-05, |
| "logits/chosen": -6.790003776550293, |
| "logits/rejected": -6.81201171875, |
| "logps/chosen": -62.64923095703125, |
| "logps/rejected": -83.82284545898438, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5379251837730408, |
| "rewards/margins": 2.197380542755127, |
| "rewards/rejected": -2.7353057861328125, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6846950517836594, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 4.597218470270997e-05, |
| "logits/chosen": -6.7385454177856445, |
| "logits/rejected": -6.783153533935547, |
| "logps/chosen": -66.6662826538086, |
| "logps/rejected": -88.35466003417969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7900538444519043, |
| "rewards/margins": 2.200716018676758, |
| "rewards/rejected": -2.990769863128662, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.6866129650939777, |
| "grad_norm": 0.0751953125, |
| "learning_rate": 4.594906392755229e-05, |
| "logits/chosen": -6.846865653991699, |
| "logits/rejected": -6.8920698165893555, |
| "logps/chosen": -62.57611083984375, |
| "logps/rejected": -83.23236846923828, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5446496605873108, |
| "rewards/margins": 2.1978793144226074, |
| "rewards/rejected": -2.7425286769866943, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6885308784042962, |
| "grad_norm": 0.037353515625, |
| "learning_rate": 4.592583581941994e-05, |
| "logits/chosen": -6.785799980163574, |
| "logits/rejected": -6.822165012359619, |
| "logps/chosen": -62.872100830078125, |
| "logps/rejected": -83.56089782714844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6173510551452637, |
| "rewards/margins": 2.195507526397705, |
| "rewards/rejected": -2.8128585815429688, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.6904487917146145, |
| "grad_norm": 0.0517578125, |
| "learning_rate": 4.590250049401866e-05, |
| "logits/chosen": -6.7071852684021, |
| "logits/rejected": -6.744346618652344, |
| "logps/chosen": -59.503631591796875, |
| "logps/rejected": -82.10621643066406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4692237973213196, |
| "rewards/margins": 2.197390556335449, |
| "rewards/rejected": -2.666614294052124, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6923667050249329, |
| "grad_norm": 0.06884765625, |
| "learning_rate": 4.587905806758828e-05, |
| "logits/chosen": -6.759226322174072, |
| "logits/rejected": -6.798618316650391, |
| "logps/chosen": -60.597740173339844, |
| "logps/rejected": -81.20115661621094, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.47514835000038147, |
| "rewards/margins": 2.1932222843170166, |
| "rewards/rejected": -2.668370485305786, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.6942846183352512, |
| "grad_norm": 0.042724609375, |
| "learning_rate": 4.585550865690211e-05, |
| "logits/chosen": -6.79116153717041, |
| "logits/rejected": -6.905958652496338, |
| "logps/chosen": -65.27635192871094, |
| "logps/rejected": -87.11880493164062, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6134611368179321, |
| "rewards/margins": 2.199479818344116, |
| "rewards/rejected": -2.812941074371338, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6962025316455697, |
| "grad_norm": 0.06396484375, |
| "learning_rate": 4.5831852379266374e-05, |
| "logits/chosen": -6.826533317565918, |
| "logits/rejected": -6.830357551574707, |
| "logps/chosen": -61.02429962158203, |
| "logps/rejected": -82.49093627929688, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4633258879184723, |
| "rewards/margins": 2.1991915702819824, |
| "rewards/rejected": -2.662517547607422, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.698120444955888, |
| "grad_norm": 0.044677734375, |
| "learning_rate": 4.5808089352519645e-05, |
| "logits/chosen": -6.621363639831543, |
| "logits/rejected": -6.738465309143066, |
| "logps/chosen": -62.206825256347656, |
| "logps/rejected": -82.90311431884766, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5602379441261292, |
| "rewards/margins": 2.1953892707824707, |
| "rewards/rejected": -2.755626678466797, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7000383582662064, |
| "grad_norm": 0.042724609375, |
| "learning_rate": 4.578421969503224e-05, |
| "logits/chosen": -6.793525695800781, |
| "logits/rejected": -6.882171630859375, |
| "logps/chosen": -62.86467361450195, |
| "logps/rejected": -84.51292419433594, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5820156335830688, |
| "rewards/margins": 2.1975064277648926, |
| "rewards/rejected": -2.7795217037200928, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.7019562715765247, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 4.576024352570563e-05, |
| "logits/chosen": -6.7367753982543945, |
| "logits/rejected": -6.8105878829956055, |
| "logps/chosen": -64.4591293334961, |
| "logps/rejected": -84.80027770996094, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6178757548332214, |
| "rewards/margins": 2.1955502033233643, |
| "rewards/rejected": -2.8134257793426514, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.7038741848868432, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 4.573616096397187e-05, |
| "logits/chosen": -6.7723212242126465, |
| "logits/rejected": -6.824551582336426, |
| "logps/chosen": -60.999359130859375, |
| "logps/rejected": -81.71595764160156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6143304109573364, |
| "rewards/margins": 2.199537992477417, |
| "rewards/rejected": -2.8138680458068848, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.7057920981971615, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 4.571197212979295e-05, |
| "logits/chosen": -6.857874870300293, |
| "logits/rejected": -6.9073357582092285, |
| "logps/chosen": -65.43289184570312, |
| "logps/rejected": -86.42829895019531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6608371734619141, |
| "rewards/margins": 2.197458267211914, |
| "rewards/rejected": -2.858295440673828, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.7077100115074798, |
| "grad_norm": 0.0458984375, |
| "learning_rate": 4.5687677143660254e-05, |
| "logits/chosen": -6.782164573669434, |
| "logits/rejected": -6.784448146820068, |
| "logps/chosen": -63.07709884643555, |
| "logps/rejected": -83.46958923339844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5670441389083862, |
| "rewards/margins": 2.1972358226776123, |
| "rewards/rejected": -2.764279842376709, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.7096279248177982, |
| "grad_norm": 0.050048828125, |
| "learning_rate": 4.566327612659394e-05, |
| "logits/chosen": -6.812560081481934, |
| "logits/rejected": -6.878233432769775, |
| "logps/chosen": -61.72986602783203, |
| "logps/rejected": -83.14173889160156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5427154302597046, |
| "rewards/margins": 2.1970319747924805, |
| "rewards/rejected": -2.7397477626800537, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7115458381281166, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 4.563876920014234e-05, |
| "logits/chosen": -6.7176947593688965, |
| "logits/rejected": -6.807933807373047, |
| "logps/chosen": -63.34507369995117, |
| "logps/rejected": -84.20928955078125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6023356318473816, |
| "rewards/margins": 2.1956591606140137, |
| "rewards/rejected": -2.79799485206604, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.713463751438435, |
| "grad_norm": 0.045166015625, |
| "learning_rate": 4.561415648638133e-05, |
| "logits/chosen": -6.859047889709473, |
| "logits/rejected": -6.9745049476623535, |
| "logps/chosen": -61.1859016418457, |
| "logps/rejected": -83.69287109375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5361623167991638, |
| "rewards/margins": 2.1962532997131348, |
| "rewards/rejected": -2.732415199279785, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.7153816647487533, |
| "grad_norm": 0.06884765625, |
| "learning_rate": 4.5589438107913764e-05, |
| "logits/chosen": -6.736496925354004, |
| "logits/rejected": -6.8658857345581055, |
| "logps/chosen": -61.28631591796875, |
| "logps/rejected": -83.09465026855469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5119591951370239, |
| "rewards/margins": 2.195549964904785, |
| "rewards/rejected": -2.7075092792510986, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.7172995780590717, |
| "grad_norm": 0.0869140625, |
| "learning_rate": 4.556461418786884e-05, |
| "logits/chosen": -6.786721229553223, |
| "logits/rejected": -6.838905334472656, |
| "logps/chosen": -60.60860061645508, |
| "logps/rejected": -82.28132629394531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5649218559265137, |
| "rewards/margins": 2.1966118812561035, |
| "rewards/rejected": -2.761533260345459, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.7192174913693901, |
| "grad_norm": 0.04931640625, |
| "learning_rate": 4.5539684849901465e-05, |
| "logits/chosen": -6.7685346603393555, |
| "logits/rejected": -6.808984279632568, |
| "logps/chosen": -65.05911254882812, |
| "logps/rejected": -85.53936767578125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6445679664611816, |
| "rewards/margins": 2.1987884044647217, |
| "rewards/rejected": -2.8433563709259033, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.7211354046797085, |
| "grad_norm": 0.04345703125, |
| "learning_rate": 4.551465021819168e-05, |
| "logits/chosen": -6.751245021820068, |
| "logits/rejected": -6.854135036468506, |
| "logps/chosen": -62.14875411987305, |
| "logps/rejected": -83.81892395019531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5632321238517761, |
| "rewards/margins": 2.1948935985565186, |
| "rewards/rejected": -2.7581255435943604, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.7230533179900268, |
| "grad_norm": 0.06591796875, |
| "learning_rate": 4.548951041744404e-05, |
| "logits/chosen": -6.735540866851807, |
| "logits/rejected": -6.806166172027588, |
| "logps/chosen": -61.762107849121094, |
| "logps/rejected": -83.3387451171875, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4712158739566803, |
| "rewards/margins": 2.1985621452331543, |
| "rewards/rejected": -2.6697781085968018, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.7249712313003452, |
| "grad_norm": 0.039306640625, |
| "learning_rate": 4.5464265572886934e-05, |
| "logits/chosen": -6.727081298828125, |
| "logits/rejected": -6.819952487945557, |
| "logps/chosen": -66.33174133300781, |
| "logps/rejected": -87.17375183105469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7186114192008972, |
| "rewards/margins": 2.196577310562134, |
| "rewards/rejected": -2.9151885509490967, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7268891446106636, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 4.543891581027205e-05, |
| "logits/chosen": -6.7953901290893555, |
| "logits/rejected": -6.884771823883057, |
| "logps/chosen": -67.89469909667969, |
| "logps/rejected": -90.35826110839844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7310246229171753, |
| "rewards/margins": 2.199010133743286, |
| "rewards/rejected": -2.93003511428833, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.728807057920982, |
| "grad_norm": 0.05810546875, |
| "learning_rate": 4.541346125587367e-05, |
| "logits/chosen": -6.854927062988281, |
| "logits/rejected": -6.904066562652588, |
| "logps/chosen": -62.1525764465332, |
| "logps/rejected": -82.98854064941406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.559751570224762, |
| "rewards/margins": 2.1974406242370605, |
| "rewards/rejected": -2.7571921348571777, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7307249712313003, |
| "grad_norm": 0.08740234375, |
| "learning_rate": 4.53879020364881e-05, |
| "logits/chosen": -6.737257957458496, |
| "logits/rejected": -6.838744163513184, |
| "logps/chosen": -62.46352005004883, |
| "logps/rejected": -84.47117614746094, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5287628769874573, |
| "rewards/margins": 2.199683666229248, |
| "rewards/rejected": -2.7284464836120605, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.7326428845416187, |
| "grad_norm": 0.0546875, |
| "learning_rate": 4.5362238279432966e-05, |
| "logits/chosen": -6.782439231872559, |
| "logits/rejected": -6.8341827392578125, |
| "logps/chosen": -64.24818420410156, |
| "logps/rejected": -86.03765869140625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5050908327102661, |
| "rewards/margins": 2.196857213973999, |
| "rewards/rejected": -2.7019479274749756, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7345607978519371, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 4.533647011254668e-05, |
| "logits/chosen": -6.7711181640625, |
| "logits/rejected": -6.8788042068481445, |
| "logps/chosen": -65.85881042480469, |
| "logps/rejected": -85.33684539794922, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5805426836013794, |
| "rewards/margins": 2.1959762573242188, |
| "rewards/rejected": -2.7765188217163086, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.7364787111622555, |
| "grad_norm": 0.0576171875, |
| "learning_rate": 4.531059766418772e-05, |
| "logits/chosen": -6.808495998382568, |
| "logits/rejected": -6.879085540771484, |
| "logps/chosen": -56.28014373779297, |
| "logps/rejected": -77.42876434326172, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3688265085220337, |
| "rewards/margins": 2.1956605911254883, |
| "rewards/rejected": -2.5644869804382324, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7383966244725738, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 4.528462106323401e-05, |
| "logits/chosen": -6.75214147567749, |
| "logits/rejected": -6.826302528381348, |
| "logps/chosen": -63.860877990722656, |
| "logps/rejected": -85.62739562988281, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6718140840530396, |
| "rewards/margins": 2.194911479949951, |
| "rewards/rejected": -2.866725444793701, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.7403145377828922, |
| "grad_norm": 0.037353515625, |
| "learning_rate": 4.525854043908233e-05, |
| "logits/chosen": -6.792781829833984, |
| "logits/rejected": -6.87396240234375, |
| "logps/chosen": -64.07568359375, |
| "logps/rejected": -85.14360046386719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6465386152267456, |
| "rewards/margins": 2.19848370552063, |
| "rewards/rejected": -2.845022439956665, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7422324510932106, |
| "grad_norm": 0.041015625, |
| "learning_rate": 4.5232355921647566e-05, |
| "logits/chosen": -6.751712799072266, |
| "logits/rejected": -6.775839328765869, |
| "logps/chosen": -60.863189697265625, |
| "logps/rejected": -81.8580093383789, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4481169283390045, |
| "rewards/margins": 2.1981563568115234, |
| "rewards/rejected": -2.646273374557495, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.744150364403529, |
| "grad_norm": 0.050537109375, |
| "learning_rate": 4.520606764136218e-05, |
| "logits/chosen": -6.766876220703125, |
| "logits/rejected": -6.857789039611816, |
| "logps/chosen": -68.12163543701172, |
| "logps/rejected": -89.1402587890625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6213817596435547, |
| "rewards/margins": 2.1959993839263916, |
| "rewards/rejected": -2.8173813819885254, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7460682777138473, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 4.517967572917548e-05, |
| "logits/chosen": -6.843106269836426, |
| "logits/rejected": -6.927582740783691, |
| "logps/chosen": -60.88816452026367, |
| "logps/rejected": -83.1964111328125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5037531852722168, |
| "rewards/margins": 2.1971211433410645, |
| "rewards/rejected": -2.7008743286132812, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.7479861910241657, |
| "grad_norm": 0.0537109375, |
| "learning_rate": 4.5153180316553e-05, |
| "logits/chosen": -6.827356815338135, |
| "logits/rejected": -6.820866584777832, |
| "logps/chosen": -63.216529846191406, |
| "logps/rejected": -84.10847473144531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6285332441329956, |
| "rewards/margins": 2.1946308612823486, |
| "rewards/rejected": -2.823164224624634, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7499041043344841, |
| "grad_norm": 0.048583984375, |
| "learning_rate": 4.5126581535475836e-05, |
| "logits/chosen": -6.798864841461182, |
| "logits/rejected": -6.872601509094238, |
| "logps/chosen": -66.13003540039062, |
| "logps/rejected": -87.56874084472656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.689039409160614, |
| "rewards/margins": 2.1969897747039795, |
| "rewards/rejected": -2.8860294818878174, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.7518220176448025, |
| "grad_norm": 0.056396484375, |
| "learning_rate": 4.5099879518439994e-05, |
| "logits/chosen": -6.668278694152832, |
| "logits/rejected": -6.7603960037231445, |
| "logps/chosen": -64.11713409423828, |
| "logps/rejected": -84.96097564697266, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5732687711715698, |
| "rewards/margins": 2.199057102203369, |
| "rewards/rejected": -2.7723259925842285, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7537399309551208, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 4.5073074398455726e-05, |
| "logits/chosen": -6.860171318054199, |
| "logits/rejected": -6.9390549659729, |
| "logps/chosen": -59.60344696044922, |
| "logps/rejected": -80.41170501708984, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4485054910182953, |
| "rewards/margins": 2.1985862255096436, |
| "rewards/rejected": -2.6470913887023926, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.7556578442654392, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 4.504616630904687e-05, |
| "logits/chosen": -6.770961761474609, |
| "logits/rejected": -6.811574459075928, |
| "logps/chosen": -58.90739822387695, |
| "logps/rejected": -80.26576232910156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.44480863213539124, |
| "rewards/margins": 2.1979799270629883, |
| "rewards/rejected": -2.6427886486053467, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.7575757575757576, |
| "grad_norm": 0.061767578125, |
| "learning_rate": 4.5019155384250175e-05, |
| "logits/chosen": -6.8337860107421875, |
| "logits/rejected": -6.892735481262207, |
| "logps/chosen": -63.18619918823242, |
| "logps/rejected": -83.08711242675781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5215980410575867, |
| "rewards/margins": 2.1953835487365723, |
| "rewards/rejected": -2.7169814109802246, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.759493670886076, |
| "grad_norm": 0.044677734375, |
| "learning_rate": 4.4992041758614665e-05, |
| "logits/chosen": -6.797111511230469, |
| "logits/rejected": -6.9238691329956055, |
| "logps/chosen": -61.52330780029297, |
| "logps/rejected": -82.0053939819336, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4695897698402405, |
| "rewards/margins": 2.1961278915405273, |
| "rewards/rejected": -2.665717840194702, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7614115841963943, |
| "grad_norm": 0.060791015625, |
| "learning_rate": 4.4964825567200924e-05, |
| "logits/chosen": -6.764313697814941, |
| "logits/rejected": -6.81424617767334, |
| "logps/chosen": -63.5527229309082, |
| "logps/rejected": -84.34095764160156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6415268778800964, |
| "rewards/margins": 2.197373867034912, |
| "rewards/rejected": -2.838900566101074, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.7633294975067128, |
| "grad_norm": 0.05419921875, |
| "learning_rate": 4.493750694558045e-05, |
| "logits/chosen": -6.801182746887207, |
| "logits/rejected": -6.829155921936035, |
| "logps/chosen": -60.45806121826172, |
| "logps/rejected": -80.76268005371094, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5038140416145325, |
| "rewards/margins": 2.197751522064209, |
| "rewards/rejected": -2.7015655040740967, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7652474108170311, |
| "grad_norm": 0.1923828125, |
| "learning_rate": 4.4910086029834964e-05, |
| "logits/chosen": -6.829585075378418, |
| "logits/rejected": -6.883375644683838, |
| "logps/chosen": -64.5816879272461, |
| "logps/rejected": -86.17229461669922, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6635004878044128, |
| "rewards/margins": 2.1997756958007812, |
| "rewards/rejected": -2.863276243209839, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.7671653241273494, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 4.488256295655578e-05, |
| "logits/chosen": -6.76079797744751, |
| "logits/rejected": -6.796362400054932, |
| "logps/chosen": -62.818328857421875, |
| "logps/rejected": -83.43183898925781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5418844223022461, |
| "rewards/margins": 2.197444438934326, |
| "rewards/rejected": -2.7393288612365723, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7671653241273494, |
| "eval_logits/chosen": -6.664936065673828, |
| "eval_logits/rejected": -6.8608903884887695, |
| "eval_logps/chosen": -61.263816833496094, |
| "eval_logps/rejected": -84.09880065917969, |
| "eval_loss": 0.32511982321739197, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -0.5810979604721069, |
| "eval_rewards/margins": 2.1994879245758057, |
| "eval_rewards/rejected": -2.780586004257202, |
| "eval_runtime": 5.3218, |
| "eval_samples_per_second": 37.581, |
| "eval_steps_per_second": 37.581, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7690832374376678, |
| "grad_norm": 0.072265625, |
| "learning_rate": 4.4854937862843045e-05, |
| "logits/chosen": -6.7290191650390625, |
| "logits/rejected": -6.806711673736572, |
| "logps/chosen": -64.05516052246094, |
| "logps/rejected": -85.09039306640625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5960283279418945, |
| "rewards/margins": 2.1978256702423096, |
| "rewards/rejected": -2.793853759765625, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.7710011507479861, |
| "grad_norm": 0.11181640625, |
| "learning_rate": 4.482721088630511e-05, |
| "logits/chosen": -6.806267738342285, |
| "logits/rejected": -6.852849006652832, |
| "logps/chosen": -64.03093719482422, |
| "logps/rejected": -84.20830535888672, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5489016771316528, |
| "rewards/margins": 2.1934478282928467, |
| "rewards/rejected": -2.742349863052368, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7729190640583046, |
| "grad_norm": 0.0419921875, |
| "learning_rate": 4.479938216505786e-05, |
| "logits/chosen": -6.806654930114746, |
| "logits/rejected": -6.920736789703369, |
| "logps/chosen": -63.2310791015625, |
| "logps/rejected": -85.70663452148438, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5934334993362427, |
| "rewards/margins": 2.1975131034851074, |
| "rewards/rejected": -2.7909464836120605, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.7748369773686229, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 4.477145183772396e-05, |
| "logits/chosen": -6.734241485595703, |
| "logits/rejected": -6.792110443115234, |
| "logps/chosen": -64.88965606689453, |
| "logps/rejected": -85.61289978027344, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6298405528068542, |
| "rewards/margins": 2.1967482566833496, |
| "rewards/rejected": -2.8265888690948486, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7767548906789413, |
| "grad_norm": 0.06494140625, |
| "learning_rate": 4.4743420043432235e-05, |
| "logits/chosen": -6.755318641662598, |
| "logits/rejected": -6.848862648010254, |
| "logps/chosen": -61.517234802246094, |
| "logps/rejected": -82.68122100830078, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5705845355987549, |
| "rewards/margins": 2.196594715118408, |
| "rewards/rejected": -2.767179012298584, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.7786728039892596, |
| "grad_norm": 0.0556640625, |
| "learning_rate": 4.471528692181692e-05, |
| "logits/chosen": -6.737010955810547, |
| "logits/rejected": -6.783226013183594, |
| "logps/chosen": -63.67380905151367, |
| "logps/rejected": -85.3752212524414, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5692884922027588, |
| "rewards/margins": 2.198531150817871, |
| "rewards/rejected": -2.767819881439209, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7805907172995781, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 4.468705261301701e-05, |
| "logits/chosen": -6.7614288330078125, |
| "logits/rejected": -6.859000205993652, |
| "logps/chosen": -62.118080139160156, |
| "logps/rejected": -83.3497085571289, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4895971417427063, |
| "rewards/margins": 2.193037509918213, |
| "rewards/rejected": -2.6826348304748535, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.7825086306098964, |
| "grad_norm": 0.048828125, |
| "learning_rate": 4.465871725767552e-05, |
| "logits/chosen": -6.761736869812012, |
| "logits/rejected": -6.77576208114624, |
| "logps/chosen": -64.80572509765625, |
| "logps/rejected": -84.99140930175781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.636228084564209, |
| "rewards/margins": 2.1979706287384033, |
| "rewards/rejected": -2.8341987133026123, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.7844265439202148, |
| "grad_norm": 0.046875, |
| "learning_rate": 4.4630280996938836e-05, |
| "logits/chosen": -6.707512855529785, |
| "logits/rejected": -6.794578552246094, |
| "logps/chosen": -59.2886848449707, |
| "logps/rejected": -80.18730163574219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4784001410007477, |
| "rewards/margins": 2.1976962089538574, |
| "rewards/rejected": -2.6760964393615723, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.7863444572305331, |
| "grad_norm": 0.055419921875, |
| "learning_rate": 4.4601743972455956e-05, |
| "logits/chosen": -6.743660926818848, |
| "logits/rejected": -6.8471574783325195, |
| "logps/chosen": -64.73351287841797, |
| "logps/rejected": -85.35092163085938, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6120368242263794, |
| "rewards/margins": 2.1981964111328125, |
| "rewards/rejected": -2.8102333545684814, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.7882623705408516, |
| "grad_norm": 0.057861328125, |
| "learning_rate": 4.457310632637782e-05, |
| "logits/chosen": -6.833253383636475, |
| "logits/rejected": -6.930048942565918, |
| "logps/chosen": -62.33038330078125, |
| "logps/rejected": -84.3667984008789, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5810059309005737, |
| "rewards/margins": 2.196547031402588, |
| "rewards/rejected": -2.777553081512451, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.7901802838511699, |
| "grad_norm": 0.05126953125, |
| "learning_rate": 4.45443682013566e-05, |
| "logits/chosen": -6.695115089416504, |
| "logits/rejected": -6.742805480957031, |
| "logps/chosen": -64.67724609375, |
| "logps/rejected": -85.6129150390625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6756724715232849, |
| "rewards/margins": 2.1990294456481934, |
| "rewards/rejected": -2.874701738357544, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7920981971614883, |
| "grad_norm": 0.0458984375, |
| "learning_rate": 4.4515529740544965e-05, |
| "logits/chosen": -6.786559104919434, |
| "logits/rejected": -6.889459133148193, |
| "logps/chosen": -65.39054870605469, |
| "logps/rejected": -87.1576156616211, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6647933125495911, |
| "rewards/margins": 2.194797992706299, |
| "rewards/rejected": -2.859591245651245, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.7940161104718066, |
| "grad_norm": 0.04638671875, |
| "learning_rate": 4.44865910875954e-05, |
| "logits/chosen": -6.708530426025391, |
| "logits/rejected": -6.796938419342041, |
| "logps/chosen": -63.345420837402344, |
| "logps/rejected": -83.50309753417969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5706071257591248, |
| "rewards/margins": 2.196532726287842, |
| "rewards/rejected": -2.7671401500701904, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.7959340237821251, |
| "grad_norm": 0.058837890625, |
| "learning_rate": 4.445755238665947e-05, |
| "logits/chosen": -6.803549289703369, |
| "logits/rejected": -6.857290744781494, |
| "logps/chosen": -65.19212341308594, |
| "logps/rejected": -87.05712127685547, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6691518425941467, |
| "rewards/margins": 2.1989452838897705, |
| "rewards/rejected": -2.8680973052978516, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.7978519370924434, |
| "grad_norm": 0.053955078125, |
| "learning_rate": 4.442841378238711e-05, |
| "logits/chosen": -6.7612199783325195, |
| "logits/rejected": -6.839663028717041, |
| "logps/chosen": -62.892539978027344, |
| "logps/rejected": -84.74234771728516, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6525223851203918, |
| "rewards/margins": 2.1974222660064697, |
| "rewards/rejected": -2.849944591522217, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.7997698504027618, |
| "grad_norm": 0.047119140625, |
| "learning_rate": 4.4399175419925886e-05, |
| "logits/chosen": -6.800496578216553, |
| "logits/rejected": -6.868733882904053, |
| "logps/chosen": -64.82361602783203, |
| "logps/rejected": -86.96720123291016, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.632161021232605, |
| "rewards/margins": 2.1986496448516846, |
| "rewards/rejected": -2.830810308456421, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.8016877637130801, |
| "grad_norm": 0.046630859375, |
| "learning_rate": 4.43698374449203e-05, |
| "logits/chosen": -6.7268853187561035, |
| "logits/rejected": -6.8085432052612305, |
| "logps/chosen": -62.931060791015625, |
| "logps/rejected": -84.10462951660156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5087457895278931, |
| "rewards/margins": 2.196925640106201, |
| "rewards/rejected": -2.705671548843384, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.8036056770233986, |
| "grad_norm": 0.05859375, |
| "learning_rate": 4.4340400003511073e-05, |
| "logits/chosen": -6.776535987854004, |
| "logits/rejected": -6.851083278656006, |
| "logps/chosen": -62.702186584472656, |
| "logps/rejected": -83.18672943115234, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.538625955581665, |
| "rewards/margins": 2.1954431533813477, |
| "rewards/rejected": -2.7340688705444336, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.8055235903337169, |
| "grad_norm": 0.059814453125, |
| "learning_rate": 4.431086324233436e-05, |
| "logits/chosen": -6.781048774719238, |
| "logits/rejected": -6.8217363357543945, |
| "logps/chosen": -65.83214569091797, |
| "logps/rejected": -86.54866790771484, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.567939281463623, |
| "rewards/margins": 2.200836181640625, |
| "rewards/rejected": -2.768775463104248, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8074415036440353, |
| "grad_norm": 0.04736328125, |
| "learning_rate": 4.4281227308521064e-05, |
| "logits/chosen": -6.783602714538574, |
| "logits/rejected": -6.803304195404053, |
| "logps/chosen": -65.60836029052734, |
| "logps/rejected": -87.5150146484375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7341927289962769, |
| "rewards/margins": 2.195850372314453, |
| "rewards/rejected": -2.9300434589385986, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.8093594169543536, |
| "grad_norm": 0.07080078125, |
| "learning_rate": 4.4251492349696115e-05, |
| "logits/chosen": -6.827857971191406, |
| "logits/rejected": -6.948951721191406, |
| "logps/chosen": -60.68585968017578, |
| "logps/rejected": -81.73527526855469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.37252140045166016, |
| "rewards/margins": 2.194404125213623, |
| "rewards/rejected": -2.5669257640838623, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.8112773302646721, |
| "grad_norm": 0.09326171875, |
| "learning_rate": 4.42216585139777e-05, |
| "logits/chosen": -6.7736992835998535, |
| "logits/rejected": -6.819214820861816, |
| "logps/chosen": -62.67510986328125, |
| "logps/rejected": -84.61715698242188, |
| "loss": 0.3261, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5392805933952332, |
| "rewards/margins": 2.1884799003601074, |
| "rewards/rejected": -2.7277605533599854, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.8131952435749904, |
| "grad_norm": 0.08251953125, |
| "learning_rate": 4.419172594997653e-05, |
| "logits/chosen": -6.801031589508057, |
| "logits/rejected": -6.857367515563965, |
| "logps/chosen": -62.54511260986328, |
| "logps/rejected": -83.21441650390625, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4863591194152832, |
| "rewards/margins": 2.1975858211517334, |
| "rewards/rejected": -2.6839447021484375, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.8151131568853088, |
| "grad_norm": 0.099609375, |
| "learning_rate": 4.4161694806795126e-05, |
| "logits/chosen": -6.776806831359863, |
| "logits/rejected": -6.829167366027832, |
| "logps/chosen": -62.74772262573242, |
| "logps/rejected": -84.97938537597656, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6619436740875244, |
| "rewards/margins": 2.1976840496063232, |
| "rewards/rejected": -2.8596279621124268, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.8170310701956272, |
| "grad_norm": 0.103515625, |
| "learning_rate": 4.413156523402706e-05, |
| "logits/chosen": -6.788256645202637, |
| "logits/rejected": -6.827269077301025, |
| "logps/chosen": -64.37828063964844, |
| "logps/rejected": -85.09651184082031, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6709007620811462, |
| "rewards/margins": 2.2003588676452637, |
| "rewards/rejected": -2.8712592124938965, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.8189489835059456, |
| "grad_norm": 0.0888671875, |
| "learning_rate": 4.410133738175618e-05, |
| "logits/chosen": -6.746880531311035, |
| "logits/rejected": -6.855155944824219, |
| "logps/chosen": -60.25474166870117, |
| "logps/rejected": -82.25382995605469, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5617008209228516, |
| "rewards/margins": 2.1955270767211914, |
| "rewards/rejected": -2.757227897644043, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.8208668968162639, |
| "grad_norm": 0.080078125, |
| "learning_rate": 4.407101140055594e-05, |
| "logits/chosen": -6.7756781578063965, |
| "logits/rejected": -6.8511552810668945, |
| "logps/chosen": -63.31382369995117, |
| "logps/rejected": -84.40943908691406, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5347599387168884, |
| "rewards/margins": 2.194408655166626, |
| "rewards/rejected": -2.729168653488159, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8227848101265823, |
| "grad_norm": 0.0732421875, |
| "learning_rate": 4.4040587441488566e-05, |
| "logits/chosen": -6.786390781402588, |
| "logits/rejected": -6.8126220703125, |
| "logps/chosen": -64.05632019042969, |
| "logps/rejected": -86.46208190917969, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6747733354568481, |
| "rewards/margins": 2.1963019371032715, |
| "rewards/rejected": -2.87107515335083, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.8247027234369007, |
| "grad_norm": 0.076171875, |
| "learning_rate": 4.401006565610436e-05, |
| "logits/chosen": -6.741326808929443, |
| "logits/rejected": -6.817544460296631, |
| "logps/chosen": -61.1034049987793, |
| "logps/rejected": -82.86426544189453, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6038416028022766, |
| "rewards/margins": 2.2050437927246094, |
| "rewards/rejected": -2.8088855743408203, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.826620636747219, |
| "grad_norm": 0.10888671875, |
| "learning_rate": 4.397944619644089e-05, |
| "logits/chosen": -6.805345058441162, |
| "logits/rejected": -6.8814568519592285, |
| "logps/chosen": -62.960472106933594, |
| "logps/rejected": -84.38675689697266, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5378700494766235, |
| "rewards/margins": 2.198153495788574, |
| "rewards/rejected": -2.7360236644744873, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.8285385500575374, |
| "grad_norm": 0.07373046875, |
| "learning_rate": 4.394872921502232e-05, |
| "logits/chosen": -6.7272233963012695, |
| "logits/rejected": -6.84502649307251, |
| "logps/chosen": -64.93167114257812, |
| "logps/rejected": -87.38648223876953, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5903478264808655, |
| "rewards/margins": 2.1909964084625244, |
| "rewards/rejected": -2.781344175338745, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8304564633678557, |
| "grad_norm": 0.10498046875, |
| "learning_rate": 4.3917914864858546e-05, |
| "logits/chosen": -6.765233516693115, |
| "logits/rejected": -6.857439994812012, |
| "logps/chosen": -61.6948127746582, |
| "logps/rejected": -82.59429931640625, |
| "loss": 0.3284, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.46801313757896423, |
| "rewards/margins": 2.223072052001953, |
| "rewards/rejected": -2.6910855770111084, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.8323743766781742, |
| "grad_norm": 0.1083984375, |
| "learning_rate": 4.388700329944453e-05, |
| "logits/chosen": -6.72791051864624, |
| "logits/rejected": -6.8128252029418945, |
| "logps/chosen": -60.26092529296875, |
| "logps/rejected": -81.54100036621094, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4659239649772644, |
| "rewards/margins": 2.1824138164520264, |
| "rewards/rejected": -2.6483378410339355, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8342922899884925, |
| "grad_norm": 0.10986328125, |
| "learning_rate": 4.385599467275944e-05, |
| "logits/chosen": -6.779816627502441, |
| "logits/rejected": -6.829860687255859, |
| "logps/chosen": -60.439300537109375, |
| "logps/rejected": -81.71858215332031, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.546042799949646, |
| "rewards/margins": 2.1965088844299316, |
| "rewards/rejected": -2.742551803588867, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.8362102032988109, |
| "grad_norm": 0.2451171875, |
| "learning_rate": 4.3824889139265984e-05, |
| "logits/chosen": -6.751175880432129, |
| "logits/rejected": -6.8204522132873535, |
| "logps/chosen": -64.71672058105469, |
| "logps/rejected": -84.95536804199219, |
| "loss": 0.3256, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5461716651916504, |
| "rewards/margins": 2.2114083766937256, |
| "rewards/rejected": -2.757579803466797, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8381281166091292, |
| "grad_norm": 0.1669921875, |
| "learning_rate": 4.3793686853909556e-05, |
| "logits/chosen": -6.78512716293335, |
| "logits/rejected": -6.888462066650391, |
| "logps/chosen": -63.5004997253418, |
| "logps/rejected": -83.88385009765625, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6083672642707825, |
| "rewards/margins": 2.203294038772583, |
| "rewards/rejected": -2.8116612434387207, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.8400460299194477, |
| "grad_norm": 0.10693359375, |
| "learning_rate": 4.376238797211751e-05, |
| "logits/chosen": -6.882073879241943, |
| "logits/rejected": -6.960659980773926, |
| "logps/chosen": -62.78485107421875, |
| "logps/rejected": -84.0, |
| "loss": 0.3258, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5518940687179565, |
| "rewards/margins": 2.181422472000122, |
| "rewards/rejected": -2.7333168983459473, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.841963943229766, |
| "grad_norm": 0.10302734375, |
| "learning_rate": 4.373099264979839e-05, |
| "logits/chosen": -6.732028961181641, |
| "logits/rejected": -6.831223487854004, |
| "logps/chosen": -65.45835876464844, |
| "logps/rejected": -87.10944366455078, |
| "loss": 0.3254, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7259451150894165, |
| "rewards/margins": 2.1964221000671387, |
| "rewards/rejected": -2.9223673343658447, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.8438818565400844, |
| "grad_norm": 0.201171875, |
| "learning_rate": 4.36995010433411e-05, |
| "logits/chosen": -6.707369804382324, |
| "logits/rejected": -6.776480197906494, |
| "logps/chosen": -67.71757507324219, |
| "logps/rejected": -89.28482055664062, |
| "loss": 0.326, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7898483276367188, |
| "rewards/margins": 2.1907553672790527, |
| "rewards/rejected": -2.9806036949157715, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8457997698504027, |
| "grad_norm": 0.2734375, |
| "learning_rate": 4.366791330961419e-05, |
| "logits/chosen": -6.8558478355407715, |
| "logits/rejected": -6.885350704193115, |
| "logps/chosen": -66.4366226196289, |
| "logps/rejected": -85.9381103515625, |
| "loss": 0.326, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.614000141620636, |
| "rewards/margins": 2.194479465484619, |
| "rewards/rejected": -2.8084795475006104, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.8477176831607212, |
| "grad_norm": 0.2197265625, |
| "learning_rate": 4.3636229605965046e-05, |
| "logits/chosen": -6.758575439453125, |
| "logits/rejected": -6.804045677185059, |
| "logps/chosen": -66.45049285888672, |
| "logps/rejected": -88.2242202758789, |
| "loss": 0.326, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.743838906288147, |
| "rewards/margins": 2.1854729652404785, |
| "rewards/rejected": -2.929311752319336, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.8496355964710395, |
| "grad_norm": 0.21484375, |
| "learning_rate": 4.3604450090219094e-05, |
| "logits/chosen": -6.822757720947266, |
| "logits/rejected": -6.8710737228393555, |
| "logps/chosen": -63.0651741027832, |
| "logps/rejected": -84.55931854248047, |
| "loss": 0.3277, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5265265703201294, |
| "rewards/margins": 2.20011043548584, |
| "rewards/rejected": -2.7266366481781006, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.8515535097813579, |
| "grad_norm": 0.62890625, |
| "learning_rate": 4.357257492067904e-05, |
| "logits/chosen": -6.8631696701049805, |
| "logits/rejected": -6.924670219421387, |
| "logps/chosen": -58.963844299316406, |
| "logps/rejected": -80.02134704589844, |
| "loss": 0.3317, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4552484154701233, |
| "rewards/margins": 2.2213027477264404, |
| "rewards/rejected": -2.676551103591919, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8534714230916762, |
| "grad_norm": 0.82421875, |
| "learning_rate": 4.3540604256124066e-05, |
| "logits/chosen": -6.800238132476807, |
| "logits/rejected": -6.881220817565918, |
| "logps/chosen": -60.06099319458008, |
| "logps/rejected": -81.65616607666016, |
| "loss": 0.3294, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4933759570121765, |
| "rewards/margins": 2.1853110790252686, |
| "rewards/rejected": -2.67868709564209, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.8553893364019947, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.3508538255809035e-05, |
| "logits/chosen": -6.7798614501953125, |
| "logits/rejected": -6.826601982116699, |
| "logps/chosen": -61.747901916503906, |
| "logps/rejected": -83.8855209350586, |
| "loss": 0.3466, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.39245396852493286, |
| "rewards/margins": 2.295319080352783, |
| "rewards/rejected": -2.687772750854492, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.857307249712313, |
| "grad_norm": 5.9375, |
| "learning_rate": 4.3476377079463705e-05, |
| "logits/chosen": -6.831582546234131, |
| "logits/rejected": -6.855175971984863, |
| "logps/chosen": -62.176368713378906, |
| "logps/rejected": -81.49797058105469, |
| "loss": 0.3641, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -0.527076005935669, |
| "rewards/margins": 2.0065996646881104, |
| "rewards/rejected": -2.5336756706237793, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.8592251630226314, |
| "grad_norm": 2.734375, |
| "learning_rate": 4.3444120887291936e-05, |
| "logits/chosen": -6.75543737411499, |
| "logits/rejected": -6.856991767883301, |
| "logps/chosen": -60.326416015625, |
| "logps/rejected": -79.26715850830078, |
| "loss": 0.3855, |
| "rewards/accuracies": 0.965624988079071, |
| "rewards/chosen": -0.3984132707118988, |
| "rewards/margins": 1.9175384044647217, |
| "rewards/rejected": -2.3159518241882324, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8611430763329497, |
| "grad_norm": 2.140625, |
| "learning_rate": 4.34117698399709e-05, |
| "logits/chosen": -6.790041446685791, |
| "logits/rejected": -6.8845930099487305, |
| "logps/chosen": -57.28984832763672, |
| "logps/rejected": -74.63005065917969, |
| "loss": 0.3935, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.3596039116382599, |
| "rewards/margins": 1.7848408222198486, |
| "rewards/rejected": -2.144444704055786, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.8630609896432682, |
| "grad_norm": 5.5625, |
| "learning_rate": 4.337932409865023e-05, |
| "logits/chosen": -6.925728797912598, |
| "logits/rejected": -6.95987606048584, |
| "logps/chosen": -62.9880256652832, |
| "logps/rejected": -80.00062561035156, |
| "loss": 0.4107, |
| "rewards/accuracies": 0.9312499761581421, |
| "rewards/chosen": -0.46328768134117126, |
| "rewards/margins": 1.8457485437393188, |
| "rewards/rejected": -2.3090367317199707, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8649789029535865, |
| "grad_norm": 2.34375, |
| "learning_rate": 4.33467838249513e-05, |
| "logits/chosen": -6.822978973388672, |
| "logits/rejected": -6.83398962020874, |
| "logps/chosen": -60.968116760253906, |
| "logps/rejected": -78.74832916259766, |
| "loss": 0.3894, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.4434930682182312, |
| "rewards/margins": 1.8895623683929443, |
| "rewards/rejected": -2.3330554962158203, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.8668968162639049, |
| "grad_norm": 2.203125, |
| "learning_rate": 4.331414918096637e-05, |
| "logits/chosen": -6.800551414489746, |
| "logits/rejected": -6.828469753265381, |
| "logps/chosen": -62.655357360839844, |
| "logps/rejected": -83.25059509277344, |
| "loss": 0.36, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.5360097885131836, |
| "rewards/margins": 2.1591033935546875, |
| "rewards/rejected": -2.695112943649292, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8688147295742232, |
| "grad_norm": 3.328125, |
| "learning_rate": 4.328142032925777e-05, |
| "logits/chosen": -6.843760013580322, |
| "logits/rejected": -6.8942742347717285, |
| "logps/chosen": -60.87250900268555, |
| "logps/rejected": -80.40345764160156, |
| "loss": 0.379, |
| "rewards/accuracies": 0.965624988079071, |
| "rewards/chosen": -0.45352882146835327, |
| "rewards/margins": 1.9877502918243408, |
| "rewards/rejected": -2.441279172897339, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.8707326428845417, |
| "grad_norm": 1.34375, |
| "learning_rate": 4.32485974328571e-05, |
| "logits/chosen": -6.862430572509766, |
| "logits/rejected": -6.91161584854126, |
| "logps/chosen": -60.5975341796875, |
| "logps/rejected": -78.65064239501953, |
| "loss": 0.3711, |
| "rewards/accuracies": 0.9781249761581421, |
| "rewards/chosen": -0.32780012488365173, |
| "rewards/margins": 1.910913109779358, |
| "rewards/rejected": -2.238713264465332, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.87265055619486, |
| "grad_norm": 7.03125, |
| "learning_rate": 4.3215680655264436e-05, |
| "logits/chosen": -6.953143119812012, |
| "logits/rejected": -6.958700656890869, |
| "logps/chosen": -60.2089958190918, |
| "logps/rejected": -80.94921112060547, |
| "loss": 0.3767, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.4435577392578125, |
| "rewards/margins": 2.0195579528808594, |
| "rewards/rejected": -2.463115692138672, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.8745684695051784, |
| "grad_norm": 2.375, |
| "learning_rate": 4.3182670160447495e-05, |
| "logits/chosen": -6.874422550201416, |
| "logits/rejected": -6.977646827697754, |
| "logps/chosen": -58.399261474609375, |
| "logps/rejected": -80.04866027832031, |
| "loss": 0.353, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.26949343085289, |
| "rewards/margins": 2.098801851272583, |
| "rewards/rejected": -2.368295192718506, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8764863828154967, |
| "grad_norm": 3.828125, |
| "learning_rate": 4.314956611284084e-05, |
| "logits/chosen": -6.972007751464844, |
| "logits/rejected": -7.0395026206970215, |
| "logps/chosen": -61.51708984375, |
| "logps/rejected": -81.02632141113281, |
| "loss": 0.3673, |
| "rewards/accuracies": 0.965624988079071, |
| "rewards/chosen": -0.26689666509628296, |
| "rewards/margins": 2.0594050884246826, |
| "rewards/rejected": -2.3263020515441895, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.8784042961258152, |
| "grad_norm": 1.796875, |
| "learning_rate": 4.311636867734503e-05, |
| "logits/chosen": -6.955082893371582, |
| "logits/rejected": -7.046578407287598, |
| "logps/chosen": -58.28876876831055, |
| "logps/rejected": -81.63599395751953, |
| "loss": 0.3619, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -0.30293378233909607, |
| "rewards/margins": 2.241940498352051, |
| "rewards/rejected": -2.544874429702759, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.8803222094361335, |
| "grad_norm": 1.3125, |
| "learning_rate": 4.308307801932584e-05, |
| "logits/chosen": -6.8621649742126465, |
| "logits/rejected": -6.930577754974365, |
| "logps/chosen": -61.332130432128906, |
| "logps/rejected": -82.65200805664062, |
| "loss": 0.3517, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.26416295766830444, |
| "rewards/margins": 2.246364116668701, |
| "rewards/rejected": -2.5105273723602295, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.8822401227464519, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.304969430461337e-05, |
| "logits/chosen": -6.878881931304932, |
| "logits/rejected": -6.98480224609375, |
| "logps/chosen": -57.89545822143555, |
| "logps/rejected": -77.03311920166016, |
| "loss": 0.3547, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -0.03846656531095505, |
| "rewards/margins": 2.111886739730835, |
| "rewards/rejected": -2.1503536701202393, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8841580360567702, |
| "grad_norm": 0.8125, |
| "learning_rate": 4.301621769950129e-05, |
| "logits/chosen": -6.871652126312256, |
| "logits/rejected": -6.941521644592285, |
| "logps/chosen": -52.32462692260742, |
| "logps/rejected": -73.90729522705078, |
| "loss": 0.3493, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.026116441935300827, |
| "rewards/margins": 2.0886855125427246, |
| "rewards/rejected": -2.1148018836975098, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.8860759493670886, |
| "grad_norm": 0.5703125, |
| "learning_rate": 4.2982648370746005e-05, |
| "logits/chosen": -6.835605621337891, |
| "logits/rejected": -6.929086208343506, |
| "logps/chosen": -63.22246170043945, |
| "logps/rejected": -83.64646911621094, |
| "loss": 0.3502, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -0.3698285222053528, |
| "rewards/margins": 2.067920446395874, |
| "rewards/rejected": -2.437749147415161, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.887993862677407, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.294898648556575e-05, |
| "logits/chosen": -6.868348598480225, |
| "logits/rejected": -6.924635887145996, |
| "logps/chosen": -60.72758865356445, |
| "logps/rejected": -82.21714782714844, |
| "loss": 0.3501, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -0.34204745292663574, |
| "rewards/margins": 2.1199233531951904, |
| "rewards/rejected": -2.4619712829589844, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.8899117759877253, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.2915232211639855e-05, |
| "logits/chosen": -6.862878322601318, |
| "logits/rejected": -6.937772274017334, |
| "logps/chosen": -57.07465744018555, |
| "logps/rejected": -78.38792419433594, |
| "loss": 0.3525, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -0.14980041980743408, |
| "rewards/margins": 2.179995059967041, |
| "rewards/rejected": -2.3297953605651855, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.8918296892980437, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.288138571710783e-05, |
| "logits/chosen": -6.880101680755615, |
| "logits/rejected": -6.914247989654541, |
| "logps/chosen": -59.02347946166992, |
| "logps/rejected": -79.57148742675781, |
| "loss": 0.3485, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -0.11769998073577881, |
| "rewards/margins": 2.1596789360046387, |
| "rewards/rejected": -2.277379035949707, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.8937476026083621, |
| "grad_norm": 1.5234375, |
| "learning_rate": 4.2847447170568584e-05, |
| "logits/chosen": -6.755918979644775, |
| "logits/rejected": -6.846758842468262, |
| "logps/chosen": -57.86151123046875, |
| "logps/rejected": -79.17916870117188, |
| "loss": 0.3405, |
| "rewards/accuracies": 0.9937499761581421, |
| "rewards/chosen": -0.14960043132305145, |
| "rewards/margins": 2.1391072273254395, |
| "rewards/rejected": -2.2887072563171387, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.8956655159186805, |
| "grad_norm": 0.6953125, |
| "learning_rate": 4.2813416741079564e-05, |
| "logits/chosen": -6.889958381652832, |
| "logits/rejected": -6.950301170349121, |
| "logps/chosen": -58.084312438964844, |
| "logps/rejected": -78.97642517089844, |
| "loss": 0.3407, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.14899101853370667, |
| "rewards/margins": 2.1399528980255127, |
| "rewards/rejected": -2.2889437675476074, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.8975834292289988, |
| "grad_norm": 0.62890625, |
| "learning_rate": 4.27792945981559e-05, |
| "logits/chosen": -6.840428829193115, |
| "logits/rejected": -6.967151641845703, |
| "logps/chosen": -56.54399490356445, |
| "logps/rejected": -76.96357727050781, |
| "loss": 0.3386, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": 0.008828367106616497, |
| "rewards/margins": 2.1768367290496826, |
| "rewards/rejected": -2.168008327484131, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.8995013425393172, |
| "grad_norm": 0.48046875, |
| "learning_rate": 4.2745080911769565e-05, |
| "logits/chosen": -6.883180141448975, |
| "logits/rejected": -6.966970920562744, |
| "logps/chosen": -57.99214553833008, |
| "logps/rejected": -79.50528717041016, |
| "loss": 0.3409, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -0.05504138022661209, |
| "rewards/margins": 2.1709494590759277, |
| "rewards/rejected": -2.2259907722473145, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.9014192558496356, |
| "grad_norm": 0.369140625, |
| "learning_rate": 4.2710775852348576e-05, |
| "logits/chosen": -6.948625087738037, |
| "logits/rejected": -6.978739261627197, |
| "logps/chosen": -62.683006286621094, |
| "logps/rejected": -84.10035705566406, |
| "loss": 0.3405, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.11399755626916885, |
| "rewards/margins": 2.2531776428222656, |
| "rewards/rejected": -2.3671748638153076, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.903337169159954, |
| "grad_norm": 0.5703125, |
| "learning_rate": 4.267637959077606e-05, |
| "logits/chosen": -6.881336212158203, |
| "logits/rejected": -6.992560386657715, |
| "logps/chosen": -57.68507766723633, |
| "logps/rejected": -79.43367767333984, |
| "loss": 0.3344, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.08601374179124832, |
| "rewards/margins": 2.2121574878692627, |
| "rewards/rejected": -2.298171281814575, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.9052550824702723, |
| "grad_norm": 0.326171875, |
| "learning_rate": 4.264189229838945e-05, |
| "logits/chosen": -6.853945255279541, |
| "logits/rejected": -6.962986946105957, |
| "logps/chosen": -55.08637237548828, |
| "logps/rejected": -76.57861328125, |
| "loss": 0.3395, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": 0.04339681938290596, |
| "rewards/margins": 2.2207624912261963, |
| "rewards/rejected": -2.177365779876709, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.9071729957805907, |
| "grad_norm": 0.396484375, |
| "learning_rate": 4.2607314146979664e-05, |
| "logits/chosen": -6.9054412841796875, |
| "logits/rejected": -7.006344795227051, |
| "logps/chosen": -55.31730270385742, |
| "logps/rejected": -77.40642547607422, |
| "loss": 0.3399, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11814818531274796, |
| "rewards/margins": 2.2816827297210693, |
| "rewards/rejected": -2.16353440284729, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 0.59765625, |
| "learning_rate": 4.257264530879019e-05, |
| "logits/chosen": -6.856900691986084, |
| "logits/rejected": -6.938775539398193, |
| "logps/chosen": -55.32623291015625, |
| "logps/rejected": -76.9993667602539, |
| "loss": 0.3337, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.012098994106054306, |
| "rewards/margins": 2.1912758350372314, |
| "rewards/rejected": -2.1791763305664062, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.9110088224012275, |
| "grad_norm": 0.51953125, |
| "learning_rate": 4.253788595651624e-05, |
| "logits/chosen": -6.835860252380371, |
| "logits/rejected": -6.922590732574463, |
| "logps/chosen": -56.22468185424805, |
| "logps/rejected": -78.07466125488281, |
| "loss": 0.3349, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08803171664476395, |
| "rewards/margins": 2.245025634765625, |
| "rewards/rejected": -2.156993865966797, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.9129267357115458, |
| "grad_norm": 0.29296875, |
| "learning_rate": 4.250303626330394e-05, |
| "logits/chosen": -6.897280693054199, |
| "logits/rejected": -6.991764068603516, |
| "logps/chosen": -54.25004959106445, |
| "logps/rejected": -75.50594329833984, |
| "loss": 0.3321, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08122767508029938, |
| "rewards/margins": 2.2148044109344482, |
| "rewards/rejected": -2.1335768699645996, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.9148446490218642, |
| "grad_norm": 0.75, |
| "learning_rate": 4.246809640274939e-05, |
| "logits/chosen": -6.8050665855407715, |
| "logits/rejected": -6.896435737609863, |
| "logps/chosen": -56.57902145385742, |
| "logps/rejected": -77.64833068847656, |
| "loss": 0.3316, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.13071151077747345, |
| "rewards/margins": 2.223146438598633, |
| "rewards/rejected": -2.0924346446990967, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.9167625623321826, |
| "grad_norm": 0.310546875, |
| "learning_rate": 4.243306654889788e-05, |
| "logits/chosen": -6.833188056945801, |
| "logits/rejected": -6.917020320892334, |
| "logps/chosen": -53.51298141479492, |
| "logps/rejected": -75.99391174316406, |
| "loss": 0.3292, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15236929059028625, |
| "rewards/margins": 2.231903314590454, |
| "rewards/rejected": -2.0795340538024902, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.918680475642501, |
| "grad_norm": 0.14453125, |
| "learning_rate": 4.239794687624294e-05, |
| "logits/chosen": -6.868006229400635, |
| "logits/rejected": -6.944448947906494, |
| "logps/chosen": -54.134910583496094, |
| "logps/rejected": -74.07920837402344, |
| "loss": 0.3273, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1325669139623642, |
| "rewards/margins": 2.1751344203948975, |
| "rewards/rejected": -2.042567729949951, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.9205983889528193, |
| "grad_norm": 0.25, |
| "learning_rate": 4.2362737559725526e-05, |
| "logits/chosen": -6.768338203430176, |
| "logits/rejected": -6.835727691650391, |
| "logps/chosen": -55.674278259277344, |
| "logps/rejected": -76.75242614746094, |
| "loss": 0.3299, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": 0.09858144074678421, |
| "rewards/margins": 2.154266834259033, |
| "rewards/rejected": -2.05568528175354, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.9225163022631377, |
| "grad_norm": 0.2041015625, |
| "learning_rate": 4.232743877473316e-05, |
| "logits/chosen": -6.808007717132568, |
| "logits/rejected": -6.890481472015381, |
| "logps/chosen": -51.641632080078125, |
| "logps/rejected": -72.97166442871094, |
| "loss": 0.3288, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18179336190223694, |
| "rewards/margins": 2.2345938682556152, |
| "rewards/rejected": -2.0528006553649902, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.9244342155734561, |
| "grad_norm": 0.16015625, |
| "learning_rate": 4.229205069709898e-05, |
| "logits/chosen": -6.86145544052124, |
| "logits/rejected": -6.930682182312012, |
| "logps/chosen": -55.37908172607422, |
| "logps/rejected": -76.59054565429688, |
| "loss": 0.3286, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.09439722448587418, |
| "rewards/margins": 2.2271690368652344, |
| "rewards/rejected": -2.1327717304229736, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.9263521288837745, |
| "grad_norm": 0.248046875, |
| "learning_rate": 4.225657350310099e-05, |
| "logits/chosen": -6.836122989654541, |
| "logits/rejected": -6.910861015319824, |
| "logps/chosen": -59.543251037597656, |
| "logps/rejected": -80.47911071777344, |
| "loss": 0.3281, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.04849497973918915, |
| "rewards/margins": 2.2070679664611816, |
| "rewards/rejected": -2.1585729122161865, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.9282700421940928, |
| "grad_norm": 0.1591796875, |
| "learning_rate": 4.222100736946103e-05, |
| "logits/chosen": -6.886750221252441, |
| "logits/rejected": -6.970227241516113, |
| "logps/chosen": -53.348419189453125, |
| "logps/rejected": -75.61641693115234, |
| "loss": 0.3282, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17651376128196716, |
| "rewards/margins": 2.2037792205810547, |
| "rewards/rejected": -2.027265787124634, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.9301879555044112, |
| "grad_norm": 0.1220703125, |
| "learning_rate": 4.218535247334402e-05, |
| "logits/chosen": -6.816466331481934, |
| "logits/rejected": -6.905699729919434, |
| "logps/chosen": -55.218040466308594, |
| "logps/rejected": -76.34078979492188, |
| "loss": 0.3269, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1573016345500946, |
| "rewards/margins": 2.205143451690674, |
| "rewards/rejected": -2.047841787338257, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.9321058688147296, |
| "grad_norm": 0.2138671875, |
| "learning_rate": 4.2149608992357024e-05, |
| "logits/chosen": -6.868281364440918, |
| "logits/rejected": -6.949463844299316, |
| "logps/chosen": -54.75147247314453, |
| "logps/rejected": -75.82257080078125, |
| "loss": 0.3266, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17414028942584991, |
| "rewards/margins": 2.208597183227539, |
| "rewards/rejected": -2.034456729888916, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.934023782125048, |
| "grad_norm": 0.1953125, |
| "learning_rate": 4.211377710454837e-05, |
| "logits/chosen": -6.814573764801025, |
| "logits/rejected": -6.940939903259277, |
| "logps/chosen": -55.294456481933594, |
| "logps/rejected": -76.1449966430664, |
| "loss": 0.3266, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1870449334383011, |
| "rewards/margins": 2.2274434566497803, |
| "rewards/rejected": -2.040398120880127, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.9359416954353663, |
| "grad_norm": 0.201171875, |
| "learning_rate": 4.2077856988406756e-05, |
| "logits/chosen": -6.854167938232422, |
| "logits/rejected": -6.9163618087768555, |
| "logps/chosen": -52.53936767578125, |
| "logps/rejected": -75.00141906738281, |
| "loss": 0.3272, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16979984939098358, |
| "rewards/margins": 2.208627700805664, |
| "rewards/rejected": -2.038827896118164, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9378596087456847, |
| "grad_norm": 0.1123046875, |
| "learning_rate": 4.204184882286038e-05, |
| "logits/chosen": -6.842519283294678, |
| "logits/rejected": -6.933518409729004, |
| "logps/chosen": -56.68706512451172, |
| "logps/rejected": -78.18891143798828, |
| "loss": 0.3259, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.09812651574611664, |
| "rewards/margins": 2.197338581085205, |
| "rewards/rejected": -2.099212169647217, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.9397775220560031, |
| "grad_norm": 0.0732421875, |
| "learning_rate": 4.200575278727604e-05, |
| "logits/chosen": -6.877575874328613, |
| "logits/rejected": -6.985260009765625, |
| "logps/chosen": -54.264122009277344, |
| "logps/rejected": -74.304931640625, |
| "loss": 0.3259, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12776386737823486, |
| "rewards/margins": 2.1849300861358643, |
| "rewards/rejected": -2.057166337966919, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.9416954353663214, |
| "grad_norm": 0.09326171875, |
| "learning_rate": 4.1969569061458224e-05, |
| "logits/chosen": -6.85354471206665, |
| "logits/rejected": -6.928282737731934, |
| "logps/chosen": -58.26618576049805, |
| "logps/rejected": -79.95372009277344, |
| "loss": 0.3256, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.06239093095064163, |
| "rewards/margins": 2.1833994388580322, |
| "rewards/rejected": -2.1210083961486816, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.9436133486766398, |
| "grad_norm": 0.1328125, |
| "learning_rate": 4.1933297825648244e-05, |
| "logits/chosen": -6.8315935134887695, |
| "logits/rejected": -6.913296699523926, |
| "logps/chosen": -53.44468307495117, |
| "logps/rejected": -75.22355651855469, |
| "loss": 0.3259, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1496339738368988, |
| "rewards/margins": 2.1975769996643066, |
| "rewards/rejected": -2.047943353652954, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9455312619869581, |
| "grad_norm": 0.1435546875, |
| "learning_rate": 4.189693926052333e-05, |
| "logits/chosen": -6.85409688949585, |
| "logits/rejected": -6.940674781799316, |
| "logps/chosen": -54.494407653808594, |
| "logps/rejected": -75.82017517089844, |
| "loss": 0.3255, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.19262854754924774, |
| "rewards/margins": 2.1881494522094727, |
| "rewards/rejected": -1.995520830154419, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.9474491752972766, |
| "grad_norm": 0.07275390625, |
| "learning_rate": 4.186049354719571e-05, |
| "logits/chosen": -6.920261383056641, |
| "logits/rejected": -6.956971645355225, |
| "logps/chosen": -54.238853454589844, |
| "logps/rejected": -74.33580017089844, |
| "loss": 0.3257, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12954334914684296, |
| "rewards/margins": 2.2072174549102783, |
| "rewards/rejected": -2.077674150466919, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9493670886075949, |
| "grad_norm": 0.1474609375, |
| "learning_rate": 4.182396086721172e-05, |
| "logits/chosen": -6.929312705993652, |
| "logits/rejected": -6.987416744232178, |
| "logps/chosen": -51.33516311645508, |
| "logps/rejected": -72.56217956542969, |
| "loss": 0.3259, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18959593772888184, |
| "rewards/margins": 2.210606098175049, |
| "rewards/rejected": -2.021010160446167, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.9512850019179133, |
| "grad_norm": 0.05224609375, |
| "learning_rate": 4.1787341402550915e-05, |
| "logits/chosen": -6.868629455566406, |
| "logits/rejected": -6.9263153076171875, |
| "logps/chosen": -56.88227462768555, |
| "logps/rejected": -78.9183578491211, |
| "loss": 0.3254, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10913822799921036, |
| "rewards/margins": 2.2045962810516357, |
| "rewards/rejected": -2.0954582691192627, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.9532029152282316, |
| "grad_norm": 0.146484375, |
| "learning_rate": 4.175063533562514e-05, |
| "logits/chosen": -6.909239292144775, |
| "logits/rejected": -6.994379997253418, |
| "logps/chosen": -57.91779708862305, |
| "logps/rejected": -78.76531219482422, |
| "loss": 0.3256, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0988362729549408, |
| "rewards/margins": 2.2019526958465576, |
| "rewards/rejected": -2.103116273880005, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.9551208285385501, |
| "grad_norm": 0.05029296875, |
| "learning_rate": 4.1713842849277634e-05, |
| "logits/chosen": -6.838289737701416, |
| "logits/rejected": -6.935790061950684, |
| "logps/chosen": -53.85325241088867, |
| "logps/rejected": -75.92013549804688, |
| "loss": 0.3258, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17410710453987122, |
| "rewards/margins": 2.203998327255249, |
| "rewards/rejected": -2.0298912525177, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.9570387418488684, |
| "grad_norm": 0.040771484375, |
| "learning_rate": 4.1676964126782105e-05, |
| "logits/chosen": -6.864720821380615, |
| "logits/rejected": -6.89766788482666, |
| "logps/chosen": -54.05454635620117, |
| "logps/rejected": -74.34647369384766, |
| "loss": 0.3255, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16576418280601501, |
| "rewards/margins": 2.1935932636260986, |
| "rewards/rejected": -2.0278289318084717, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.9589566551591868, |
| "grad_norm": 0.06298828125, |
| "learning_rate": 4.1639999351841845e-05, |
| "logits/chosen": -6.877594947814941, |
| "logits/rejected": -6.9712982177734375, |
| "logps/chosen": -57.58259963989258, |
| "logps/rejected": -78.67808532714844, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15820643305778503, |
| "rewards/margins": 2.194255828857422, |
| "rewards/rejected": -2.0360493659973145, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9589566551591868, |
| "eval_logits/chosen": -6.7274932861328125, |
| "eval_logits/rejected": -7.004845142364502, |
| "eval_logps/chosen": -54.04244613647461, |
| "eval_logps/rejected": -76.8889389038086, |
| "eval_loss": 0.32545679807662964, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 0.14103949069976807, |
| "eval_rewards/margins": 2.20063853263855, |
| "eval_rewards/rejected": -2.0595991611480713, |
| "eval_runtime": 5.2337, |
| "eval_samples_per_second": 38.214, |
| "eval_steps_per_second": 38.214, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9608745684695051, |
| "grad_norm": 0.062255859375, |
| "learning_rate": 4.160294870858879e-05, |
| "logits/chosen": -6.783505916595459, |
| "logits/rejected": -6.876799583435059, |
| "logps/chosen": -56.87532424926758, |
| "logps/rejected": -78.06890869140625, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17683516442775726, |
| "rewards/margins": 2.1882314682006836, |
| "rewards/rejected": -2.0113959312438965, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.9627924817798236, |
| "grad_norm": 0.0625, |
| "learning_rate": 4.15658123815826e-05, |
| "logits/chosen": -6.886815071105957, |
| "logits/rejected": -6.940008640289307, |
| "logps/chosen": -51.72332000732422, |
| "logps/rejected": -73.25819396972656, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.24257156252861023, |
| "rewards/margins": 2.1972813606262207, |
| "rewards/rejected": -1.954709768295288, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.9647103950901419, |
| "grad_norm": 0.0927734375, |
| "learning_rate": 4.152859055580976e-05, |
| "logits/chosen": -6.846029758453369, |
| "logits/rejected": -6.949192047119141, |
| "logps/chosen": -56.957305908203125, |
| "logps/rejected": -79.1841049194336, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10997702181339264, |
| "rewards/margins": 2.1889004707336426, |
| "rewards/rejected": -2.0789237022399902, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.9666283084004603, |
| "grad_norm": 0.05859375, |
| "learning_rate": 4.1491283416682646e-05, |
| "logits/chosen": -6.93172550201416, |
| "logits/rejected": -7.003387451171875, |
| "logps/chosen": -59.26605224609375, |
| "logps/rejected": -80.91238403320312, |
| "loss": 0.3254, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0580294243991375, |
| "rewards/margins": 2.1971397399902344, |
| "rewards/rejected": -2.1391100883483887, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.9685462217107786, |
| "grad_norm": 0.0257568359375, |
| "learning_rate": 4.145389115003861e-05, |
| "logits/chosen": -6.888918399810791, |
| "logits/rejected": -6.952479362487793, |
| "logps/chosen": -55.66357421875, |
| "logps/rejected": -77.88643646240234, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.140243262052536, |
| "rewards/margins": 2.1988415718078613, |
| "rewards/rejected": -2.058598041534424, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.9704641350210971, |
| "grad_norm": 0.055908203125, |
| "learning_rate": 4.141641394213903e-05, |
| "logits/chosen": -6.902583122253418, |
| "logits/rejected": -6.9869842529296875, |
| "logps/chosen": -55.62483596801758, |
| "logps/rejected": -76.63645935058594, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15657739341259003, |
| "rewards/margins": 2.1922218799591064, |
| "rewards/rejected": -2.035644292831421, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.9723820483314154, |
| "grad_norm": 0.1396484375, |
| "learning_rate": 4.1378851979668424e-05, |
| "logits/chosen": -6.8762526512146, |
| "logits/rejected": -6.9788818359375, |
| "logps/chosen": -56.33964920043945, |
| "logps/rejected": -77.26721954345703, |
| "loss": 0.3256, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14851531386375427, |
| "rewards/margins": 2.206197738647461, |
| "rewards/rejected": -2.057682514190674, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.9742999616417338, |
| "grad_norm": 0.04541015625, |
| "learning_rate": 4.134120544973349e-05, |
| "logits/chosen": -6.7452521324157715, |
| "logits/rejected": -6.859133720397949, |
| "logps/chosen": -52.94233322143555, |
| "logps/rejected": -74.87626647949219, |
| "loss": 0.3254, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1908825784921646, |
| "rewards/margins": 2.20391845703125, |
| "rewards/rejected": -2.0130362510681152, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.9762178749520521, |
| "grad_norm": 0.033203125, |
| "learning_rate": 4.1303474539862165e-05, |
| "logits/chosen": -6.876730442047119, |
| "logits/rejected": -6.973580837249756, |
| "logps/chosen": -58.592201232910156, |
| "logps/rejected": -80.33421325683594, |
| "loss": 0.3254, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.0061996979638934135, |
| "rewards/margins": 2.2044360637664795, |
| "rewards/rejected": -2.2106356620788574, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.9781357882623706, |
| "grad_norm": 0.0400390625, |
| "learning_rate": 4.126565943800272e-05, |
| "logits/chosen": -6.827844142913818, |
| "logits/rejected": -6.945103645324707, |
| "logps/chosen": -51.06730651855469, |
| "logps/rejected": -73.34208679199219, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17692360281944275, |
| "rewards/margins": 2.20219087600708, |
| "rewards/rejected": -2.0252671241760254, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.9800537015726889, |
| "grad_norm": 0.044921875, |
| "learning_rate": 4.12277603325228e-05, |
| "logits/chosen": -6.779177665710449, |
| "logits/rejected": -6.879153251647949, |
| "logps/chosen": -56.09104537963867, |
| "logps/rejected": -77.8788833618164, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1476428508758545, |
| "rewards/margins": 2.2048072814941406, |
| "rewards/rejected": -2.0571646690368652, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.9819716148830073, |
| "grad_norm": 0.056884765625, |
| "learning_rate": 4.1189777412208516e-05, |
| "logits/chosen": -6.883273124694824, |
| "logits/rejected": -6.9658308029174805, |
| "logps/chosen": -53.4930534362793, |
| "logps/rejected": -74.64016723632812, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12911777198314667, |
| "rewards/margins": 2.201110363006592, |
| "rewards/rejected": -2.0719926357269287, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.9838895281933256, |
| "grad_norm": 0.029296875, |
| "learning_rate": 4.1151710866263457e-05, |
| "logits/chosen": -6.886050224304199, |
| "logits/rejected": -6.961801052093506, |
| "logps/chosen": -54.818687438964844, |
| "logps/rejected": -76.78516387939453, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.06988323479890823, |
| "rewards/margins": 2.1951420307159424, |
| "rewards/rejected": -2.125258684158325, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.9858074415036441, |
| "grad_norm": 0.03271484375, |
| "learning_rate": 4.11135608843078e-05, |
| "logits/chosen": -6.865797996520996, |
| "logits/rejected": -6.954972267150879, |
| "logps/chosen": -55.82950973510742, |
| "logps/rejected": -76.11198425292969, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16017884016036987, |
| "rewards/margins": 2.1961967945098877, |
| "rewards/rejected": -2.036017894744873, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.9877253548139624, |
| "grad_norm": 0.038818359375, |
| "learning_rate": 4.107532765637733e-05, |
| "logits/chosen": -6.876706600189209, |
| "logits/rejected": -6.931038856506348, |
| "logps/chosen": -56.75634765625, |
| "logps/rejected": -77.3016128540039, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18982794880867004, |
| "rewards/margins": 2.195154905319214, |
| "rewards/rejected": -2.005326747894287, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.9896432681242808, |
| "grad_norm": 0.031982421875, |
| "learning_rate": 4.103701137292248e-05, |
| "logits/chosen": -6.9785475730896, |
| "logits/rejected": -7.048098564147949, |
| "logps/chosen": -58.41059112548828, |
| "logps/rejected": -78.7070083618164, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11143723875284195, |
| "rewards/margins": 2.19964861869812, |
| "rewards/rejected": -2.0882115364074707, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.9915611814345991, |
| "grad_norm": 0.091796875, |
| "learning_rate": 4.099861222480745e-05, |
| "logits/chosen": -6.864126682281494, |
| "logits/rejected": -6.916762351989746, |
| "logps/chosen": -53.8144645690918, |
| "logps/rejected": -74.3333740234375, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16026651859283447, |
| "rewards/margins": 2.200939655303955, |
| "rewards/rejected": -2.040672779083252, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.9934790947449176, |
| "grad_norm": 0.052001953125, |
| "learning_rate": 4.096013040330918e-05, |
| "logits/chosen": -6.825551509857178, |
| "logits/rejected": -6.919713020324707, |
| "logps/chosen": -55.239952087402344, |
| "logps/rejected": -76.48191833496094, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1743491291999817, |
| "rewards/margins": 2.1970267295837402, |
| "rewards/rejected": -2.0226778984069824, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.9953970080552359, |
| "grad_norm": 0.052978515625, |
| "learning_rate": 4.092156610011644e-05, |
| "logits/chosen": -6.889337062835693, |
| "logits/rejected": -6.97268009185791, |
| "logps/chosen": -53.6662712097168, |
| "logps/rejected": -75.61515045166016, |
| "loss": 0.3253, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15172195434570312, |
| "rewards/margins": 2.1960272789001465, |
| "rewards/rejected": -2.0443053245544434, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.9973149213655543, |
| "grad_norm": 0.0400390625, |
| "learning_rate": 4.0882919507328866e-05, |
| "logits/chosen": -6.899672031402588, |
| "logits/rejected": -6.974337100982666, |
| "logps/chosen": -55.918304443359375, |
| "logps/rejected": -78.09788513183594, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12672047317028046, |
| "rewards/margins": 2.198251485824585, |
| "rewards/rejected": -2.071531295776367, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9992328346758726, |
| "grad_norm": 0.0308837890625, |
| "learning_rate": 4.084419081745601e-05, |
| "logits/chosen": -6.8632493019104, |
| "logits/rejected": -6.958096981048584, |
| "logps/chosen": -55.341552734375, |
| "logps/rejected": -77.59717559814453, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1237340122461319, |
| "rewards/margins": 2.197999954223633, |
| "rewards/rejected": -2.074265956878662, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.9996164173379364, |
| "eval_logits/chosen": -6.729816436767578, |
| "eval_logits/rejected": -7.007940292358398, |
| "eval_logps/chosen": -54.07815933227539, |
| "eval_logps/rejected": -76.93055725097656, |
| "eval_loss": 0.3251609802246094, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 0.13746821880340576, |
| "eval_rewards/margins": 2.2012293338775635, |
| "eval_rewards/rejected": -2.063760995864868, |
| "eval_runtime": 5.2222, |
| "eval_samples_per_second": 38.298, |
| "eval_steps_per_second": 38.298, |
| "step": 2606 |
| }, |
| { |
| "epoch": 1.001150747986191, |
| "grad_norm": 0.03466796875, |
| "learning_rate": 4.0805380223416344e-05, |
| "logits/chosen": -6.883546352386475, |
| "logits/rejected": -6.957623481750488, |
| "logps/chosen": -53.838043212890625, |
| "logps/rejected": -74.10066986083984, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.22372904419898987, |
| "rewards/margins": 2.1984477043151855, |
| "rewards/rejected": -1.974718689918518, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.0030686612965094, |
| "grad_norm": 0.041259765625, |
| "learning_rate": 4.076648791853635e-05, |
| "logits/chosen": -6.892508506774902, |
| "logits/rejected": -6.995348930358887, |
| "logps/chosen": -56.61248016357422, |
| "logps/rejected": -78.36751556396484, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10622930526733398, |
| "rewards/margins": 2.1974518299102783, |
| "rewards/rejected": -2.0912225246429443, |
| "step": 2615 |
| }, |
| { |
| "epoch": 1.0049865746068278, |
| "grad_norm": 0.034912109375, |
| "learning_rate": 4.072751409654955e-05, |
| "logits/chosen": -6.887833595275879, |
| "logits/rejected": -6.990255832672119, |
| "logps/chosen": -53.994590759277344, |
| "logps/rejected": -75.97149658203125, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17344704270362854, |
| "rewards/margins": 2.1953394412994385, |
| "rewards/rejected": -2.0218923091888428, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.006904487917146, |
| "grad_norm": 0.02197265625, |
| "learning_rate": 4.068845895159548e-05, |
| "logits/chosen": -6.899606227874756, |
| "logits/rejected": -7.0003509521484375, |
| "logps/chosen": -58.028724670410156, |
| "logps/rejected": -78.30329895019531, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08123298734426498, |
| "rewards/margins": 2.1976544857025146, |
| "rewards/rejected": -2.116421699523926, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.0088224012274645, |
| "grad_norm": 0.03662109375, |
| "learning_rate": 4.0649322678218804e-05, |
| "logits/chosen": -6.895656585693359, |
| "logits/rejected": -6.965889930725098, |
| "logps/chosen": -54.9753532409668, |
| "logps/rejected": -75.52403259277344, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1174411028623581, |
| "rewards/margins": 2.196857452392578, |
| "rewards/rejected": -2.079416036605835, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.010740314537783, |
| "grad_norm": 0.034423828125, |
| "learning_rate": 4.0610105471368305e-05, |
| "logits/chosen": -6.923454284667969, |
| "logits/rejected": -6.931260108947754, |
| "logps/chosen": -55.9629020690918, |
| "logps/rejected": -76.51974487304688, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2325584888458252, |
| "rewards/margins": 2.194713592529297, |
| "rewards/rejected": -1.9621549844741821, |
| "step": 2635 |
| }, |
| { |
| "epoch": 1.0126582278481013, |
| "grad_norm": 0.0263671875, |
| "learning_rate": 4.057080752639591e-05, |
| "logits/chosen": -6.872107028961182, |
| "logits/rejected": -6.9589643478393555, |
| "logps/chosen": -54.76153564453125, |
| "logps/rejected": -75.4432373046875, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10465948283672333, |
| "rewards/margins": 2.196145534515381, |
| "rewards/rejected": -2.0914859771728516, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.0145761411584195, |
| "grad_norm": 0.0308837890625, |
| "learning_rate": 4.053142903905573e-05, |
| "logits/chosen": -6.88769006729126, |
| "logits/rejected": -6.998019218444824, |
| "logps/chosen": -55.536720275878906, |
| "logps/rejected": -77.26806640625, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11749310791492462, |
| "rewards/margins": 2.1989588737487793, |
| "rewards/rejected": -2.081465482711792, |
| "step": 2645 |
| }, |
| { |
| "epoch": 1.016494054468738, |
| "grad_norm": 0.029541015625, |
| "learning_rate": 4.0491970205503084e-05, |
| "logits/chosen": -6.833132266998291, |
| "logits/rejected": -6.9340500831604, |
| "logps/chosen": -54.23346710205078, |
| "logps/rejected": -74.97486877441406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15681582689285278, |
| "rewards/margins": 2.198915958404541, |
| "rewards/rejected": -2.042099952697754, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.0184119677790564, |
| "grad_norm": 0.025390625, |
| "learning_rate": 4.04524312222935e-05, |
| "logits/chosen": -6.784567356109619, |
| "logits/rejected": -6.9042158126831055, |
| "logps/chosen": -56.043975830078125, |
| "logps/rejected": -77.23291015625, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.19256003201007843, |
| "rewards/margins": 2.1978323459625244, |
| "rewards/rejected": -2.005272150039673, |
| "step": 2655 |
| }, |
| { |
| "epoch": 1.0203298810893748, |
| "grad_norm": 0.0283203125, |
| "learning_rate": 4.041281228638177e-05, |
| "logits/chosen": -6.854004859924316, |
| "logits/rejected": -6.9370012283325195, |
| "logps/chosen": -54.6614875793457, |
| "logps/rejected": -76.17295837402344, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12523391842842102, |
| "rewards/margins": 2.196631908416748, |
| "rewards/rejected": -2.0713982582092285, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.022247794399693, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 4.037311359512096e-05, |
| "logits/chosen": -6.878549098968506, |
| "logits/rejected": -6.92901086807251, |
| "logps/chosen": -55.35753631591797, |
| "logps/rejected": -75.7037582397461, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18637792766094208, |
| "rewards/margins": 2.1981968879699707, |
| "rewards/rejected": -2.011819362640381, |
| "step": 2665 |
| }, |
| { |
| "epoch": 1.0241657077100115, |
| "grad_norm": 0.037841796875, |
| "learning_rate": 4.0333335346261415e-05, |
| "logits/chosen": -6.915754795074463, |
| "logits/rejected": -6.975900173187256, |
| "logps/chosen": -58.16853713989258, |
| "logps/rejected": -79.80914306640625, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.06968244165182114, |
| "rewards/margins": 2.1984660625457764, |
| "rewards/rejected": -2.128783702850342, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.02608362102033, |
| "grad_norm": 0.0250244140625, |
| "learning_rate": 4.029347773794975e-05, |
| "logits/chosen": -6.917555332183838, |
| "logits/rejected": -6.966570854187012, |
| "logps/chosen": -58.62860107421875, |
| "logps/rejected": -78.85334777832031, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.09619802236557007, |
| "rewards/margins": 2.1974024772644043, |
| "rewards/rejected": -2.1012043952941895, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.0280015343306483, |
| "grad_norm": 0.030517578125, |
| "learning_rate": 4.025354096872794e-05, |
| "logits/chosen": -6.830000877380371, |
| "logits/rejected": -6.904738426208496, |
| "logps/chosen": -56.95014572143555, |
| "logps/rejected": -77.70689392089844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.13039973378181458, |
| "rewards/margins": 2.1978321075439453, |
| "rewards/rejected": -2.067432403564453, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.0299194476409665, |
| "grad_norm": 0.025146484375, |
| "learning_rate": 4.0213525237532235e-05, |
| "logits/chosen": -6.799073219299316, |
| "logits/rejected": -6.8913750648498535, |
| "logps/chosen": -55.946311950683594, |
| "logps/rejected": -78.06874084472656, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11492273956537247, |
| "rewards/margins": 2.1941425800323486, |
| "rewards/rejected": -2.0792198181152344, |
| "step": 2685 |
| }, |
| { |
| "epoch": 1.031837360951285, |
| "grad_norm": 0.02001953125, |
| "learning_rate": 4.017343074369226e-05, |
| "logits/chosen": -6.870763301849365, |
| "logits/rejected": -6.954649448394775, |
| "logps/chosen": -53.58631134033203, |
| "logps/rejected": -74.53648376464844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20419125258922577, |
| "rewards/margins": 2.1963956356048584, |
| "rewards/rejected": -1.9922046661376953, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.0337552742616034, |
| "grad_norm": 0.02587890625, |
| "learning_rate": 4.0133257686929944e-05, |
| "logits/chosen": -6.9005842208862305, |
| "logits/rejected": -6.95761251449585, |
| "logps/chosen": -55.439735412597656, |
| "logps/rejected": -74.86544036865234, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.24408455193042755, |
| "rewards/margins": 2.200181245803833, |
| "rewards/rejected": -1.9560966491699219, |
| "step": 2695 |
| }, |
| { |
| "epoch": 1.0356731875719218, |
| "grad_norm": 0.02880859375, |
| "learning_rate": 4.009300626735859e-05, |
| "logits/chosen": -6.9174299240112305, |
| "logits/rejected": -6.982752323150635, |
| "logps/chosen": -57.56931686401367, |
| "logps/rejected": -78.64064025878906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07381283491849899, |
| "rewards/margins": 2.198716640472412, |
| "rewards/rejected": -2.124903440475464, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.03759110088224, |
| "grad_norm": 0.02978515625, |
| "learning_rate": 4.0052676685481814e-05, |
| "logits/chosen": -6.861070156097412, |
| "logits/rejected": -6.959973335266113, |
| "logps/chosen": -53.57646560668945, |
| "logps/rejected": -74.02730560302734, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18773815035820007, |
| "rewards/margins": 2.191598892211914, |
| "rewards/rejected": -2.0038609504699707, |
| "step": 2705 |
| }, |
| { |
| "epoch": 1.0395090141925585, |
| "grad_norm": 0.0302734375, |
| "learning_rate": 4.001226914219261e-05, |
| "logits/chosen": -6.911322593688965, |
| "logits/rejected": -7.01156759262085, |
| "logps/chosen": -53.097312927246094, |
| "logps/rejected": -73.89768981933594, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16086049377918243, |
| "rewards/margins": 2.1958365440368652, |
| "rewards/rejected": -2.0349762439727783, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.041426927502877, |
| "grad_norm": 0.02587890625, |
| "learning_rate": 3.997178383877231e-05, |
| "logits/chosen": -6.8495354652404785, |
| "logits/rejected": -6.967142581939697, |
| "logps/chosen": -56.043922424316406, |
| "logps/rejected": -78.30987548828125, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15614651143550873, |
| "rewards/margins": 2.1998848915100098, |
| "rewards/rejected": -2.04373836517334, |
| "step": 2715 |
| }, |
| { |
| "epoch": 1.0433448408131953, |
| "grad_norm": 0.0286865234375, |
| "learning_rate": 3.993122097688959e-05, |
| "logits/chosen": -6.892895698547363, |
| "logits/rejected": -6.953802585601807, |
| "logps/chosen": -57.95183181762695, |
| "logps/rejected": -78.77163696289062, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.04726942256093025, |
| "rewards/margins": 2.1945252418518066, |
| "rewards/rejected": -2.1472554206848145, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.0452627541235135, |
| "grad_norm": 0.0260009765625, |
| "learning_rate": 3.989058075859947e-05, |
| "logits/chosen": -6.83001184463501, |
| "logits/rejected": -6.906769752502441, |
| "logps/chosen": -56.60731887817383, |
| "logps/rejected": -76.92979431152344, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11525243520736694, |
| "rewards/margins": 2.1981358528137207, |
| "rewards/rejected": -2.082883596420288, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.047180667433832, |
| "grad_norm": 0.031494140625, |
| "learning_rate": 3.984986338634229e-05, |
| "logits/chosen": -6.8577399253845215, |
| "logits/rejected": -6.957972526550293, |
| "logps/chosen": -55.03181076049805, |
| "logps/rejected": -75.6190414428711, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16855183243751526, |
| "rewards/margins": 2.197056770324707, |
| "rewards/rejected": -2.0285048484802246, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.0490985807441504, |
| "grad_norm": 0.0225830078125, |
| "learning_rate": 3.980906906294273e-05, |
| "logits/chosen": -6.934423923492432, |
| "logits/rejected": -7.023044586181641, |
| "logps/chosen": -54.798828125, |
| "logps/rejected": -76.52142333984375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1898016333580017, |
| "rewards/margins": 2.1955370903015137, |
| "rewards/rejected": -2.005735397338867, |
| "step": 2735 |
| }, |
| { |
| "epoch": 1.0510164940544688, |
| "grad_norm": 0.027099609375, |
| "learning_rate": 3.976819799160879e-05, |
| "logits/chosen": -6.8770341873168945, |
| "logits/rejected": -6.964162349700928, |
| "logps/chosen": -54.404808044433594, |
| "logps/rejected": -75.58030700683594, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17514730989933014, |
| "rewards/margins": 2.197451114654541, |
| "rewards/rejected": -2.022303819656372, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.052934407364787, |
| "grad_norm": 0.0301513671875, |
| "learning_rate": 3.9727250375930744e-05, |
| "logits/chosen": -6.848829746246338, |
| "logits/rejected": -6.935307502746582, |
| "logps/chosen": -55.02604293823242, |
| "logps/rejected": -76.05598449707031, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1925317794084549, |
| "rewards/margins": 2.198523998260498, |
| "rewards/rejected": -2.0059921741485596, |
| "step": 2745 |
| }, |
| { |
| "epoch": 1.0548523206751055, |
| "grad_norm": 0.0277099609375, |
| "learning_rate": 3.96862264198802e-05, |
| "logits/chosen": -6.894994258880615, |
| "logits/rejected": -6.986319065093994, |
| "logps/chosen": -57.370811462402344, |
| "logps/rejected": -79.54820251464844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.026093292981386185, |
| "rewards/margins": 2.2008121013641357, |
| "rewards/rejected": -2.1747188568115234, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.056770233985424, |
| "grad_norm": 0.0230712890625, |
| "learning_rate": 3.9645126327808997e-05, |
| "logits/chosen": -6.829963684082031, |
| "logits/rejected": -6.898384094238281, |
| "logps/chosen": -50.599239349365234, |
| "logps/rejected": -71.79393768310547, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.26723015308380127, |
| "rewards/margins": 2.201087713241577, |
| "rewards/rejected": -1.9338576793670654, |
| "step": 2755 |
| }, |
| { |
| "epoch": 1.0586881472957423, |
| "grad_norm": 0.0302734375, |
| "learning_rate": 3.960395030444826e-05, |
| "logits/chosen": -6.888216972351074, |
| "logits/rejected": -6.947674751281738, |
| "logps/chosen": -55.1099739074707, |
| "logps/rejected": -76.78738403320312, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.09314815700054169, |
| "rewards/margins": 2.196256399154663, |
| "rewards/rejected": -2.1031081676483154, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.0606060606060606, |
| "grad_norm": 0.027099609375, |
| "learning_rate": 3.9562698554907324e-05, |
| "logits/chosen": -6.857504367828369, |
| "logits/rejected": -6.95501184463501, |
| "logps/chosen": -51.588653564453125, |
| "logps/rejected": -73.61457061767578, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.19752810895442963, |
| "rewards/margins": 2.198362350463867, |
| "rewards/rejected": -2.0008342266082764, |
| "step": 2765 |
| }, |
| { |
| "epoch": 1.062523973916379, |
| "grad_norm": 0.0145263671875, |
| "learning_rate": 3.952137128467275e-05, |
| "logits/chosen": -6.8695220947265625, |
| "logits/rejected": -6.959606170654297, |
| "logps/chosen": -56.88529586791992, |
| "logps/rejected": -78.67168426513672, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.061385512351989746, |
| "rewards/margins": 2.1960301399230957, |
| "rewards/rejected": -2.1346447467803955, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.0644418872266974, |
| "grad_norm": 0.021728515625, |
| "learning_rate": 3.947996869960729e-05, |
| "logits/chosen": -6.845946311950684, |
| "logits/rejected": -6.92853307723999, |
| "logps/chosen": -52.496307373046875, |
| "logps/rejected": -75.715576171875, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17457488179206848, |
| "rewards/margins": 2.196068286895752, |
| "rewards/rejected": -2.021493434906006, |
| "step": 2775 |
| }, |
| { |
| "epoch": 1.0663598005370156, |
| "grad_norm": 0.024658203125, |
| "learning_rate": 3.943849100594886e-05, |
| "logits/chosen": -6.955491542816162, |
| "logits/rejected": -7.005650997161865, |
| "logps/chosen": -56.628509521484375, |
| "logps/rejected": -78.13602447509766, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.04586848244071007, |
| "rewards/margins": 2.200742244720459, |
| "rewards/rejected": -2.1548736095428467, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.068277713847334, |
| "grad_norm": 0.017578125, |
| "learning_rate": 3.9396938410309515e-05, |
| "logits/chosen": -6.926751136779785, |
| "logits/rejected": -6.99490213394165, |
| "logps/chosen": -57.37287139892578, |
| "logps/rejected": -79.40666198730469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.052088893949985504, |
| "rewards/margins": 2.1968274116516113, |
| "rewards/rejected": -2.14473819732666, |
| "step": 2785 |
| }, |
| { |
| "epoch": 1.0701956271576525, |
| "grad_norm": 0.024658203125, |
| "learning_rate": 3.9355311119674394e-05, |
| "logits/chosen": -6.852726936340332, |
| "logits/rejected": -6.912999629974365, |
| "logps/chosen": -55.161170959472656, |
| "logps/rejected": -77.08748626708984, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1337597668170929, |
| "rewards/margins": 2.1965129375457764, |
| "rewards/rejected": -2.062753200531006, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.072113540467971, |
| "grad_norm": 0.020263671875, |
| "learning_rate": 3.9313609341400757e-05, |
| "logits/chosen": -6.858818054199219, |
| "logits/rejected": -6.979428291320801, |
| "logps/chosen": -53.97002410888672, |
| "logps/rejected": -76.10984802246094, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.22703592479228973, |
| "rewards/margins": 2.198535919189453, |
| "rewards/rejected": -1.9714998006820679, |
| "step": 2795 |
| }, |
| { |
| "epoch": 1.0740314537782891, |
| "grad_norm": 0.0264892578125, |
| "learning_rate": 3.9271833283216866e-05, |
| "logits/chosen": -6.864724159240723, |
| "logits/rejected": -6.9316606521606445, |
| "logps/chosen": -55.78950881958008, |
| "logps/rejected": -78.62726593017578, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14562520384788513, |
| "rewards/margins": 2.197704315185547, |
| "rewards/rejected": -2.0520787239074707, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.0759493670886076, |
| "grad_norm": 0.031494140625, |
| "learning_rate": 3.922998315322101e-05, |
| "logits/chosen": -6.889589786529541, |
| "logits/rejected": -6.990285396575928, |
| "logps/chosen": -52.906951904296875, |
| "logps/rejected": -73.70419311523438, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.26940232515335083, |
| "rewards/margins": 2.2004714012145996, |
| "rewards/rejected": -1.9310691356658936, |
| "step": 2805 |
| }, |
| { |
| "epoch": 1.077867280398926, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 3.9188059159880463e-05, |
| "logits/chosen": -6.812591552734375, |
| "logits/rejected": -6.900485038757324, |
| "logps/chosen": -59.0921745300293, |
| "logps/rejected": -80.58056640625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08650780469179153, |
| "rewards/margins": 2.1971893310546875, |
| "rewards/rejected": -2.1106815338134766, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.0797851937092444, |
| "grad_norm": 0.02197265625, |
| "learning_rate": 3.9146061512030415e-05, |
| "logits/chosen": -6.838484764099121, |
| "logits/rejected": -6.911144256591797, |
| "logps/chosen": -56.846275329589844, |
| "logps/rejected": -79.10497283935547, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.053564321249723434, |
| "rewards/margins": 2.197330951690674, |
| "rewards/rejected": -2.1437668800354004, |
| "step": 2815 |
| }, |
| { |
| "epoch": 1.0817031070195626, |
| "grad_norm": 0.0269775390625, |
| "learning_rate": 3.910399041887295e-05, |
| "logits/chosen": -6.862179756164551, |
| "logits/rejected": -6.935473442077637, |
| "logps/chosen": -53.73378372192383, |
| "logps/rejected": -75.07929229736328, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15086905658245087, |
| "rewards/margins": 2.197706699371338, |
| "rewards/rejected": -2.046837568283081, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.083621020329881, |
| "grad_norm": 0.017578125, |
| "learning_rate": 3.9061846089976005e-05, |
| "logits/chosen": -6.84485387802124, |
| "logits/rejected": -6.956694602966309, |
| "logps/chosen": -56.87104034423828, |
| "logps/rejected": -77.37132263183594, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12146338075399399, |
| "rewards/margins": 2.1992440223693848, |
| "rewards/rejected": -2.0777807235717773, |
| "step": 2825 |
| }, |
| { |
| "epoch": 1.0855389336401995, |
| "grad_norm": 0.0159912109375, |
| "learning_rate": 3.901962873527233e-05, |
| "logits/chosen": -6.868411064147949, |
| "logits/rejected": -6.946302890777588, |
| "logps/chosen": -54.27248001098633, |
| "logps/rejected": -76.27902221679688, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18422341346740723, |
| "rewards/margins": 2.1939854621887207, |
| "rewards/rejected": -2.0097622871398926, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.087456846950518, |
| "grad_norm": 0.024658203125, |
| "learning_rate": 3.897733856505845e-05, |
| "logits/chosen": -6.850857734680176, |
| "logits/rejected": -6.960956573486328, |
| "logps/chosen": -57.13166046142578, |
| "logps/rejected": -78.78927612304688, |
| "loss": 0.3252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.019278164952993393, |
| "rewards/margins": 2.1967427730560303, |
| "rewards/rejected": -2.1774649620056152, |
| "step": 2835 |
| }, |
| { |
| "epoch": 1.0893747602608361, |
| "grad_norm": 0.024169921875, |
| "learning_rate": 3.893497578999357e-05, |
| "logits/chosen": -6.8516998291015625, |
| "logits/rejected": -6.964435577392578, |
| "logps/chosen": -56.83503341674805, |
| "logps/rejected": -77.79374694824219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.13902735710144043, |
| "rewards/margins": 2.1968703269958496, |
| "rewards/rejected": -2.05784273147583, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.0912926735711546, |
| "grad_norm": 0.0224609375, |
| "learning_rate": 3.8892540621098594e-05, |
| "logits/chosen": -6.85550594329834, |
| "logits/rejected": -6.9149274826049805, |
| "logps/chosen": -58.30366134643555, |
| "logps/rejected": -79.84410095214844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.04130606725811958, |
| "rewards/margins": 2.1934211254119873, |
| "rewards/rejected": -2.1521153450012207, |
| "step": 2845 |
| }, |
| { |
| "epoch": 1.093210586881473, |
| "grad_norm": 0.0220947265625, |
| "learning_rate": 3.885003326975502e-05, |
| "logits/chosen": -6.817744255065918, |
| "logits/rejected": -6.904561519622803, |
| "logps/chosen": -51.51453399658203, |
| "logps/rejected": -73.3747329711914, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.24189969897270203, |
| "rewards/margins": 2.1980557441711426, |
| "rewards/rejected": -1.9561560153961182, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.0951285001917914, |
| "grad_norm": 0.02490234375, |
| "learning_rate": 3.880745394770392e-05, |
| "logits/chosen": -6.877533912658691, |
| "logits/rejected": -6.920339107513428, |
| "logps/chosen": -59.07216262817383, |
| "logps/rejected": -80.83231353759766, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.059574197977781296, |
| "rewards/margins": 2.1979880332946777, |
| "rewards/rejected": -2.138413429260254, |
| "step": 2855 |
| }, |
| { |
| "epoch": 1.0970464135021096, |
| "grad_norm": 0.0244140625, |
| "learning_rate": 3.876480286704484e-05, |
| "logits/chosen": -6.807048797607422, |
| "logits/rejected": -6.901270389556885, |
| "logps/chosen": -53.65718460083008, |
| "logps/rejected": -74.63452911376953, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1505444347858429, |
| "rewards/margins": 2.1971020698547363, |
| "rewards/rejected": -2.0465571880340576, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.098964326812428, |
| "grad_norm": 0.0234375, |
| "learning_rate": 3.872208024023484e-05, |
| "logits/chosen": -6.927255153656006, |
| "logits/rejected": -6.956273555755615, |
| "logps/chosen": -53.692848205566406, |
| "logps/rejected": -76.09283447265625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15065941214561462, |
| "rewards/margins": 2.201733112335205, |
| "rewards/rejected": -2.0510735511779785, |
| "step": 2865 |
| }, |
| { |
| "epoch": 1.1008822401227465, |
| "grad_norm": 0.035888671875, |
| "learning_rate": 3.867928628008731e-05, |
| "logits/chosen": -6.816448211669922, |
| "logits/rejected": -6.911751747131348, |
| "logps/chosen": -55.2706184387207, |
| "logps/rejected": -76.91694641113281, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16313551366329193, |
| "rewards/margins": 2.1990082263946533, |
| "rewards/rejected": -2.035872459411621, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.102800153433065, |
| "grad_norm": 0.0303955078125, |
| "learning_rate": 3.8636421199771e-05, |
| "logits/chosen": -6.858266353607178, |
| "logits/rejected": -6.889333248138428, |
| "logps/chosen": -53.993865966796875, |
| "logps/rejected": -75.5336685180664, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1297408640384674, |
| "rewards/margins": 2.1988682746887207, |
| "rewards/rejected": -2.0691275596618652, |
| "step": 2875 |
| }, |
| { |
| "epoch": 1.1047180667433831, |
| "grad_norm": 0.0186767578125, |
| "learning_rate": 3.859348521280892e-05, |
| "logits/chosen": -6.896686553955078, |
| "logits/rejected": -6.956762790679932, |
| "logps/chosen": -53.74113082885742, |
| "logps/rejected": -75.0785903930664, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1880251169204712, |
| "rewards/margins": 2.1969103813171387, |
| "rewards/rejected": -2.008885622024536, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.1066359800537016, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 3.855047853307728e-05, |
| "logits/chosen": -6.921117305755615, |
| "logits/rejected": -7.01132345199585, |
| "logps/chosen": -54.68280792236328, |
| "logps/rejected": -76.28700256347656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14484816789627075, |
| "rewards/margins": 2.1998002529144287, |
| "rewards/rejected": -2.0549521446228027, |
| "step": 2885 |
| }, |
| { |
| "epoch": 1.10855389336402, |
| "grad_norm": 0.018798828125, |
| "learning_rate": 3.850740137480447e-05, |
| "logits/chosen": -6.905709743499756, |
| "logits/rejected": -6.965888977050781, |
| "logps/chosen": -58.196983337402344, |
| "logps/rejected": -79.59626770019531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11491873115301132, |
| "rewards/margins": 2.1975598335266113, |
| "rewards/rejected": -2.082641363143921, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.1104718066743384, |
| "grad_norm": 0.0172119140625, |
| "learning_rate": 3.846425395256989e-05, |
| "logits/chosen": -6.784145355224609, |
| "logits/rejected": -6.9189653396606445, |
| "logps/chosen": -52.5921516418457, |
| "logps/rejected": -74.90009307861328, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18322737514972687, |
| "rewards/margins": 2.1953601837158203, |
| "rewards/rejected": -2.0121326446533203, |
| "step": 2895 |
| }, |
| { |
| "epoch": 1.1123897199846566, |
| "grad_norm": 0.0184326171875, |
| "learning_rate": 3.8421036481303e-05, |
| "logits/chosen": -6.781815528869629, |
| "logits/rejected": -6.881045341491699, |
| "logps/chosen": -53.2947998046875, |
| "logps/rejected": -74.60124206542969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14246733486652374, |
| "rewards/margins": 2.1933109760284424, |
| "rewards/rejected": -2.0508437156677246, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.114307633294975, |
| "grad_norm": 0.0205078125, |
| "learning_rate": 3.837774917628218e-05, |
| "logits/chosen": -6.8118181228637695, |
| "logits/rejected": -6.875295162200928, |
| "logps/chosen": -54.65224075317383, |
| "logps/rejected": -75.99118041992188, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14632579684257507, |
| "rewards/margins": 2.1963367462158203, |
| "rewards/rejected": -2.050011157989502, |
| "step": 2905 |
| }, |
| { |
| "epoch": 1.1162255466052935, |
| "grad_norm": 0.0206298828125, |
| "learning_rate": 3.833439225313362e-05, |
| "logits/chosen": -6.832925319671631, |
| "logits/rejected": -6.927382469177246, |
| "logps/chosen": -54.1140251159668, |
| "logps/rejected": -75.5179214477539, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12394438683986664, |
| "rewards/margins": 2.195122480392456, |
| "rewards/rejected": -2.0711779594421387, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.1181434599156117, |
| "grad_norm": 0.0250244140625, |
| "learning_rate": 3.829096592783039e-05, |
| "logits/chosen": -6.910910129547119, |
| "logits/rejected": -6.9941840171813965, |
| "logps/chosen": -54.79912185668945, |
| "logps/rejected": -77.29903411865234, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10858787596225739, |
| "rewards/margins": 2.1974599361419678, |
| "rewards/rejected": -2.088872194290161, |
| "step": 2915 |
| }, |
| { |
| "epoch": 1.1200613732259301, |
| "grad_norm": 0.0145263671875, |
| "learning_rate": 3.82474704166912e-05, |
| "logits/chosen": -6.777789115905762, |
| "logits/rejected": -6.893073081970215, |
| "logps/chosen": -57.53221893310547, |
| "logps/rejected": -78.73521423339844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1271776258945465, |
| "rewards/margins": 2.195502758026123, |
| "rewards/rejected": -2.0683252811431885, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.1219792865362486, |
| "grad_norm": 0.0244140625, |
| "learning_rate": 3.8203905936379415e-05, |
| "logits/chosen": -6.841919898986816, |
| "logits/rejected": -6.939420223236084, |
| "logps/chosen": -55.87000274658203, |
| "logps/rejected": -76.84537506103516, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1004914790391922, |
| "rewards/margins": 2.1957545280456543, |
| "rewards/rejected": -2.0952632427215576, |
| "step": 2925 |
| }, |
| { |
| "epoch": 1.123897199846567, |
| "grad_norm": 0.0250244140625, |
| "learning_rate": 3.8160272703901975e-05, |
| "logits/chosen": -6.862138271331787, |
| "logits/rejected": -6.949643135070801, |
| "logps/chosen": -57.05035400390625, |
| "logps/rejected": -77.7467041015625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.13071158528327942, |
| "rewards/margins": 2.197261333465576, |
| "rewards/rejected": -2.06654953956604, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.1258151131568854, |
| "grad_norm": 0.0263671875, |
| "learning_rate": 3.8116570936608245e-05, |
| "logits/chosen": -6.812536716461182, |
| "logits/rejected": -6.889588832855225, |
| "logps/chosen": -58.20977783203125, |
| "logps/rejected": -79.45494079589844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08483082056045532, |
| "rewards/margins": 2.194852352142334, |
| "rewards/rejected": -2.1100213527679443, |
| "step": 2935 |
| }, |
| { |
| "epoch": 1.1277330264672036, |
| "grad_norm": 0.01513671875, |
| "learning_rate": 3.807280085218904e-05, |
| "logits/chosen": -6.816141605377197, |
| "logits/rejected": -6.902308464050293, |
| "logps/chosen": -53.630462646484375, |
| "logps/rejected": -75.4277114868164, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20424386858940125, |
| "rewards/margins": 2.1985929012298584, |
| "rewards/rejected": -1.9943492412567139, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.129650939777522, |
| "grad_norm": 0.0208740234375, |
| "learning_rate": 3.8028962668675436e-05, |
| "logits/chosen": -6.88754940032959, |
| "logits/rejected": -6.952759742736816, |
| "logps/chosen": -55.587440490722656, |
| "logps/rejected": -77.20613098144531, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1409328579902649, |
| "rewards/margins": 2.198951005935669, |
| "rewards/rejected": -2.058018207550049, |
| "step": 2945 |
| }, |
| { |
| "epoch": 1.1315688530878405, |
| "grad_norm": 0.03466796875, |
| "learning_rate": 3.7985056604437754e-05, |
| "logits/chosen": -6.822312355041504, |
| "logits/rejected": -6.915589332580566, |
| "logps/chosen": -56.829490661621094, |
| "logps/rejected": -77.65740966796875, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11516109853982925, |
| "rewards/margins": 2.199615240097046, |
| "rewards/rejected": -2.084454298019409, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.1334867663981587, |
| "grad_norm": 0.023193359375, |
| "learning_rate": 3.794108287818444e-05, |
| "logits/chosen": -6.859801292419434, |
| "logits/rejected": -6.943611145019531, |
| "logps/chosen": -53.40679931640625, |
| "logps/rejected": -73.86637878417969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18870747089385986, |
| "rewards/margins": 2.196364164352417, |
| "rewards/rejected": -2.0076565742492676, |
| "step": 2955 |
| }, |
| { |
| "epoch": 1.1354046797084771, |
| "grad_norm": 0.0177001953125, |
| "learning_rate": 3.7897041708961e-05, |
| "logits/chosen": -6.775750637054443, |
| "logits/rejected": -6.829716682434082, |
| "logps/chosen": -56.192054748535156, |
| "logps/rejected": -77.07792663574219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16598518192768097, |
| "rewards/margins": 2.196262836456299, |
| "rewards/rejected": -2.0302774906158447, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.1373225930187956, |
| "grad_norm": 0.014892578125, |
| "learning_rate": 3.785293331614884e-05, |
| "logits/chosen": -6.888825416564941, |
| "logits/rejected": -6.977200984954834, |
| "logps/chosen": -58.45457077026367, |
| "logps/rejected": -80.14210510253906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.06244196370244026, |
| "rewards/margins": 2.196946620941162, |
| "rewards/rejected": -2.134504795074463, |
| "step": 2965 |
| }, |
| { |
| "epoch": 1.139240506329114, |
| "grad_norm": 0.01806640625, |
| "learning_rate": 3.78087579194643e-05, |
| "logits/chosen": -6.927948951721191, |
| "logits/rejected": -6.9935150146484375, |
| "logps/chosen": -55.169342041015625, |
| "logps/rejected": -77.38768005371094, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.09456191211938858, |
| "rewards/margins": 2.1975150108337402, |
| "rewards/rejected": -2.1029531955718994, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.1411584196394322, |
| "grad_norm": 0.0263671875, |
| "learning_rate": 3.7764515738957434e-05, |
| "logits/chosen": -6.886633396148682, |
| "logits/rejected": -6.958956718444824, |
| "logps/chosen": -55.48567581176758, |
| "logps/rejected": -77.4232177734375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1457061469554901, |
| "rewards/margins": 2.1986184120178223, |
| "rewards/rejected": -2.0529122352600098, |
| "step": 2975 |
| }, |
| { |
| "epoch": 1.1430763329497506, |
| "grad_norm": 0.0185546875, |
| "learning_rate": 3.772020699501098e-05, |
| "logits/chosen": -6.843485355377197, |
| "logits/rejected": -6.9481916427612305, |
| "logps/chosen": -53.60565948486328, |
| "logps/rejected": -75.5847396850586, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.21644556522369385, |
| "rewards/margins": 2.1974103450775146, |
| "rewards/rejected": -1.9809648990631104, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.144994246260069, |
| "grad_norm": 0.0179443359375, |
| "learning_rate": 3.7675831908339234e-05, |
| "logits/chosen": -6.9442949295043945, |
| "logits/rejected": -6.987727165222168, |
| "logps/chosen": -55.760963439941406, |
| "logps/rejected": -76.97257995605469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08401085436344147, |
| "rewards/margins": 2.1952691078186035, |
| "rewards/rejected": -2.1112582683563232, |
| "step": 2985 |
| }, |
| { |
| "epoch": 1.1469121595703875, |
| "grad_norm": 0.0172119140625, |
| "learning_rate": 3.763139069998699e-05, |
| "logits/chosen": -6.921295166015625, |
| "logits/rejected": -6.971589088439941, |
| "logps/chosen": -57.0744514465332, |
| "logps/rejected": -77.71913146972656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1222480982542038, |
| "rewards/margins": 2.1965291500091553, |
| "rewards/rejected": -2.0742809772491455, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.1488300728807057, |
| "grad_norm": 0.024658203125, |
| "learning_rate": 3.7586883591328396e-05, |
| "logits/chosen": -6.899412631988525, |
| "logits/rejected": -6.98648738861084, |
| "logps/chosen": -53.3134765625, |
| "logps/rejected": -73.20283508300781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2066071331501007, |
| "rewards/margins": 2.1967947483062744, |
| "rewards/rejected": -1.9901872873306274, |
| "step": 2995 |
| }, |
| { |
| "epoch": 1.1507479861910241, |
| "grad_norm": 0.017822265625, |
| "learning_rate": 3.7542310804065875e-05, |
| "logits/chosen": -6.8181610107421875, |
| "logits/rejected": -6.9312543869018555, |
| "logps/chosen": -53.06462860107422, |
| "logps/rejected": -74.0704345703125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2070445716381073, |
| "rewards/margins": 2.197538137435913, |
| "rewards/rejected": -1.9904934167861938, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.1507479861910241, |
| "eval_logits/chosen": -6.733577728271484, |
| "eval_logits/rejected": -7.0103278160095215, |
| "eval_logps/chosen": -54.098121643066406, |
| "eval_logps/rejected": -76.94671630859375, |
| "eval_loss": 0.32510754466056824, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 0.13547228276729584, |
| "eval_rewards/margins": 2.2008495330810547, |
| "eval_rewards/rejected": -2.0653772354125977, |
| "eval_runtime": 5.247, |
| "eval_samples_per_second": 38.117, |
| "eval_steps_per_second": 38.117, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.1526658995013426, |
| "grad_norm": 0.0213623046875, |
| "learning_rate": 3.7497672560229e-05, |
| "logits/chosen": -6.835085391998291, |
| "logits/rejected": -6.877735137939453, |
| "logps/chosen": -56.40270233154297, |
| "logps/rejected": -77.52452087402344, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18680231273174286, |
| "rewards/margins": 2.198249340057373, |
| "rewards/rejected": -2.011446714401245, |
| "step": 3005 |
| }, |
| { |
| "epoch": 1.154583812811661, |
| "grad_norm": 0.013916015625, |
| "learning_rate": 3.745296908217341e-05, |
| "logits/chosen": -6.860623359680176, |
| "logits/rejected": -6.987900733947754, |
| "logps/chosen": -55.96862030029297, |
| "logps/rejected": -76.41857147216797, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16915419697761536, |
| "rewards/margins": 2.1973133087158203, |
| "rewards/rejected": -2.0281593799591064, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.1565017261219792, |
| "grad_norm": 0.0205078125, |
| "learning_rate": 3.74082005925797e-05, |
| "logits/chosen": -6.845609188079834, |
| "logits/rejected": -6.985723972320557, |
| "logps/chosen": -49.98345947265625, |
| "logps/rejected": -71.40119934082031, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.28592175245285034, |
| "rewards/margins": 2.197575330734253, |
| "rewards/rejected": -1.911653757095337, |
| "step": 3015 |
| }, |
| { |
| "epoch": 1.1584196394322976, |
| "grad_norm": 0.0223388671875, |
| "learning_rate": 3.7363367314452307e-05, |
| "logits/chosen": -6.875467777252197, |
| "logits/rejected": -6.945004463195801, |
| "logps/chosen": -54.1942024230957, |
| "logps/rejected": -76.0034408569336, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11485148966312408, |
| "rewards/margins": 2.1981101036071777, |
| "rewards/rejected": -2.083258867263794, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.160337552742616, |
| "grad_norm": 0.019287109375, |
| "learning_rate": 3.731846947111837e-05, |
| "logits/chosen": -6.876239776611328, |
| "logits/rejected": -6.9427642822265625, |
| "logps/chosen": -58.8173828125, |
| "logps/rejected": -80.17155456542969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1200895681977272, |
| "rewards/margins": 2.1984190940856934, |
| "rewards/rejected": -2.07832932472229, |
| "step": 3025 |
| }, |
| { |
| "epoch": 1.1622554660529345, |
| "grad_norm": 0.0157470703125, |
| "learning_rate": 3.7273507286226684e-05, |
| "logits/chosen": -6.8489837646484375, |
| "logits/rejected": -6.969413757324219, |
| "logps/chosen": -56.049156188964844, |
| "logps/rejected": -77.49417114257812, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1288760006427765, |
| "rewards/margins": 2.1958889961242676, |
| "rewards/rejected": -2.0670130252838135, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.1641733793632527, |
| "grad_norm": 0.0157470703125, |
| "learning_rate": 3.722848098374653e-05, |
| "logits/chosen": -6.9269609451293945, |
| "logits/rejected": -6.994809150695801, |
| "logps/chosen": -55.518402099609375, |
| "logps/rejected": -76.10893249511719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1681075096130371, |
| "rewards/margins": 2.1947579383850098, |
| "rewards/rejected": -2.0266504287719727, |
| "step": 3035 |
| }, |
| { |
| "epoch": 1.1660912926735711, |
| "grad_norm": 0.0191650390625, |
| "learning_rate": 3.718339078796656e-05, |
| "logits/chosen": -6.861593723297119, |
| "logits/rejected": -6.984339714050293, |
| "logps/chosen": -57.30218505859375, |
| "logps/rejected": -78.30956268310547, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07483305037021637, |
| "rewards/margins": 2.1995906829833984, |
| "rewards/rejected": -2.1247572898864746, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.1680092059838896, |
| "grad_norm": 0.01611328125, |
| "learning_rate": 3.7138236923493745e-05, |
| "logits/chosen": -6.8836259841918945, |
| "logits/rejected": -6.949560642242432, |
| "logps/chosen": -58.995750427246094, |
| "logps/rejected": -79.87813568115234, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1622563898563385, |
| "rewards/margins": 2.1966044902801514, |
| "rewards/rejected": -2.034348249435425, |
| "step": 3045 |
| }, |
| { |
| "epoch": 1.1699271192942078, |
| "grad_norm": 0.0189208984375, |
| "learning_rate": 3.709301961525215e-05, |
| "logits/chosen": -6.907643795013428, |
| "logits/rejected": -6.970582008361816, |
| "logps/chosen": -52.92273712158203, |
| "logps/rejected": -74.26122283935547, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12806203961372375, |
| "rewards/margins": 2.198936700820923, |
| "rewards/rejected": -2.0708744525909424, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.1718450326045262, |
| "grad_norm": 0.0206298828125, |
| "learning_rate": 3.7047739088481896e-05, |
| "logits/chosen": -6.881479740142822, |
| "logits/rejected": -6.99112606048584, |
| "logps/chosen": -56.346168518066406, |
| "logps/rejected": -77.93466186523438, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11438564956188202, |
| "rewards/margins": 2.197819709777832, |
| "rewards/rejected": -2.0834341049194336, |
| "step": 3055 |
| }, |
| { |
| "epoch": 1.1737629459148446, |
| "grad_norm": 0.01544189453125, |
| "learning_rate": 3.700239556873803e-05, |
| "logits/chosen": -6.802818298339844, |
| "logits/rejected": -6.9134674072265625, |
| "logps/chosen": -55.84782028198242, |
| "logps/rejected": -76.99220275878906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14138475060462952, |
| "rewards/margins": 2.1987390518188477, |
| "rewards/rejected": -2.05735445022583, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.175680859225163, |
| "grad_norm": 0.01446533203125, |
| "learning_rate": 3.695698928188936e-05, |
| "logits/chosen": -6.930673122406006, |
| "logits/rejected": -7.025910377502441, |
| "logps/chosen": -55.19673538208008, |
| "logps/rejected": -77.19384765625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08800756931304932, |
| "rewards/margins": 2.198402166366577, |
| "rewards/rejected": -2.1103944778442383, |
| "step": 3065 |
| }, |
| { |
| "epoch": 1.1775987725354815, |
| "grad_norm": 0.0159912109375, |
| "learning_rate": 3.6911520454117364e-05, |
| "logits/chosen": -6.800990104675293, |
| "logits/rejected": -6.837213039398193, |
| "logps/chosen": -52.48417282104492, |
| "logps/rejected": -75.27742767333984, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.22398178279399872, |
| "rewards/margins": 2.1966872215270996, |
| "rewards/rejected": -1.9727054834365845, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.1795166858457997, |
| "grad_norm": 0.014892578125, |
| "learning_rate": 3.686598931191506e-05, |
| "logits/chosen": -6.877493858337402, |
| "logits/rejected": -6.961717128753662, |
| "logps/chosen": -55.0633659362793, |
| "logps/rejected": -74.63704681396484, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1815473586320877, |
| "rewards/margins": 2.1940340995788574, |
| "rewards/rejected": -2.0124869346618652, |
| "step": 3075 |
| }, |
| { |
| "epoch": 1.1814345991561181, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 3.6820396082085854e-05, |
| "logits/chosen": -6.874161720275879, |
| "logits/rejected": -6.961171627044678, |
| "logps/chosen": -55.867698669433594, |
| "logps/rejected": -77.20703125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.09498462826013565, |
| "rewards/margins": 2.1980140209198, |
| "rewards/rejected": -2.103029489517212, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.1833525124664366, |
| "grad_norm": 0.019287109375, |
| "learning_rate": 3.6774740991742456e-05, |
| "logits/chosen": -6.843630790710449, |
| "logits/rejected": -6.964916229248047, |
| "logps/chosen": -54.132240295410156, |
| "logps/rejected": -74.72636413574219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1745484322309494, |
| "rewards/margins": 2.1994543075561523, |
| "rewards/rejected": -2.0249056816101074, |
| "step": 3085 |
| }, |
| { |
| "epoch": 1.1852704257767548, |
| "grad_norm": 0.0177001953125, |
| "learning_rate": 3.672902426830569e-05, |
| "logits/chosen": -6.85568904876709, |
| "logits/rejected": -6.929766654968262, |
| "logps/chosen": -57.98683547973633, |
| "logps/rejected": -79.25408172607422, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.13423751294612885, |
| "rewards/margins": 2.1988065242767334, |
| "rewards/rejected": -2.0645689964294434, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.1871883390870732, |
| "grad_norm": 0.019287109375, |
| "learning_rate": 3.668324613950339e-05, |
| "logits/chosen": -6.860760688781738, |
| "logits/rejected": -6.9167962074279785, |
| "logps/chosen": -56.801109313964844, |
| "logps/rejected": -76.44920349121094, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12205035984516144, |
| "rewards/margins": 2.196124315261841, |
| "rewards/rejected": -2.0740737915039062, |
| "step": 3095 |
| }, |
| { |
| "epoch": 1.1891062523973916, |
| "grad_norm": 0.02294921875, |
| "learning_rate": 3.6637406833369336e-05, |
| "logits/chosen": -6.864434719085693, |
| "logits/rejected": -6.943790435791016, |
| "logps/chosen": -55.60688400268555, |
| "logps/rejected": -75.59327697753906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20054812729358673, |
| "rewards/margins": 2.1957311630249023, |
| "rewards/rejected": -1.9951833486557007, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.19102416570771, |
| "grad_norm": 0.0206298828125, |
| "learning_rate": 3.659150657824194e-05, |
| "logits/chosen": -6.883216857910156, |
| "logits/rejected": -6.926869869232178, |
| "logps/chosen": -54.942848205566406, |
| "logps/rejected": -75.64328002929688, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.25306662917137146, |
| "rewards/margins": 2.1989903450012207, |
| "rewards/rejected": -1.9459235668182373, |
| "step": 3105 |
| }, |
| { |
| "epoch": 1.1929420790180283, |
| "grad_norm": 0.023193359375, |
| "learning_rate": 3.6545545602763296e-05, |
| "logits/chosen": -6.765725612640381, |
| "logits/rejected": -6.863596439361572, |
| "logps/chosen": -51.03193664550781, |
| "logps/rejected": -72.6492919921875, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.24340319633483887, |
| "rewards/margins": 2.1984338760375977, |
| "rewards/rejected": -1.9550306797027588, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.1948599923283467, |
| "grad_norm": 0.0223388671875, |
| "learning_rate": 3.649952413587796e-05, |
| "logits/chosen": -6.795783042907715, |
| "logits/rejected": -6.881206512451172, |
| "logps/chosen": -59.676666259765625, |
| "logps/rejected": -80.19944763183594, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1134626716375351, |
| "rewards/margins": 2.1996171474456787, |
| "rewards/rejected": -2.0861546993255615, |
| "step": 3115 |
| }, |
| { |
| "epoch": 1.1967779056386652, |
| "grad_norm": 0.022705078125, |
| "learning_rate": 3.645344240683176e-05, |
| "logits/chosen": -6.884184837341309, |
| "logits/rejected": -6.944088935852051, |
| "logps/chosen": -59.5294303894043, |
| "logps/rejected": -81.12696838378906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.05250592157244682, |
| "rewards/margins": 2.197688102722168, |
| "rewards/rejected": -2.1451821327209473, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.1986958189489836, |
| "grad_norm": 0.0211181640625, |
| "learning_rate": 3.640730064517077e-05, |
| "logits/chosen": -6.844605922698975, |
| "logits/rejected": -6.9427008628845215, |
| "logps/chosen": -58.22944259643555, |
| "logps/rejected": -79.50534057617188, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.05716212838888168, |
| "rewards/margins": 2.1979916095733643, |
| "rewards/rejected": -2.140829563140869, |
| "step": 3125 |
| }, |
| { |
| "epoch": 1.2006137322593018, |
| "grad_norm": 0.01446533203125, |
| "learning_rate": 3.636109908074006e-05, |
| "logits/chosen": -6.839515686035156, |
| "logits/rejected": -6.921526908874512, |
| "logps/chosen": -54.10723114013672, |
| "logps/rejected": -75.46257781982422, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16128435730934143, |
| "rewards/margins": 2.1983935832977295, |
| "rewards/rejected": -2.037109375, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.2025316455696202, |
| "grad_norm": 0.01708984375, |
| "learning_rate": 3.631483794368261e-05, |
| "logits/chosen": -6.844223976135254, |
| "logits/rejected": -6.917840480804443, |
| "logps/chosen": -57.75761795043945, |
| "logps/rejected": -78.73417663574219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14469479024410248, |
| "rewards/margins": 2.1967318058013916, |
| "rewards/rejected": -2.052037000656128, |
| "step": 3135 |
| }, |
| { |
| "epoch": 1.2044495588799387, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 3.626851746443816e-05, |
| "logits/chosen": -6.834771156311035, |
| "logits/rejected": -6.892405033111572, |
| "logps/chosen": -56.64507293701172, |
| "logps/rejected": -77.17357635498047, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1187446340918541, |
| "rewards/margins": 2.1978745460510254, |
| "rewards/rejected": -2.079129934310913, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.206367472190257, |
| "grad_norm": 0.0166015625, |
| "learning_rate": 3.622213787374202e-05, |
| "logits/chosen": -6.868593692779541, |
| "logits/rejected": -6.975184440612793, |
| "logps/chosen": -56.99578094482422, |
| "logps/rejected": -77.51200103759766, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16191808879375458, |
| "rewards/margins": 2.1966910362243652, |
| "rewards/rejected": -2.0347728729248047, |
| "step": 3145 |
| }, |
| { |
| "epoch": 1.2082853855005753, |
| "grad_norm": 0.0179443359375, |
| "learning_rate": 3.617569940262397e-05, |
| "logits/chosen": -6.836582183837891, |
| "logits/rejected": -6.939114570617676, |
| "logps/chosen": -52.9292106628418, |
| "logps/rejected": -74.92284393310547, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12345506250858307, |
| "rewards/margins": 2.1982033252716064, |
| "rewards/rejected": -2.0747480392456055, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.2102032988108937, |
| "grad_norm": 0.020263671875, |
| "learning_rate": 3.61292022824071e-05, |
| "logits/chosen": -6.889954566955566, |
| "logits/rejected": -6.915895938873291, |
| "logps/chosen": -55.65690994262695, |
| "logps/rejected": -75.88868713378906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11389796435832977, |
| "rewards/margins": 2.1961417198181152, |
| "rewards/rejected": -2.0822436809539795, |
| "step": 3155 |
| }, |
| { |
| "epoch": 1.2121212121212122, |
| "grad_norm": 0.017578125, |
| "learning_rate": 3.6082646744706615e-05, |
| "logits/chosen": -6.902949333190918, |
| "logits/rejected": -6.99478816986084, |
| "logps/chosen": -54.92612838745117, |
| "logps/rejected": -75.23097229003906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16485390067100525, |
| "rewards/margins": 2.195668935775757, |
| "rewards/rejected": -2.0308148860931396, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.2140391254315306, |
| "grad_norm": 0.02099609375, |
| "learning_rate": 3.6036033021428763e-05, |
| "logits/chosen": -6.836057186126709, |
| "logits/rejected": -6.9265947341918945, |
| "logps/chosen": -55.4571647644043, |
| "logps/rejected": -77.2686538696289, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1580025851726532, |
| "rewards/margins": 2.1961824893951416, |
| "rewards/rejected": -2.038179874420166, |
| "step": 3165 |
| }, |
| { |
| "epoch": 1.2159570387418488, |
| "grad_norm": 0.0157470703125, |
| "learning_rate": 3.598936134476957e-05, |
| "logits/chosen": -6.854968070983887, |
| "logits/rejected": -6.951455593109131, |
| "logps/chosen": -56.069427490234375, |
| "logps/rejected": -77.87905883789062, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1228158250451088, |
| "rewards/margins": 2.196932077407837, |
| "rewards/rejected": -2.0741162300109863, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.2178749520521672, |
| "grad_norm": 0.0133056640625, |
| "learning_rate": 3.59426319472138e-05, |
| "logits/chosen": -6.809521675109863, |
| "logits/rejected": -6.926182746887207, |
| "logps/chosen": -57.12538528442383, |
| "logps/rejected": -77.77890014648438, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.09594462811946869, |
| "rewards/margins": 2.195291042327881, |
| "rewards/rejected": -2.09934663772583, |
| "step": 3175 |
| }, |
| { |
| "epoch": 1.2197928653624857, |
| "grad_norm": 0.019775390625, |
| "learning_rate": 3.58958450615337e-05, |
| "logits/chosen": -6.759768486022949, |
| "logits/rejected": -6.831299781799316, |
| "logps/chosen": -56.357330322265625, |
| "logps/rejected": -78.47567749023438, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.24715280532836914, |
| "rewards/margins": 2.1972153186798096, |
| "rewards/rejected": -1.9500625133514404, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.2217107786728039, |
| "grad_norm": 0.0166015625, |
| "learning_rate": 3.5849000920787916e-05, |
| "logits/chosen": -6.913460731506348, |
| "logits/rejected": -6.9834394454956055, |
| "logps/chosen": -54.47563934326172, |
| "logps/rejected": -75.08209228515625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0902893990278244, |
| "rewards/margins": 2.1981558799743652, |
| "rewards/rejected": -2.1078667640686035, |
| "step": 3185 |
| }, |
| { |
| "epoch": 1.2236286919831223, |
| "grad_norm": 0.01531982421875, |
| "learning_rate": 3.580209975832027e-05, |
| "logits/chosen": -6.87200403213501, |
| "logits/rejected": -6.999584197998047, |
| "logps/chosen": -55.20698928833008, |
| "logps/rejected": -77.44094848632812, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12308267503976822, |
| "rewards/margins": 2.196704387664795, |
| "rewards/rejected": -2.073621988296509, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.2255466052934407, |
| "grad_norm": 0.01483154296875, |
| "learning_rate": 3.5755141807758636e-05, |
| "logits/chosen": -6.90018367767334, |
| "logits/rejected": -6.980373382568359, |
| "logps/chosen": -58.038169860839844, |
| "logps/rejected": -79.75993347167969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07556581497192383, |
| "rewards/margins": 2.197662115097046, |
| "rewards/rejected": -2.122096061706543, |
| "step": 3195 |
| }, |
| { |
| "epoch": 1.2274645186037592, |
| "grad_norm": 0.0186767578125, |
| "learning_rate": 3.570812730301377e-05, |
| "logits/chosen": -6.910298824310303, |
| "logits/rejected": -6.948716163635254, |
| "logps/chosen": -58.978660583496094, |
| "logps/rejected": -79.23673248291016, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10630476474761963, |
| "rewards/margins": 2.1975457668304443, |
| "rewards/rejected": -2.0912413597106934, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.2293824319140776, |
| "grad_norm": 0.0166015625, |
| "learning_rate": 3.5661056478278125e-05, |
| "logits/chosen": -6.822863578796387, |
| "logits/rejected": -6.938515663146973, |
| "logps/chosen": -56.370582580566406, |
| "logps/rejected": -77.22187805175781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1240546703338623, |
| "rewards/margins": 2.1947436332702637, |
| "rewards/rejected": -2.0706887245178223, |
| "step": 3205 |
| }, |
| { |
| "epoch": 1.2313003452243958, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 3.561392956802472e-05, |
| "logits/chosen": -6.785411834716797, |
| "logits/rejected": -6.905782222747803, |
| "logps/chosen": -52.2618408203125, |
| "logps/rejected": -73.18843078613281, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20731830596923828, |
| "rewards/margins": 2.1965816020965576, |
| "rewards/rejected": -1.9892632961273193, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.2332182585347142, |
| "grad_norm": 0.0179443359375, |
| "learning_rate": 3.556674680700593e-05, |
| "logits/chosen": -6.823803901672363, |
| "logits/rejected": -6.908896446228027, |
| "logps/chosen": -53.1720085144043, |
| "logps/rejected": -75.19302368164062, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1953558772802353, |
| "rewards/margins": 2.1971781253814697, |
| "rewards/rejected": -2.0018222332000732, |
| "step": 3215 |
| }, |
| { |
| "epoch": 1.2351361718450327, |
| "grad_norm": 0.0162353515625, |
| "learning_rate": 3.5519508430252364e-05, |
| "logits/chosen": -6.835984706878662, |
| "logits/rejected": -6.891526699066162, |
| "logps/chosen": -59.92537307739258, |
| "logps/rejected": -81.62738800048828, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.04124726727604866, |
| "rewards/margins": 2.1987502574920654, |
| "rewards/rejected": -2.1575026512145996, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.2370540851553509, |
| "grad_norm": 0.0184326171875, |
| "learning_rate": 3.5472214673071645e-05, |
| "logits/chosen": -6.899560451507568, |
| "logits/rejected": -6.903540134429932, |
| "logps/chosen": -53.959434509277344, |
| "logps/rejected": -75.20841979980469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11058112233877182, |
| "rewards/margins": 2.197727680206299, |
| "rewards/rejected": -2.087146520614624, |
| "step": 3225 |
| }, |
| { |
| "epoch": 1.2389719984656693, |
| "grad_norm": 0.018310546875, |
| "learning_rate": 3.542486577104728e-05, |
| "logits/chosen": -6.880455017089844, |
| "logits/rejected": -6.971824645996094, |
| "logps/chosen": -58.014862060546875, |
| "logps/rejected": -78.8389892578125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1089930385351181, |
| "rewards/margins": 2.199493646621704, |
| "rewards/rejected": -2.0905003547668457, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.2408899117759877, |
| "grad_norm": 0.0177001953125, |
| "learning_rate": 3.5377461960037454e-05, |
| "logits/chosen": -6.817750453948975, |
| "logits/rejected": -6.916102409362793, |
| "logps/chosen": -56.97007369995117, |
| "logps/rejected": -78.2486572265625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10282842814922333, |
| "rewards/margins": 2.1967597007751465, |
| "rewards/rejected": -2.093931198120117, |
| "step": 3235 |
| }, |
| { |
| "epoch": 1.2428078250863062, |
| "grad_norm": 0.0157470703125, |
| "learning_rate": 3.533000347617386e-05, |
| "logits/chosen": -6.9070892333984375, |
| "logits/rejected": -6.9540510177612305, |
| "logps/chosen": -54.85721969604492, |
| "logps/rejected": -75.885986328125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.13600504398345947, |
| "rewards/margins": 2.1978845596313477, |
| "rewards/rejected": -2.0618793964385986, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.2447257383966246, |
| "grad_norm": 0.01806640625, |
| "learning_rate": 3.5282490555860566e-05, |
| "logits/chosen": -6.82870626449585, |
| "logits/rejected": -6.905839443206787, |
| "logps/chosen": -58.199119567871094, |
| "logps/rejected": -78.6649169921875, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07048743218183517, |
| "rewards/margins": 2.197709321975708, |
| "rewards/rejected": -2.1272220611572266, |
| "step": 3245 |
| }, |
| { |
| "epoch": 1.2466436517069428, |
| "grad_norm": 0.0157470703125, |
| "learning_rate": 3.5234923435772776e-05, |
| "logits/chosen": -6.8863935470581055, |
| "logits/rejected": -6.9651618003845215, |
| "logps/chosen": -56.577064514160156, |
| "logps/rejected": -78.65019226074219, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.006662911735475063, |
| "rewards/margins": 2.197619915008545, |
| "rewards/rejected": -2.1909565925598145, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.2485615650172612, |
| "grad_norm": 0.01434326171875, |
| "learning_rate": 3.518730235285569e-05, |
| "logits/chosen": -6.8779778480529785, |
| "logits/rejected": -6.99672794342041, |
| "logps/chosen": -54.36248779296875, |
| "logps/rejected": -75.02043151855469, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16153615713119507, |
| "rewards/margins": 2.19610857963562, |
| "rewards/rejected": -2.0345723628997803, |
| "step": 3255 |
| }, |
| { |
| "epoch": 1.2504794783275797, |
| "grad_norm": 0.0172119140625, |
| "learning_rate": 3.513962754432329e-05, |
| "logits/chosen": -6.871232509613037, |
| "logits/rejected": -6.936939239501953, |
| "logps/chosen": -55.50481033325195, |
| "logps/rejected": -77.04280090332031, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08255159109830856, |
| "rewards/margins": 2.1961824893951416, |
| "rewards/rejected": -2.113631010055542, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.2523973916378979, |
| "grad_norm": 0.0177001953125, |
| "learning_rate": 3.509189924765723e-05, |
| "logits/chosen": -6.956633567810059, |
| "logits/rejected": -6.980214595794678, |
| "logps/chosen": -55.857261657714844, |
| "logps/rejected": -76.93995666503906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07723622769117355, |
| "rewards/margins": 2.1976943016052246, |
| "rewards/rejected": -2.1204581260681152, |
| "step": 3265 |
| }, |
| { |
| "epoch": 1.2543153049482163, |
| "grad_norm": 0.0169677734375, |
| "learning_rate": 3.504411770060553e-05, |
| "logits/chosen": -6.8603997230529785, |
| "logits/rejected": -6.952916622161865, |
| "logps/chosen": -54.337066650390625, |
| "logps/rejected": -75.54595947265625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.19328327476978302, |
| "rewards/margins": 2.196732759475708, |
| "rewards/rejected": -2.0034494400024414, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.2562332182585347, |
| "grad_norm": 0.01226806640625, |
| "learning_rate": 3.499628314118154e-05, |
| "logits/chosen": -6.9065842628479, |
| "logits/rejected": -6.992828369140625, |
| "logps/chosen": -53.137229919433594, |
| "logps/rejected": -75.12387084960938, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1256095916032791, |
| "rewards/margins": 2.197791576385498, |
| "rewards/rejected": -2.0721821784973145, |
| "step": 3275 |
| }, |
| { |
| "epoch": 1.2581511315688532, |
| "grad_norm": 0.0250244140625, |
| "learning_rate": 3.4948395807662644e-05, |
| "logits/chosen": -6.882128715515137, |
| "logits/rejected": -6.942866325378418, |
| "logps/chosen": -54.67943572998047, |
| "logps/rejected": -75.58345031738281, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.21490740776062012, |
| "rewards/margins": 2.196397066116333, |
| "rewards/rejected": -1.9814897775650024, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.2600690448791716, |
| "grad_norm": 0.015869140625, |
| "learning_rate": 3.490045593858909e-05, |
| "logits/chosen": -6.784398555755615, |
| "logits/rejected": -6.870619297027588, |
| "logps/chosen": -58.52797317504883, |
| "logps/rejected": -78.68760681152344, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.22775070369243622, |
| "rewards/margins": 2.1975009441375732, |
| "rewards/rejected": -1.969750165939331, |
| "step": 3285 |
| }, |
| { |
| "epoch": 1.2619869581894898, |
| "grad_norm": 0.0179443359375, |
| "learning_rate": 3.485246377276285e-05, |
| "logits/chosen": -6.8864336013793945, |
| "logits/rejected": -6.979198455810547, |
| "logps/chosen": -55.7845458984375, |
| "logps/rejected": -76.38514709472656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07828356325626373, |
| "rewards/margins": 2.195582628250122, |
| "rewards/rejected": -2.1172988414764404, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.2639048714998082, |
| "grad_norm": 0.01611328125, |
| "learning_rate": 3.48044195492464e-05, |
| "logits/chosen": -6.8806962966918945, |
| "logits/rejected": -6.947422981262207, |
| "logps/chosen": -55.211936950683594, |
| "logps/rejected": -76.61866760253906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16678637266159058, |
| "rewards/margins": 2.1968917846679688, |
| "rewards/rejected": -2.0301055908203125, |
| "step": 3295 |
| }, |
| { |
| "epoch": 1.2658227848101267, |
| "grad_norm": 0.01806640625, |
| "learning_rate": 3.4756323507361515e-05, |
| "logits/chosen": -6.963173866271973, |
| "logits/rejected": -7.004177093505859, |
| "logps/chosen": -57.901634216308594, |
| "logps/rejected": -79.7287826538086, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07122094929218292, |
| "rewards/margins": 2.1957080364227295, |
| "rewards/rejected": -2.1244869232177734, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.2677406981204449, |
| "grad_norm": 0.018310546875, |
| "learning_rate": 3.47081758866881e-05, |
| "logits/chosen": -6.824810028076172, |
| "logits/rejected": -6.932721138000488, |
| "logps/chosen": -56.644142150878906, |
| "logps/rejected": -78.6021728515625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11658480018377304, |
| "rewards/margins": 2.1953206062316895, |
| "rewards/rejected": -2.078735589981079, |
| "step": 3305 |
| }, |
| { |
| "epoch": 1.2696586114307633, |
| "grad_norm": 0.019287109375, |
| "learning_rate": 3.465997692706299e-05, |
| "logits/chosen": -6.901789665222168, |
| "logits/rejected": -7.001550197601318, |
| "logps/chosen": -53.700408935546875, |
| "logps/rejected": -75.07813262939453, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17139770090579987, |
| "rewards/margins": 2.1972126960754395, |
| "rewards/rejected": -2.0258147716522217, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.2715765247410817, |
| "grad_norm": 0.0218505859375, |
| "learning_rate": 3.461172686857874e-05, |
| "logits/chosen": -6.829520225524902, |
| "logits/rejected": -6.944082736968994, |
| "logps/chosen": -53.95747756958008, |
| "logps/rejected": -75.05677795410156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16962416470050812, |
| "rewards/margins": 2.1952710151672363, |
| "rewards/rejected": -2.0256474018096924, |
| "step": 3315 |
| }, |
| { |
| "epoch": 1.2734944380514, |
| "grad_norm": 0.0162353515625, |
| "learning_rate": 3.456342595158247e-05, |
| "logits/chosen": -6.842916011810303, |
| "logits/rejected": -6.947360992431641, |
| "logps/chosen": -55.469261169433594, |
| "logps/rejected": -76.36644744873047, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12937235832214355, |
| "rewards/margins": 2.198073387145996, |
| "rewards/rejected": -2.0687010288238525, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.2754123513617184, |
| "grad_norm": 0.022216796875, |
| "learning_rate": 3.45150744166746e-05, |
| "logits/chosen": -6.803062438964844, |
| "logits/rejected": -6.875199794769287, |
| "logps/chosen": -60.186309814453125, |
| "logps/rejected": -81.22868347167969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.029813658446073532, |
| "rewards/margins": 2.200932025909424, |
| "rewards/rejected": -2.1711182594299316, |
| "step": 3325 |
| }, |
| { |
| "epoch": 1.2773302646720368, |
| "grad_norm": 0.015869140625, |
| "learning_rate": 3.446667250470774e-05, |
| "logits/chosen": -6.896180152893066, |
| "logits/rejected": -6.9682936668396, |
| "logps/chosen": -57.92467498779297, |
| "logps/rejected": -79.21630096435547, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07933951914310455, |
| "rewards/margins": 2.197868824005127, |
| "rewards/rejected": -2.1185295581817627, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.2792481779823552, |
| "grad_norm": 0.0196533203125, |
| "learning_rate": 3.441822045678542e-05, |
| "logits/chosen": -6.864504337310791, |
| "logits/rejected": -6.974801540374756, |
| "logps/chosen": -56.012306213378906, |
| "logps/rejected": -77.16068267822266, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12609338760375977, |
| "rewards/margins": 2.196408748626709, |
| "rewards/rejected": -2.0703155994415283, |
| "step": 3335 |
| }, |
| { |
| "epoch": 1.2811660912926737, |
| "grad_norm": 0.020751953125, |
| "learning_rate": 3.436971851426089e-05, |
| "logits/chosen": -6.881219387054443, |
| "logits/rejected": -6.962426662445068, |
| "logps/chosen": -54.42626190185547, |
| "logps/rejected": -75.70896911621094, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16146163642406464, |
| "rewards/margins": 2.197981595993042, |
| "rewards/rejected": -2.036520004272461, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.2830840046029919, |
| "grad_norm": 0.016357421875, |
| "learning_rate": 3.4321166918735966e-05, |
| "logits/chosen": -6.941650390625, |
| "logits/rejected": -6.986546993255615, |
| "logps/chosen": -57.1054573059082, |
| "logps/rejected": -78.90348815917969, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11171239614486694, |
| "rewards/margins": 2.1978182792663574, |
| "rewards/rejected": -2.0861058235168457, |
| "step": 3345 |
| }, |
| { |
| "epoch": 1.2850019179133103, |
| "grad_norm": 0.0179443359375, |
| "learning_rate": 3.4272565912059794e-05, |
| "logits/chosen": -6.926405906677246, |
| "logits/rejected": -7.031134605407715, |
| "logps/chosen": -55.7785758972168, |
| "logps/rejected": -77.1592788696289, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1397164762020111, |
| "rewards/margins": 2.1975929737091064, |
| "rewards/rejected": -2.0578763484954834, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.2869198312236287, |
| "grad_norm": 0.0174560546875, |
| "learning_rate": 3.4223915736327656e-05, |
| "logits/chosen": -6.847365379333496, |
| "logits/rejected": -6.972783088684082, |
| "logps/chosen": -54.550384521484375, |
| "logps/rejected": -75.249755859375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20894715189933777, |
| "rewards/margins": 2.196265459060669, |
| "rewards/rejected": -1.9873180389404297, |
| "step": 3355 |
| }, |
| { |
| "epoch": 1.288837744533947, |
| "grad_norm": 0.019775390625, |
| "learning_rate": 3.417521663387974e-05, |
| "logits/chosen": -6.9112982749938965, |
| "logits/rejected": -6.933781623840332, |
| "logps/chosen": -56.79960250854492, |
| "logps/rejected": -77.83222961425781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14105048775672913, |
| "rewards/margins": 2.1970365047454834, |
| "rewards/rejected": -2.055985927581787, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.2907556578442654, |
| "grad_norm": 0.018798828125, |
| "learning_rate": 3.412646884729998e-05, |
| "logits/chosen": -6.834046840667725, |
| "logits/rejected": -6.908982276916504, |
| "logps/chosen": -56.66218948364258, |
| "logps/rejected": -77.36146545410156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1607808917760849, |
| "rewards/margins": 2.1996397972106934, |
| "rewards/rejected": -2.0388588905334473, |
| "step": 3365 |
| }, |
| { |
| "epoch": 1.2926735711545838, |
| "grad_norm": 0.0133056640625, |
| "learning_rate": 3.407767261941478e-05, |
| "logits/chosen": -6.865869045257568, |
| "logits/rejected": -6.947390556335449, |
| "logps/chosen": -55.22978591918945, |
| "logps/rejected": -76.95445251464844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16833913326263428, |
| "rewards/margins": 2.198389768600464, |
| "rewards/rejected": -2.030050754547119, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.2945914844649022, |
| "grad_norm": 0.01904296875, |
| "learning_rate": 3.4028828193291894e-05, |
| "logits/chosen": -6.868216037750244, |
| "logits/rejected": -6.96189022064209, |
| "logps/chosen": -53.82965087890625, |
| "logps/rejected": -75.89132690429688, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15720410645008087, |
| "rewards/margins": 2.197294235229492, |
| "rewards/rejected": -2.0400900840759277, |
| "step": 3375 |
| }, |
| { |
| "epoch": 1.2965093977752207, |
| "grad_norm": 0.01495361328125, |
| "learning_rate": 3.3979935812239116e-05, |
| "logits/chosen": -6.839072227478027, |
| "logits/rejected": -6.940023899078369, |
| "logps/chosen": -56.325706481933594, |
| "logps/rejected": -76.52516174316406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1907631903886795, |
| "rewards/margins": 2.196441888809204, |
| "rewards/rejected": -2.005678653717041, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.2984273110855389, |
| "grad_norm": 0.0194091796875, |
| "learning_rate": 3.393099571980315e-05, |
| "logits/chosen": -6.903067111968994, |
| "logits/rejected": -6.9862565994262695, |
| "logps/chosen": -51.87671661376953, |
| "logps/rejected": -74.49134826660156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12387783825397491, |
| "rewards/margins": 2.196930170059204, |
| "rewards/rejected": -2.073052406311035, |
| "step": 3385 |
| }, |
| { |
| "epoch": 1.3003452243958573, |
| "grad_norm": 0.0152587890625, |
| "learning_rate": 3.3882008159768344e-05, |
| "logits/chosen": -6.856507301330566, |
| "logits/rejected": -6.935732364654541, |
| "logps/chosen": -52.5578727722168, |
| "logps/rejected": -73.25543212890625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2329297810792923, |
| "rewards/margins": 2.1983587741851807, |
| "rewards/rejected": -1.9654289484024048, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.3022631377061757, |
| "grad_norm": 0.0135498046875, |
| "learning_rate": 3.383297337615551e-05, |
| "logits/chosen": -6.921877861022949, |
| "logits/rejected": -6.980062007904053, |
| "logps/chosen": -54.018531799316406, |
| "logps/rejected": -76.19944763183594, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14331956207752228, |
| "rewards/margins": 2.1956329345703125, |
| "rewards/rejected": -2.0523130893707275, |
| "step": 3395 |
| }, |
| { |
| "epoch": 1.304181051016494, |
| "grad_norm": 0.021728515625, |
| "learning_rate": 3.378389161322069e-05, |
| "logits/chosen": -6.900376796722412, |
| "logits/rejected": -6.962699890136719, |
| "logps/chosen": -59.96956253051758, |
| "logps/rejected": -81.11237335205078, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.007493323180824518, |
| "rewards/margins": 2.1981189250946045, |
| "rewards/rejected": -2.1906256675720215, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.3060989643268124, |
| "grad_norm": 0.0166015625, |
| "learning_rate": 3.373476311545394e-05, |
| "logits/chosen": -6.904867649078369, |
| "logits/rejected": -6.971893310546875, |
| "logps/chosen": -56.5262336730957, |
| "logps/rejected": -76.99403381347656, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11257918179035187, |
| "rewards/margins": 2.1982243061065674, |
| "rewards/rejected": -2.0856451988220215, |
| "step": 3405 |
| }, |
| { |
| "epoch": 1.3080168776371308, |
| "grad_norm": 0.015869140625, |
| "learning_rate": 3.368558812757811e-05, |
| "logits/chosen": -6.831631660461426, |
| "logits/rejected": -6.9506120681762695, |
| "logps/chosen": -60.0141716003418, |
| "logps/rejected": -81.53129577636719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.057561974972486496, |
| "rewards/margins": 2.19670033454895, |
| "rewards/rejected": -2.1391382217407227, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.3099347909474492, |
| "grad_norm": 0.0152587890625, |
| "learning_rate": 3.363636689454765e-05, |
| "logits/chosen": -6.874886989593506, |
| "logits/rejected": -6.909640312194824, |
| "logps/chosen": -58.28754425048828, |
| "logps/rejected": -79.06494140625, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.044596798717975616, |
| "rewards/margins": 2.1944453716278076, |
| "rewards/rejected": -2.149848461151123, |
| "step": 3415 |
| }, |
| { |
| "epoch": 1.3118527042577677, |
| "grad_norm": 0.01434326171875, |
| "learning_rate": 3.358709966154735e-05, |
| "logits/chosen": -6.809027194976807, |
| "logits/rejected": -6.925405025482178, |
| "logps/chosen": -53.523101806640625, |
| "logps/rejected": -75.57826232910156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17070826888084412, |
| "rewards/margins": 2.1942429542541504, |
| "rewards/rejected": -2.0235350131988525, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.3137706175680859, |
| "grad_norm": 0.0181884765625, |
| "learning_rate": 3.353778667399114e-05, |
| "logits/chosen": -6.857409477233887, |
| "logits/rejected": -6.90288782119751, |
| "logps/chosen": -56.48028564453125, |
| "logps/rejected": -77.70547485351562, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18256667256355286, |
| "rewards/margins": 2.197591781616211, |
| "rewards/rejected": -2.0150249004364014, |
| "step": 3425 |
| }, |
| { |
| "epoch": 1.3156885308784043, |
| "grad_norm": 0.0185546875, |
| "learning_rate": 3.348842817752088e-05, |
| "logits/chosen": -6.897669315338135, |
| "logits/rejected": -6.962693214416504, |
| "logps/chosen": -56.440391540527344, |
| "logps/rejected": -77.97578430175781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.077767513692379, |
| "rewards/margins": 2.2007930278778076, |
| "rewards/rejected": -2.1230251789093018, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.3176064441887227, |
| "grad_norm": 0.0198974609375, |
| "learning_rate": 3.343902441800511e-05, |
| "logits/chosen": -6.962320804595947, |
| "logits/rejected": -7.01629638671875, |
| "logps/chosen": -50.10385513305664, |
| "logps/rejected": -71.91188049316406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20694105327129364, |
| "rewards/margins": 2.1971373558044434, |
| "rewards/rejected": -1.9901962280273438, |
| "step": 3435 |
| }, |
| { |
| "epoch": 1.319524357499041, |
| "grad_norm": 0.0155029296875, |
| "learning_rate": 3.338957564153784e-05, |
| "logits/chosen": -6.870461463928223, |
| "logits/rejected": -6.979992866516113, |
| "logps/chosen": -56.268280029296875, |
| "logps/rejected": -77.19740295410156, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11492518335580826, |
| "rewards/margins": 2.1967813968658447, |
| "rewards/rejected": -2.0818562507629395, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.3214422708093594, |
| "grad_norm": 0.021728515625, |
| "learning_rate": 3.3340082094437343e-05, |
| "logits/chosen": -6.830872535705566, |
| "logits/rejected": -6.9500885009765625, |
| "logps/chosen": -51.35883712768555, |
| "logps/rejected": -75.20509338378906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17043998837471008, |
| "rewards/margins": 2.1950507164001465, |
| "rewards/rejected": -2.0246105194091797, |
| "step": 3445 |
| }, |
| { |
| "epoch": 1.3233601841196778, |
| "grad_norm": 0.0155029296875, |
| "learning_rate": 3.329054402324487e-05, |
| "logits/chosen": -6.876837730407715, |
| "logits/rejected": -6.953248500823975, |
| "logps/chosen": -53.9123649597168, |
| "logps/rejected": -75.28543853759766, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1824209988117218, |
| "rewards/margins": 2.196981906890869, |
| "rewards/rejected": -2.0145609378814697, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.325278097429996, |
| "grad_norm": 0.015869140625, |
| "learning_rate": 3.3240961674723483e-05, |
| "logits/chosen": -6.871293067932129, |
| "logits/rejected": -6.961977958679199, |
| "logps/chosen": -53.93735885620117, |
| "logps/rejected": -74.31446838378906, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18244056403636932, |
| "rewards/margins": 2.195777416229248, |
| "rewards/rejected": -2.013336658477783, |
| "step": 3455 |
| }, |
| { |
| "epoch": 1.3271960107403145, |
| "grad_norm": 0.0128173828125, |
| "learning_rate": 3.3191335295856795e-05, |
| "logits/chosen": -6.839479923248291, |
| "logits/rejected": -6.9421234130859375, |
| "logps/chosen": -52.604591369628906, |
| "logps/rejected": -73.18003845214844, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.24276569485664368, |
| "rewards/margins": 2.200589179992676, |
| "rewards/rejected": -1.957823395729065, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.3291139240506329, |
| "grad_norm": 0.0157470703125, |
| "learning_rate": 3.314166513384775e-05, |
| "logits/chosen": -6.872336387634277, |
| "logits/rejected": -6.956648826599121, |
| "logps/chosen": -51.502952575683594, |
| "logps/rejected": -73.16020202636719, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.21097616851329803, |
| "rewards/margins": 2.1991126537323, |
| "rewards/rejected": -1.9881365299224854, |
| "step": 3465 |
| }, |
| { |
| "epoch": 1.3310318373609513, |
| "grad_norm": 0.0123291015625, |
| "learning_rate": 3.309195143611739e-05, |
| "logits/chosen": -6.865052223205566, |
| "logits/rejected": -6.946972846984863, |
| "logps/chosen": -52.97285842895508, |
| "logps/rejected": -73.24884033203125, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18618887662887573, |
| "rewards/margins": 2.196037769317627, |
| "rewards/rejected": -2.0098490715026855, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.3329497506712698, |
| "grad_norm": 0.01348876953125, |
| "learning_rate": 3.304219445030361e-05, |
| "logits/chosen": -6.938165187835693, |
| "logits/rejected": -6.9916205406188965, |
| "logps/chosen": -55.28739547729492, |
| "logps/rejected": -76.15113830566406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.13353893160820007, |
| "rewards/margins": 2.19685697555542, |
| "rewards/rejected": -2.0633180141448975, |
| "step": 3475 |
| }, |
| { |
| "epoch": 1.334867663981588, |
| "grad_norm": 0.01806640625, |
| "learning_rate": 3.2992394424259955e-05, |
| "logits/chosen": -6.886143684387207, |
| "logits/rejected": -6.981758117675781, |
| "logps/chosen": -54.95038604736328, |
| "logps/rejected": -75.45576477050781, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12386339902877808, |
| "rewards/margins": 2.197850227355957, |
| "rewards/rejected": -2.073986768722534, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.3367855772919064, |
| "grad_norm": 0.01287841796875, |
| "learning_rate": 3.294255160605433e-05, |
| "logits/chosen": -6.929156303405762, |
| "logits/rejected": -7.001688480377197, |
| "logps/chosen": -57.22789764404297, |
| "logps/rejected": -78.39673614501953, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.09558825939893723, |
| "rewards/margins": 2.1976473331451416, |
| "rewards/rejected": -2.1020588874816895, |
| "step": 3485 |
| }, |
| { |
| "epoch": 1.3387034906022248, |
| "grad_norm": 0.0137939453125, |
| "learning_rate": 3.289266624396785e-05, |
| "logits/chosen": -6.8782854080200195, |
| "logits/rejected": -6.912919044494629, |
| "logps/chosen": -57.03832244873047, |
| "logps/rejected": -77.84246063232422, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10016844421625137, |
| "rewards/margins": 2.197028875350952, |
| "rewards/rejected": -2.096860408782959, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.340621403912543, |
| "grad_norm": 0.0157470703125, |
| "learning_rate": 3.28427385864935e-05, |
| "logits/chosen": -6.863548278808594, |
| "logits/rejected": -6.929646968841553, |
| "logps/chosen": -55.10469436645508, |
| "logps/rejected": -77.05055236816406, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10287058353424072, |
| "rewards/margins": 2.1964352130889893, |
| "rewards/rejected": -2.093564510345459, |
| "step": 3495 |
| }, |
| { |
| "epoch": 1.3425393172228615, |
| "grad_norm": 0.0159912109375, |
| "learning_rate": 3.2792768882335e-05, |
| "logits/chosen": -6.898591041564941, |
| "logits/rejected": -6.955502510070801, |
| "logps/chosen": -57.91131591796875, |
| "logps/rejected": -79.73384094238281, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08750198036432266, |
| "rewards/margins": 2.1968607902526855, |
| "rewards/rejected": -2.109358787536621, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.3425393172228615, |
| "eval_logits/chosen": -6.735074043273926, |
| "eval_logits/rejected": -7.012642860412598, |
| "eval_logps/chosen": -54.117008209228516, |
| "eval_logps/rejected": -76.93775177001953, |
| "eval_loss": 0.325105220079422, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 0.13358335196971893, |
| "eval_rewards/margins": 2.1980643272399902, |
| "eval_rewards/rejected": -2.064480781555176, |
| "eval_runtime": 5.3108, |
| "eval_samples_per_second": 37.659, |
| "eval_steps_per_second": 37.659, |
| "step": 3500 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 7821, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|