| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 5676, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.007049545084043795, |
| "grad_norm": 5.251685782291382, |
| "learning_rate": 4.999861761763694e-06, |
| "logits/chosen": -2.4226253032684326, |
| "logits/rejected": -2.0827410221099854, |
| "logps/chosen": -369.7997131347656, |
| "logps/rejected": -72.51252746582031, |
| "loss": 2.7826, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 3.4676175117492676, |
| "rewards/margins": 3.82804799079895, |
| "rewards/rejected": -0.36043041944503784, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01409909016808759, |
| "grad_norm": 4.303040759809255, |
| "learning_rate": 4.999417578584734e-06, |
| "logits/chosen": -2.213839292526245, |
| "logits/rejected": -1.9602348804473877, |
| "logps/chosen": -219.6575164794922, |
| "logps/rejected": -121.3716049194336, |
| "loss": 1.5829, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.987636089324951, |
| "rewards/margins": 5.836641788482666, |
| "rewards/rejected": -0.8490053415298462, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.021148635252131384, |
| "grad_norm": 4.576766531423992, |
| "learning_rate": 4.998667121957487e-06, |
| "logits/chosen": -2.1551291942596436, |
| "logits/rejected": -1.8213732242584229, |
| "logps/chosen": -195.5138397216797, |
| "logps/rejected": -153.83941650390625, |
| "loss": 1.4187, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.166686058044434, |
| "rewards/margins": 6.340417385101318, |
| "rewards/rejected": -1.1737314462661743, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02819818033617518, |
| "grad_norm": 4.045608043687929, |
| "learning_rate": 4.997610483841349e-06, |
| "logits/chosen": -2.1854336261749268, |
| "logits/rejected": -1.8104079961776733, |
| "logps/chosen": -181.33554077148438, |
| "logps/rejected": -178.6750946044922, |
| "loss": 1.3198, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.2234673500061035, |
| "rewards/margins": 6.645481109619141, |
| "rewards/rejected": -1.4220136404037476, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.03524772542021898, |
| "grad_norm": 4.309056287854017, |
| "learning_rate": 4.996247793714565e-06, |
| "logits/chosen": -2.153714418411255, |
| "logits/rejected": -1.7523826360702515, |
| "logps/chosen": -181.3177947998047, |
| "logps/rejected": -197.3472442626953, |
| "loss": 1.3159, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.32370662689209, |
| "rewards/margins": 6.932420253753662, |
| "rewards/rejected": -1.6087143421173096, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04229727050426277, |
| "grad_norm": 4.38954998222076, |
| "learning_rate": 4.994579218558358e-06, |
| "logits/chosen": -2.1097121238708496, |
| "logits/rejected": -1.6037172079086304, |
| "logps/chosen": -171.5686798095703, |
| "logps/rejected": -214.49600219726562, |
| "loss": 1.2613, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.3470964431762695, |
| "rewards/margins": 7.127236843109131, |
| "rewards/rejected": -1.7801411151885986, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.049346815588306565, |
| "grad_norm": 4.170152738327058, |
| "learning_rate": 4.992604962836471e-06, |
| "logits/chosen": -2.061870574951172, |
| "logits/rejected": -1.5608799457550049, |
| "logps/chosen": -175.2593231201172, |
| "logps/rejected": -227.2431182861328, |
| "loss": 1.2586, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.457849979400635, |
| "rewards/margins": 7.365653991699219, |
| "rewards/rejected": -1.9078038930892944, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05639636067235036, |
| "grad_norm": 3.7793931281850064, |
| "learning_rate": 4.990325268470103e-06, |
| "logits/chosen": -2.0291988849639893, |
| "logits/rejected": -1.5090599060058594, |
| "logps/chosen": -170.09222412109375, |
| "logps/rejected": -238.5521240234375, |
| "loss": 1.2228, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.505599021911621, |
| "rewards/margins": 7.526421546936035, |
| "rewards/rejected": -2.020822525024414, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.06344590575639415, |
| "grad_norm": 4.03339809015517, |
| "learning_rate": 4.987740414808279e-06, |
| "logits/chosen": -2.0087292194366455, |
| "logits/rejected": -1.4583772420883179, |
| "logps/chosen": -171.950927734375, |
| "logps/rejected": -249.205810546875, |
| "loss": 1.2355, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.471736431121826, |
| "rewards/margins": 7.599116802215576, |
| "rewards/rejected": -2.1273796558380127, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07049545084043796, |
| "grad_norm": 3.8396983397552003, |
| "learning_rate": 4.9848507185936054e-06, |
| "logits/chosen": -1.9715420007705688, |
| "logits/rejected": -1.3983051776885986, |
| "logps/chosen": -167.9102020263672, |
| "logps/rejected": -258.29931640625, |
| "loss": 1.2011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.551548957824707, |
| "rewards/margins": 7.769871711730957, |
| "rewards/rejected": -2.218322992324829, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07754499592448175, |
| "grad_norm": 3.8300256337552883, |
| "learning_rate": 4.9816565339234665e-06, |
| "logits/chosen": -1.9795246124267578, |
| "logits/rejected": -1.4027624130249023, |
| "logps/chosen": -159.7227783203125, |
| "logps/rejected": -267.54132080078125, |
| "loss": 1.1606, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.484537601470947, |
| "rewards/margins": 7.795154094696045, |
| "rewards/rejected": -2.3106157779693604, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08459454100852554, |
| "grad_norm": 3.6985177789621537, |
| "learning_rate": 4.978158252206628e-06, |
| "logits/chosen": -1.8784259557724, |
| "logits/rejected": -1.339755892753601, |
| "logps/chosen": -159.25930786132812, |
| "logps/rejected": -274.899658203125, |
| "loss": 1.1487, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.546479225158691, |
| "rewards/margins": 7.930718898773193, |
| "rewards/rejected": -2.384239912033081, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09164408609256934, |
| "grad_norm": 4.180449865034002, |
| "learning_rate": 4.9743563021152815e-06, |
| "logits/chosen": -1.8454569578170776, |
| "logits/rejected": -1.3575037717819214, |
| "logps/chosen": -160.8242950439453, |
| "logps/rejected": -281.50543212890625, |
| "loss": 1.1609, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.501753330230713, |
| "rewards/margins": 7.952068328857422, |
| "rewards/rejected": -2.45031476020813, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09869363117661313, |
| "grad_norm": 4.150749378416668, |
| "learning_rate": 4.970251149532508e-06, |
| "logits/chosen": -1.867268443107605, |
| "logits/rejected": -1.3786919116973877, |
| "logps/chosen": -162.6690673828125, |
| "logps/rejected": -284.708984375, |
| "loss": 1.1587, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.61737585067749, |
| "rewards/margins": 8.099893569946289, |
| "rewards/rejected": -2.4825170040130615, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.10574317626065693, |
| "grad_norm": 3.9481813668439925, |
| "learning_rate": 4.965843297495193e-06, |
| "logits/chosen": -1.8595079183578491, |
| "logits/rejected": -1.3652888536453247, |
| "logps/chosen": -160.74594116210938, |
| "logps/rejected": -290.0582275390625, |
| "loss": 1.1566, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.584461212158203, |
| "rewards/margins": 8.120338439941406, |
| "rewards/rejected": -2.535876989364624, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.11279272134470072, |
| "grad_norm": 3.892019725921358, |
| "learning_rate": 4.9611332861323875e-06, |
| "logits/chosen": -1.815410852432251, |
| "logits/rejected": -1.345049262046814, |
| "logps/chosen": -158.49656677246094, |
| "logps/rejected": -296.3229675292969, |
| "loss": 1.1334, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.662566661834717, |
| "rewards/margins": 8.261143684387207, |
| "rewards/rejected": -2.5985770225524902, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11984226642874451, |
| "grad_norm": 4.074050442543863, |
| "learning_rate": 4.956121692599119e-06, |
| "logits/chosen": -1.8308916091918945, |
| "logits/rejected": -1.3203500509262085, |
| "logps/chosen": -156.52281188964844, |
| "logps/rejected": -301.0868225097656, |
| "loss": 1.1271, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.592663288116455, |
| "rewards/margins": 8.23884105682373, |
| "rewards/rejected": -2.6461777687072754, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1268918115127883, |
| "grad_norm": 3.8941695396480807, |
| "learning_rate": 4.95080913100567e-06, |
| "logits/chosen": -1.7772724628448486, |
| "logits/rejected": -1.2583340406417847, |
| "logps/chosen": -152.46060180664062, |
| "logps/rejected": -305.64990234375, |
| "loss": 1.1067, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.578180313110352, |
| "rewards/margins": 8.269899368286133, |
| "rewards/rejected": -2.6917190551757812, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.13394135659683212, |
| "grad_norm": 3.986642176301124, |
| "learning_rate": 4.945196252342323e-06, |
| "logits/chosen": -1.746899962425232, |
| "logits/rejected": -1.252589464187622, |
| "logps/chosen": -156.21347045898438, |
| "logps/rejected": -308.6796569824219, |
| "loss": 1.1189, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.675221920013428, |
| "rewards/margins": 8.397418975830078, |
| "rewards/rejected": -2.7221975326538086, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1409909016808759, |
| "grad_norm": 3.835238186793769, |
| "learning_rate": 4.9392837443995935e-06, |
| "logits/chosen": -1.7575358152389526, |
| "logits/rejected": -1.2477468252182007, |
| "logps/chosen": -152.92015075683594, |
| "logps/rejected": -313.30816650390625, |
| "loss": 1.0949, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.6858229637146, |
| "rewards/margins": 8.454258918762207, |
| "rewards/rejected": -2.76843523979187, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.1480404467649197, |
| "grad_norm": 3.7735268672320554, |
| "learning_rate": 4.933072331683947e-06, |
| "logits/chosen": -1.7370452880859375, |
| "logits/rejected": -1.234609603881836, |
| "logps/chosen": -157.04910278320312, |
| "logps/rejected": -316.3162536621094, |
| "loss": 1.1293, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.680102825164795, |
| "rewards/margins": 8.478598594665527, |
| "rewards/rejected": -2.7984960079193115, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1550899918489635, |
| "grad_norm": 3.5998804416212145, |
| "learning_rate": 4.9265627753290195e-06, |
| "logits/chosen": -1.7116130590438843, |
| "logits/rejected": -1.1605967283248901, |
| "logps/chosen": -153.48583984375, |
| "logps/rejected": -319.1722412109375, |
| "loss": 1.1021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.628140926361084, |
| "rewards/margins": 8.455215454101562, |
| "rewards/rejected": -2.827075481414795, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.16213953693300728, |
| "grad_norm": 3.619213515730465, |
| "learning_rate": 4.9197558730023524e-06, |
| "logits/chosen": -1.725412368774414, |
| "logits/rejected": -1.16403067111969, |
| "logps/chosen": -149.7213897705078, |
| "logps/rejected": -323.0830383300781, |
| "loss": 1.0849, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.608808994293213, |
| "rewards/margins": 8.474872589111328, |
| "rewards/rejected": -2.8660638332366943, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.16918908201705107, |
| "grad_norm": 3.8639522615901845, |
| "learning_rate": 4.912652458807642e-06, |
| "logits/chosen": -1.6482775211334229, |
| "logits/rejected": -1.121797800064087, |
| "logps/chosen": -151.90098571777344, |
| "logps/rejected": -325.0082092285156, |
| "loss": 1.0933, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.6815924644470215, |
| "rewards/margins": 8.567051887512207, |
| "rewards/rejected": -2.8854596614837646, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.1762386271010949, |
| "grad_norm": 3.6715917089410746, |
| "learning_rate": 4.905253403182541e-06, |
| "logits/chosen": -1.6202948093414307, |
| "logits/rejected": -1.0874967575073242, |
| "logps/chosen": -152.82394409179688, |
| "logps/rejected": -328.7615051269531, |
| "loss": 1.0932, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.710689067840576, |
| "rewards/margins": 8.633631706237793, |
| "rewards/rejected": -2.922943353652954, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.18328817218513868, |
| "grad_norm": 3.6185403170755275, |
| "learning_rate": 4.897559612791982e-06, |
| "logits/chosen": -1.6128886938095093, |
| "logits/rejected": -1.08529531955719, |
| "logps/chosen": -150.95596313476562, |
| "logps/rejected": -332.5636901855469, |
| "loss": 1.0892, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.6448516845703125, |
| "rewards/margins": 8.605791091918945, |
| "rewards/rejected": -2.9609391689300537, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.19033771726918247, |
| "grad_norm": 4.961359246548627, |
| "learning_rate": 4.889572030417091e-06, |
| "logits/chosen": -1.5982520580291748, |
| "logits/rejected": -1.0809301137924194, |
| "logps/chosen": -145.88729858398438, |
| "logps/rejected": -337.07427978515625, |
| "loss": 1.0691, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.617259979248047, |
| "rewards/margins": 8.623309135437012, |
| "rewards/rejected": -3.0060489177703857, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.19738726235322626, |
| "grad_norm": 3.828455969236192, |
| "learning_rate": 4.881291634839652e-06, |
| "logits/chosen": -1.6171013116836548, |
| "logits/rejected": -1.136992335319519, |
| "logps/chosen": -148.6068572998047, |
| "logps/rejected": -330.5426025390625, |
| "loss": 1.0661, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.703982830047607, |
| "rewards/margins": 8.644716262817383, |
| "rewards/rejected": -2.940732479095459, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.20443680743727005, |
| "grad_norm": 3.9472389539377692, |
| "learning_rate": 4.872719440722171e-06, |
| "logits/chosen": -1.626006007194519, |
| "logits/rejected": -1.1716492176055908, |
| "logps/chosen": -145.8668670654297, |
| "logps/rejected": -331.05413818359375, |
| "loss": 1.0442, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.7226738929748535, |
| "rewards/margins": 8.668530464172363, |
| "rewards/rejected": -2.945856809616089, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.21148635252131387, |
| "grad_norm": 3.581652653947185, |
| "learning_rate": 4.863856498483545e-06, |
| "logits/chosen": -1.5996553897857666, |
| "logits/rejected": -1.1265536546707153, |
| "logps/chosen": -147.13487243652344, |
| "logps/rejected": -336.3114929199219, |
| "loss": 1.0497, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.683503150939941, |
| "rewards/margins": 8.681916236877441, |
| "rewards/rejected": -2.9984130859375, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.21853589760535766, |
| "grad_norm": 3.6006765539179186, |
| "learning_rate": 4.854703894170342e-06, |
| "logits/chosen": -1.5823760032653809, |
| "logits/rejected": -1.0852136611938477, |
| "logps/chosen": -143.90567016601562, |
| "logps/rejected": -339.850830078125, |
| "loss": 1.0334, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.708090782165527, |
| "rewards/margins": 8.741941452026367, |
| "rewards/rejected": -3.0338504314422607, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.22558544268940145, |
| "grad_norm": 3.673035488034299, |
| "learning_rate": 4.845262749323716e-06, |
| "logits/chosen": -1.5890778303146362, |
| "logits/rejected": -1.125832200050354, |
| "logps/chosen": -144.8064422607422, |
| "logps/rejected": -342.69403076171875, |
| "loss": 1.0435, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.719069957733154, |
| "rewards/margins": 8.781278610229492, |
| "rewards/rejected": -3.0622079372406006, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.23263498777344524, |
| "grad_norm": 3.7341178394643464, |
| "learning_rate": 4.835534220841986e-06, |
| "logits/chosen": -1.5730316638946533, |
| "logits/rejected": -1.0738062858581543, |
| "logps/chosen": -147.9169158935547, |
| "logps/rejected": -345.677978515625, |
| "loss": 1.0506, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.784816741943359, |
| "rewards/margins": 8.876952171325684, |
| "rewards/rejected": -3.092135190963745, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.23968453285748903, |
| "grad_norm": 3.5484016113310126, |
| "learning_rate": 4.825519500838861e-06, |
| "logits/chosen": -1.5520037412643433, |
| "logits/rejected": -1.0344088077545166, |
| "logps/chosen": -143.9144287109375, |
| "logps/rejected": -348.7065124511719, |
| "loss": 1.0334, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.744712829589844, |
| "rewards/margins": 8.867079734802246, |
| "rewards/rejected": -3.122366189956665, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.24673407794153285, |
| "grad_norm": 3.9857566294011275, |
| "learning_rate": 4.815219816497369e-06, |
| "logits/chosen": -1.5165714025497437, |
| "logits/rejected": -0.994964599609375, |
| "logps/chosen": -144.06155395507812, |
| "logps/rejected": -351.51776123046875, |
| "loss": 1.0338, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.70768928527832, |
| "rewards/margins": 8.858179092407227, |
| "rewards/rejected": -3.150489091873169, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2537836230255766, |
| "grad_norm": 3.6922569534263223, |
| "learning_rate": 4.804636429919477e-06, |
| "logits/chosen": -1.5148602724075317, |
| "logits/rejected": -0.9885136485099792, |
| "logps/chosen": -147.0210418701172, |
| "logps/rejected": -354.9969177246094, |
| "loss": 1.0549, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.779511451721191, |
| "rewards/margins": 8.96484375, |
| "rewards/rejected": -3.1853320598602295, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.26083316810962043, |
| "grad_norm": 3.7203570683891285, |
| "learning_rate": 4.793770637971432e-06, |
| "logits/chosen": -1.4832053184509277, |
| "logits/rejected": -0.9658412337303162, |
| "logps/chosen": -142.5421600341797, |
| "logps/rejected": -355.7452697753906, |
| "loss": 1.0267, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.72859525680542, |
| "rewards/margins": 8.921265602111816, |
| "rewards/rejected": -3.19266939163208, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.26788271319366425, |
| "grad_norm": 3.523981090385713, |
| "learning_rate": 4.782623772124854e-06, |
| "logits/chosen": -1.4618552923202515, |
| "logits/rejected": -1.0074139833450317, |
| "logps/chosen": -143.67218017578125, |
| "logps/rejected": -357.75860595703125, |
| "loss": 1.0341, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.69673490524292, |
| "rewards/margins": 8.909579277038574, |
| "rewards/rejected": -3.212844133377075, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.274932258277708, |
| "grad_norm": 3.543830337375067, |
| "learning_rate": 4.771197198293574e-06, |
| "logits/chosen": -1.439631700515747, |
| "logits/rejected": -0.9814452528953552, |
| "logps/chosen": -143.27450561523438, |
| "logps/rejected": -361.0159606933594, |
| "loss": 1.0287, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.738966464996338, |
| "rewards/margins": 8.98444938659668, |
| "rewards/rejected": -3.245483160018921, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.2819818033617518, |
| "grad_norm": 3.7840061115601817, |
| "learning_rate": 4.7594923166662604e-06, |
| "logits/chosen": -1.4854661226272583, |
| "logits/rejected": -0.973606288433075, |
| "logps/chosen": -142.18592834472656, |
| "logps/rejected": -361.9082946777344, |
| "loss": 1.0202, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.734087944030762, |
| "rewards/margins": 8.988512992858887, |
| "rewards/rejected": -3.2544243335723877, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2890313484457956, |
| "grad_norm": 3.444393315103109, |
| "learning_rate": 4.74751056153484e-06, |
| "logits/chosen": -1.4658998250961304, |
| "logits/rejected": -0.9327756762504578, |
| "logps/chosen": -138.7262420654297, |
| "logps/rejected": -363.95166015625, |
| "loss": 1.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.733180522918701, |
| "rewards/margins": 9.007993698120117, |
| "rewards/rejected": -3.274812698364258, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2960808935298394, |
| "grad_norm": 3.565026142089265, |
| "learning_rate": 4.7352534011187465e-06, |
| "logits/chosen": -1.468241572380066, |
| "logits/rejected": -0.9021957516670227, |
| "logps/chosen": -140.53086853027344, |
| "logps/rejected": -365.4902038574219, |
| "loss": 1.0082, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.738412380218506, |
| "rewards/margins": 9.028605461120605, |
| "rewards/rejected": -3.2901928424835205, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3031304386138832, |
| "grad_norm": 3.896801938758768, |
| "learning_rate": 4.722722337385005e-06, |
| "logits/chosen": -1.4516186714172363, |
| "logits/rejected": -0.9541074633598328, |
| "logps/chosen": -139.03001403808594, |
| "logps/rejected": -366.9988098144531, |
| "loss": 1.0077, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.699275016784668, |
| "rewards/margins": 9.004546165466309, |
| "rewards/rejected": -3.3052709102630615, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.310179983697927, |
| "grad_norm": 3.502095791512193, |
| "learning_rate": 4.709918905864188e-06, |
| "logits/chosen": -1.3555845022201538, |
| "logits/rejected": -0.9211882948875427, |
| "logps/chosen": -144.34078979492188, |
| "logps/rejected": -366.9481506347656, |
| "loss": 1.038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.776908874511719, |
| "rewards/margins": 9.081772804260254, |
| "rewards/rejected": -3.3048646450042725, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3172295287819708, |
| "grad_norm": 3.6471771757282254, |
| "learning_rate": 4.696844675462248e-06, |
| "logits/chosen": -1.377302646636963, |
| "logits/rejected": -0.9443565607070923, |
| "logps/chosen": -140.3704376220703, |
| "logps/rejected": -363.6595153808594, |
| "loss": 1.0094, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.706255912780762, |
| "rewards/margins": 8.9780912399292, |
| "rewards/rejected": -3.2718353271484375, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.32427907386601457, |
| "grad_norm": 3.6615699755313194, |
| "learning_rate": 4.683501248268274e-06, |
| "logits/chosen": -1.3805917501449585, |
| "logits/rejected": -0.9572538733482361, |
| "logps/chosen": -138.28404235839844, |
| "logps/rejected": -365.3840026855469, |
| "loss": 0.999, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.751240253448486, |
| "rewards/margins": 9.04038143157959, |
| "rewards/rejected": -3.2891411781311035, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3313286189500584, |
| "grad_norm": 3.318362750466131, |
| "learning_rate": 4.66989025935817e-06, |
| "logits/chosen": -1.3741607666015625, |
| "logits/rejected": -0.9632787108421326, |
| "logps/chosen": -140.46435546875, |
| "logps/rejected": -369.9010314941406, |
| "loss": 1.0078, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.778801918029785, |
| "rewards/margins": 9.11315631866455, |
| "rewards/rejected": -3.334355115890503, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.33837816403410215, |
| "grad_norm": 3.582839489017987, |
| "learning_rate": 4.6560133765943006e-06, |
| "logits/chosen": -1.3752954006195068, |
| "logits/rejected": -0.9404975771903992, |
| "logps/chosen": -135.32681274414062, |
| "logps/rejected": -372.66534423828125, |
| "loss": 0.9837, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.729502201080322, |
| "rewards/margins": 9.091387748718262, |
| "rewards/rejected": -3.3618857860565186, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.34542770911814596, |
| "grad_norm": 3.5417976857081284, |
| "learning_rate": 4.641872300421108e-06, |
| "logits/chosen": -1.3784650564193726, |
| "logits/rejected": -0.8944051861763, |
| "logps/chosen": -143.4613494873047, |
| "logps/rejected": -375.3058776855469, |
| "loss": 1.025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.82838773727417, |
| "rewards/margins": 9.216733932495117, |
| "rewards/rejected": -3.388345718383789, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3524772542021898, |
| "grad_norm": 3.4196142612670313, |
| "learning_rate": 4.62746876365675e-06, |
| "logits/chosen": -1.3793405294418335, |
| "logits/rejected": -0.8886861801147461, |
| "logps/chosen": -137.5000762939453, |
| "logps/rejected": -377.03350830078125, |
| "loss": 0.9952, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.801843166351318, |
| "rewards/margins": 9.207419395446777, |
| "rewards/rejected": -3.4055771827697754, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.35952679928623354, |
| "grad_norm": 3.7855239785429533, |
| "learning_rate": 4.61280453128076e-06, |
| "logits/chosen": -1.372959852218628, |
| "logits/rejected": -0.9385848045349121, |
| "logps/chosen": -138.4940948486328, |
| "logps/rejected": -368.65045166015625, |
| "loss": 1.0047, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.794959545135498, |
| "rewards/margins": 9.116719245910645, |
| "rewards/rejected": -3.3217601776123047, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.36657634437027736, |
| "grad_norm": 3.540584082558615, |
| "learning_rate": 4.597881400217773e-06, |
| "logits/chosen": -1.3944286108016968, |
| "logits/rejected": -1.0817062854766846, |
| "logps/chosen": -135.97718811035156, |
| "logps/rejected": -361.6622009277344, |
| "loss": 0.9877, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.732675075531006, |
| "rewards/margins": 8.984468460083008, |
| "rewards/rejected": -3.2517929077148438, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.3736258894543211, |
| "grad_norm": 3.587308477754233, |
| "learning_rate": 4.5827011991173284e-06, |
| "logits/chosen": -1.3007700443267822, |
| "logits/rejected": -1.0200260877609253, |
| "logps/chosen": -137.0142364501953, |
| "logps/rejected": -360.9063415527344, |
| "loss": 0.9879, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.7438249588012695, |
| "rewards/margins": 8.988321304321289, |
| "rewards/rejected": -3.2444958686828613, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.38067543453836494, |
| "grad_norm": 3.4394832652194562, |
| "learning_rate": 4.5672657881298e-06, |
| "logits/chosen": -1.3347376585006714, |
| "logits/rejected": -1.0313987731933594, |
| "logps/chosen": -137.21844482421875, |
| "logps/rejected": -358.77288818359375, |
| "loss": 0.9891, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.776368141174316, |
| "rewards/margins": 8.999374389648438, |
| "rewards/rejected": -3.2230064868927, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.38772497962240876, |
| "grad_norm": 3.7326559968101463, |
| "learning_rate": 4.551577058678447e-06, |
| "logits/chosen": -1.3314663171768188, |
| "logits/rejected": -0.9995508193969727, |
| "logps/chosen": -133.8922576904297, |
| "logps/rejected": -362.88214111328125, |
| "loss": 0.9669, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.811877250671387, |
| "rewards/margins": 9.075952529907227, |
| "rewards/rejected": -3.2640750408172607, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3947745247064525, |
| "grad_norm": 3.4767646455734447, |
| "learning_rate": 4.535636933227651e-06, |
| "logits/chosen": -1.2618310451507568, |
| "logits/rejected": -0.8865704536437988, |
| "logps/chosen": -136.52227783203125, |
| "logps/rejected": -366.13525390625, |
| "loss": 0.9855, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.7819976806640625, |
| "rewards/margins": 9.078688621520996, |
| "rewards/rejected": -3.2966911792755127, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.40182406979049634, |
| "grad_norm": 3.660942268417341, |
| "learning_rate": 4.519447365047341e-06, |
| "logits/chosen": -1.2751779556274414, |
| "logits/rejected": -0.9009385108947754, |
| "logps/chosen": -135.4525909423828, |
| "logps/rejected": -369.0229187011719, |
| "loss": 0.9765, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.787187099456787, |
| "rewards/margins": 9.112687110900879, |
| "rewards/rejected": -3.3254997730255127, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4088736148745401, |
| "grad_norm": 3.6419994005758127, |
| "learning_rate": 4.5030103379736335e-06, |
| "logits/chosen": -1.2887815237045288, |
| "logits/rejected": -0.9329848289489746, |
| "logps/chosen": -135.89544677734375, |
| "logps/rejected": -370.81414794921875, |
| "loss": 0.9764, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.7939133644104, |
| "rewards/margins": 9.13733959197998, |
| "rewards/rejected": -3.3434274196624756, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.4159231599585839, |
| "grad_norm": 3.4187952865950453, |
| "learning_rate": 4.486327866165751e-06, |
| "logits/chosen": -1.2444560527801514, |
| "logits/rejected": -0.9014924168586731, |
| "logps/chosen": -136.8031768798828, |
| "logps/rejected": -373.68890380859375, |
| "loss": 0.983, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.815255641937256, |
| "rewards/margins": 9.18748664855957, |
| "rewards/rejected": -3.3722312450408936, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.42297270504262774, |
| "grad_norm": 3.5442759882748103, |
| "learning_rate": 4.469401993859201e-06, |
| "logits/chosen": -1.262442946434021, |
| "logits/rejected": -0.8692361116409302, |
| "logps/chosen": -132.19586181640625, |
| "logps/rejected": -375.8874206542969, |
| "loss": 0.9612, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.739358425140381, |
| "rewards/margins": 9.133481979370117, |
| "rewards/rejected": -3.394123077392578, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4300222501266715, |
| "grad_norm": 3.917612027189787, |
| "learning_rate": 4.4522347951152876e-06, |
| "logits/chosen": -1.2577459812164307, |
| "logits/rejected": -0.8758344650268555, |
| "logps/chosen": -132.18719482421875, |
| "logps/rejected": -378.2060241699219, |
| "loss": 0.9635, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.746466636657715, |
| "rewards/margins": 9.163701057434082, |
| "rewards/rejected": -3.417234420776367, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.4370717952107153, |
| "grad_norm": 3.3976029680607365, |
| "learning_rate": 4.434828373566952e-06, |
| "logits/chosen": -1.2668578624725342, |
| "logits/rejected": -0.8984266519546509, |
| "logps/chosen": -134.82275390625, |
| "logps/rejected": -380.01171875, |
| "loss": 0.9756, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.738933086395264, |
| "rewards/margins": 9.174293518066406, |
| "rewards/rejected": -3.4353606700897217, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4441213402947591, |
| "grad_norm": 3.4516443155163783, |
| "learning_rate": 4.417184862161005e-06, |
| "logits/chosen": -1.2303470373153687, |
| "logits/rejected": -0.872378945350647, |
| "logps/chosen": -141.74575805664062, |
| "logps/rejected": -381.9054870605469, |
| "loss": 1.007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.898972988128662, |
| "rewards/margins": 9.353425979614258, |
| "rewards/rejected": -3.4544525146484375, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.4511708853788029, |
| "grad_norm": 3.4252391290463713, |
| "learning_rate": 4.399306422896755e-06, |
| "logits/chosen": -1.1987241506576538, |
| "logits/rejected": -0.8535988926887512, |
| "logps/chosen": -137.49851989746094, |
| "logps/rejected": -383.2096252441406, |
| "loss": 0.9777, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.86844539642334, |
| "rewards/margins": 9.335935592651367, |
| "rewards/rejected": -3.4674899578094482, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4582204304628467, |
| "grad_norm": 3.545736837903789, |
| "learning_rate": 4.3811952465610835e-06, |
| "logits/chosen": -1.2155053615570068, |
| "logits/rejected": -0.899692714214325, |
| "logps/chosen": -129.8439178466797, |
| "logps/rejected": -383.0055847167969, |
| "loss": 0.9496, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.7504801750183105, |
| "rewards/margins": 9.215784072875977, |
| "rewards/rejected": -3.465303421020508, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4652699755468905, |
| "grad_norm": 3.447372993429395, |
| "learning_rate": 4.362853552459992e-06, |
| "logits/chosen": -1.2124435901641846, |
| "logits/rejected": -0.9287330508232117, |
| "logps/chosen": -130.0380096435547, |
| "logps/rejected": -381.68341064453125, |
| "loss": 0.9461, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.767488479614258, |
| "rewards/margins": 9.219490051269531, |
| "rewards/rejected": -3.4520013332366943, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.4723195206309343, |
| "grad_norm": 3.623096464653975, |
| "learning_rate": 4.344283588146648e-06, |
| "logits/chosen": -1.190288782119751, |
| "logits/rejected": -0.8485528826713562, |
| "logps/chosen": -130.83909606933594, |
| "logps/rejected": -384.2164001464844, |
| "loss": 0.9506, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.791656017303467, |
| "rewards/margins": 9.269119262695312, |
| "rewards/rejected": -3.477463483810425, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.47936906571497806, |
| "grad_norm": 3.560577638508316, |
| "learning_rate": 4.3254876291459805e-06, |
| "logits/chosen": -1.1886543035507202, |
| "logits/rejected": -0.8671566247940063, |
| "logps/chosen": -136.43968200683594, |
| "logps/rejected": -386.629638671875, |
| "loss": 0.9836, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.842299461364746, |
| "rewards/margins": 9.343871116638184, |
| "rewards/rejected": -3.5015721321105957, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.4864186107990219, |
| "grad_norm": 3.1473460328784997, |
| "learning_rate": 4.3064679786758364e-06, |
| "logits/chosen": -1.2196760177612305, |
| "logits/rejected": -0.8831031918525696, |
| "logps/chosen": -136.54266357421875, |
| "logps/rejected": -388.2689208984375, |
| "loss": 0.9732, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.864408016204834, |
| "rewards/margins": 9.382452964782715, |
| "rewards/rejected": -3.51804518699646, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.4934681558830657, |
| "grad_norm": 3.4991605727429156, |
| "learning_rate": 4.287226967364755e-06, |
| "logits/chosen": -1.1478255987167358, |
| "logits/rejected": -0.8476384282112122, |
| "logps/chosen": -135.9375457763672, |
| "logps/rejected": -390.1582946777344, |
| "loss": 0.9744, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.896569728851318, |
| "rewards/margins": 9.433442115783691, |
| "rewards/rejected": -3.536872625350952, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5005177009671095, |
| "grad_norm": 3.307262546072772, |
| "learning_rate": 4.267766952966369e-06, |
| "logits/chosen": -1.1719753742218018, |
| "logits/rejected": -0.8473686575889587, |
| "logps/chosen": -132.99285888671875, |
| "logps/rejected": -390.5616149902344, |
| "loss": 0.9544, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.861688613891602, |
| "rewards/margins": 9.402670860290527, |
| "rewards/rejected": -3.5409820079803467, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5075672460511532, |
| "grad_norm": 3.418974654141689, |
| "learning_rate": 4.248090320070501e-06, |
| "logits/chosen": -1.1494816541671753, |
| "logits/rejected": -0.8639947772026062, |
| "logps/chosen": -130.6112060546875, |
| "logps/rejected": -389.1405334472656, |
| "loss": 0.9413, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.852599620819092, |
| "rewards/margins": 9.379307746887207, |
| "rewards/rejected": -3.5267083644866943, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5146167911351971, |
| "grad_norm": 3.641099772646816, |
| "learning_rate": 4.22819947981095e-06, |
| "logits/chosen": -1.2039188146591187, |
| "logits/rejected": -0.8875476717948914, |
| "logps/chosen": -133.9625701904297, |
| "logps/rejected": -394.1423034667969, |
| "loss": 0.9624, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.86251974105835, |
| "rewards/margins": 9.439249038696289, |
| "rewards/rejected": -3.5767295360565186, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5216663362192409, |
| "grad_norm": 3.5377427783220856, |
| "learning_rate": 4.208096869570046e-06, |
| "logits/chosen": -1.1980522871017456, |
| "logits/rejected": -0.8648649454116821, |
| "logps/chosen": -133.25439453125, |
| "logps/rejected": -394.1308288574219, |
| "loss": 0.9571, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.7785868644714355, |
| "rewards/margins": 9.355122566223145, |
| "rewards/rejected": -3.576535940170288, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5287158813032846, |
| "grad_norm": 3.572038643146727, |
| "learning_rate": 4.1877849526799705e-06, |
| "logits/chosen": -1.1758784055709839, |
| "logits/rejected": -0.8390051126480103, |
| "logps/chosen": -130.81890869140625, |
| "logps/rejected": -396.5048522949219, |
| "loss": 0.9403, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.8407206535339355, |
| "rewards/margins": 9.441041946411133, |
| "rewards/rejected": -3.60032057762146, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5357654263873285, |
| "grad_norm": 3.4947337389011577, |
| "learning_rate": 4.167266218120907e-06, |
| "logits/chosen": -1.1654809713363647, |
| "logits/rejected": -0.8482118844985962, |
| "logps/chosen": -129.94468688964844, |
| "logps/rejected": -397.8246765136719, |
| "loss": 0.93, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.867563724517822, |
| "rewards/margins": 9.481162071228027, |
| "rewards/rejected": -3.613598585128784, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5428149714713723, |
| "grad_norm": 3.42502470705207, |
| "learning_rate": 4.146543180216051e-06, |
| "logits/chosen": -1.1724631786346436, |
| "logits/rejected": -0.8869396448135376, |
| "logps/chosen": -132.4146270751953, |
| "logps/rejected": -398.17681884765625, |
| "loss": 0.954, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.82112979888916, |
| "rewards/margins": 9.438194274902344, |
| "rewards/rejected": -3.6170647144317627, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.549864516555416, |
| "grad_norm": 3.4338454588217493, |
| "learning_rate": 4.125618378323503e-06, |
| "logits/chosen": -1.2047984600067139, |
| "logits/rejected": -0.8852383494377136, |
| "logps/chosen": -125.2190933227539, |
| "logps/rejected": -398.3727722167969, |
| "loss": 0.9137, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.7483320236206055, |
| "rewards/margins": 9.367304801940918, |
| "rewards/rejected": -3.6189732551574707, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5569140616394598, |
| "grad_norm": 3.476811547379371, |
| "learning_rate": 4.104494376525106e-06, |
| "logits/chosen": -1.2003843784332275, |
| "logits/rejected": -0.8351160883903503, |
| "logps/chosen": -127.03767395019531, |
| "logps/rejected": -399.4728088378906, |
| "loss": 0.9273, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.7366766929626465, |
| "rewards/margins": 9.36662769317627, |
| "rewards/rejected": -3.6299514770507812, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5639636067235037, |
| "grad_norm": 3.383144084845009, |
| "learning_rate": 4.083173763312248e-06, |
| "logits/chosen": -1.142123818397522, |
| "logits/rejected": -0.8299978375434875, |
| "logps/chosen": -132.69578552246094, |
| "logps/rejected": -402.67486572265625, |
| "loss": 0.9548, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.840543746948242, |
| "rewards/margins": 9.502639770507812, |
| "rewards/rejected": -3.662095785140991, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5710131518075474, |
| "grad_norm": 3.4145198061570103, |
| "learning_rate": 4.061659151268668e-06, |
| "logits/chosen": -1.1268980503082275, |
| "logits/rejected": -0.8165463805198669, |
| "logps/chosen": -130.29315185546875, |
| "logps/rejected": -404.2532043457031, |
| "loss": 0.9449, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.816205024719238, |
| "rewards/margins": 9.493982315063477, |
| "rewards/rejected": -3.677777051925659, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5780626968915912, |
| "grad_norm": 3.2632269319301845, |
| "learning_rate": 4.039953176750321e-06, |
| "logits/chosen": -1.123947024345398, |
| "logits/rejected": -0.7603979706764221, |
| "logps/chosen": -129.68569946289062, |
| "logps/rejected": -404.5145568847656, |
| "loss": 0.9394, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.798472881317139, |
| "rewards/margins": 9.478851318359375, |
| "rewards/rejected": -3.6803791522979736, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.585112241975635, |
| "grad_norm": 3.413795992881312, |
| "learning_rate": 4.018058499562326e-06, |
| "logits/chosen": -1.1000627279281616, |
| "logits/rejected": -0.7446752786636353, |
| "logps/chosen": -134.6430206298828, |
| "logps/rejected": -405.8460998535156, |
| "loss": 0.9556, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.897136211395264, |
| "rewards/margins": 9.590985298156738, |
| "rewards/rejected": -3.6938483715057373, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5921617870596788, |
| "grad_norm": 3.362057771016843, |
| "learning_rate": 3.995977802633032e-06, |
| "logits/chosen": -1.0729659795761108, |
| "logits/rejected": -0.7292208075523376, |
| "logps/chosen": -131.8883056640625, |
| "logps/rejected": -407.1473083496094, |
| "loss": 0.9444, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.883275985717773, |
| "rewards/margins": 9.590205192565918, |
| "rewards/rejected": -3.706928253173828, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.5992113321437226, |
| "grad_norm": 3.428303427953224, |
| "learning_rate": 3.973713791685263e-06, |
| "logits/chosen": -1.0722216367721558, |
| "logits/rejected": -0.7673947215080261, |
| "logps/chosen": -132.4364471435547, |
| "logps/rejected": -407.9749755859375, |
| "loss": 0.9449, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.872178554534912, |
| "rewards/margins": 9.58730411529541, |
| "rewards/rejected": -3.7151248455047607, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6062608772277664, |
| "grad_norm": 4.027806823030179, |
| "learning_rate": 3.951269194904765e-06, |
| "logits/chosen": -1.0447555780410767, |
| "logits/rejected": -0.7338883280754089, |
| "logps/chosen": -129.0581817626953, |
| "logps/rejected": -409.84765625, |
| "loss": 0.9372, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.825490951538086, |
| "rewards/margins": 9.559223175048828, |
| "rewards/rejected": -3.7337327003479004, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6133104223118102, |
| "grad_norm": 3.5067474676414108, |
| "learning_rate": 3.928646762605892e-06, |
| "logits/chosen": -1.0310039520263672, |
| "logits/rejected": -0.7086827158927917, |
| "logps/chosen": -130.6362762451172, |
| "logps/rejected": -411.41943359375, |
| "loss": 0.9471, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.827083587646484, |
| "rewards/margins": 9.576539993286133, |
| "rewards/rejected": -3.7494568824768066, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.620359967395854, |
| "grad_norm": 3.4568451567973306, |
| "learning_rate": 3.9058492668945995e-06, |
| "logits/chosen": -1.0341156721115112, |
| "logits/rejected": -0.6810585856437683, |
| "logps/chosen": -132.6650848388672, |
| "logps/rejected": -412.41943359375, |
| "loss": 0.9477, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.893308162689209, |
| "rewards/margins": 9.652871131896973, |
| "rewards/rejected": -3.759563446044922, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6274095124798977, |
| "grad_norm": 3.6505774836083535, |
| "learning_rate": 3.882879501328747e-06, |
| "logits/chosen": -1.0008310079574585, |
| "logits/rejected": -0.6594039797782898, |
| "logps/chosen": -129.55776977539062, |
| "logps/rejected": -414.1068420410156, |
| "loss": 0.9315, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.86118221282959, |
| "rewards/margins": 9.637504577636719, |
| "rewards/rejected": -3.776322603225708, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6344590575639416, |
| "grad_norm": 3.5805353661928248, |
| "learning_rate": 3.859740280575787e-06, |
| "logits/chosen": -0.9931684732437134, |
| "logits/rejected": -0.6309129595756531, |
| "logps/chosen": -130.759033203125, |
| "logps/rejected": -415.9930725097656, |
| "loss": 0.9437, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.825643539428711, |
| "rewards/margins": 9.62082576751709, |
| "rewards/rejected": -3.7951819896698, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6415086026479854, |
| "grad_norm": 3.4415482352225326, |
| "learning_rate": 3.836434440067858e-06, |
| "logits/chosen": -0.9874204993247986, |
| "logits/rejected": -0.6279218792915344, |
| "logps/chosen": -129.0675811767578, |
| "logps/rejected": -417.6011657714844, |
| "loss": 0.9333, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.867391109466553, |
| "rewards/margins": 9.678707122802734, |
| "rewards/rejected": -3.8113160133361816, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.6485581477320291, |
| "grad_norm": 3.4807858238324023, |
| "learning_rate": 3.8129648356543382e-06, |
| "logits/chosen": -1.0203678607940674, |
| "logits/rejected": -0.6446800827980042, |
| "logps/chosen": -127.60528564453125, |
| "logps/rejected": -418.719482421875, |
| "loss": 0.9229, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.817119121551514, |
| "rewards/margins": 9.639582633972168, |
| "rewards/rejected": -3.8224639892578125, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.655607692816073, |
| "grad_norm": 3.3116942159563876, |
| "learning_rate": 3.789334343251895e-06, |
| "logits/chosen": -1.0190130472183228, |
| "logits/rejected": -0.6429179906845093, |
| "logps/chosen": -128.70944213867188, |
| "logps/rejected": -420.0308532714844, |
| "loss": 0.9263, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.867619037628174, |
| "rewards/margins": 9.703242301940918, |
| "rewards/rejected": -3.8356235027313232, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6626572379001168, |
| "grad_norm": 3.4172689324940664, |
| "learning_rate": 3.765545858492077e-06, |
| "logits/chosen": -0.9674955606460571, |
| "logits/rejected": -0.6231255531311035, |
| "logps/chosen": -128.2279052734375, |
| "logps/rejected": -420.6991271972656, |
| "loss": 0.9211, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.826233863830566, |
| "rewards/margins": 9.668456077575684, |
| "rewards/rejected": -3.8422226905822754, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6697067829841605, |
| "grad_norm": 3.62022832891231, |
| "learning_rate": 3.741602296366487e-06, |
| "logits/chosen": -0.9731711745262146, |
| "logits/rejected": -0.622948169708252, |
| "logps/chosen": -127.91731262207031, |
| "logps/rejected": -420.6474609375, |
| "loss": 0.9201, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.898708820343018, |
| "rewards/margins": 9.740521430969238, |
| "rewards/rejected": -3.8418128490448, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6767563280682043, |
| "grad_norm": 3.75244615174725, |
| "learning_rate": 3.7175065908695907e-06, |
| "logits/chosen": -0.9623433351516724, |
| "logits/rejected": -0.611689031124115, |
| "logps/chosen": -127.69451904296875, |
| "logps/rejected": -421.1045837402344, |
| "loss": 0.9245, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.853524684906006, |
| "rewards/margins": 9.699825286865234, |
| "rewards/rejected": -3.846301317214966, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.6838058731522482, |
| "grad_norm": 3.4336011780072306, |
| "learning_rate": 3.6932616946391825e-06, |
| "logits/chosen": -0.9564552307128906, |
| "logits/rejected": -0.5640252232551575, |
| "logps/chosen": -128.86248779296875, |
| "logps/rejected": -422.3670349121094, |
| "loss": 0.9265, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.866178035736084, |
| "rewards/margins": 9.725229263305664, |
| "rewards/rejected": -3.859050750732422, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.6908554182362919, |
| "grad_norm": 3.346062760434833, |
| "learning_rate": 3.6688705785945828e-06, |
| "logits/chosen": -0.9649378657341003, |
| "logits/rejected": -0.5636154413223267, |
| "logps/chosen": -128.705322265625, |
| "logps/rejected": -422.5312194824219, |
| "loss": 0.9322, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.877577304840088, |
| "rewards/margins": 9.738177299499512, |
| "rewards/rejected": -3.8605995178222656, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.6979049633203357, |
| "grad_norm": 3.562698291523997, |
| "learning_rate": 3.644336231572584e-06, |
| "logits/chosen": -1.0158581733703613, |
| "logits/rejected": -0.5938189625740051, |
| "logps/chosen": -127.7231674194336, |
| "logps/rejected": -423.938232421875, |
| "loss": 0.9313, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.831978797912598, |
| "rewards/margins": 9.706572532653809, |
| "rewards/rejected": -3.874593734741211, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7049545084043796, |
| "grad_norm": 3.871831303710938, |
| "learning_rate": 3.6196616599612043e-06, |
| "logits/chosen": -0.9945854544639587, |
| "logits/rejected": -0.5935266613960266, |
| "logps/chosen": -131.4221649169922, |
| "logps/rejected": -425.1352233886719, |
| "loss": 0.9457, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.880837917327881, |
| "rewards/margins": 9.767516136169434, |
| "rewards/rejected": -3.8866775035858154, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7120040534884233, |
| "grad_norm": 3.525675978075724, |
| "learning_rate": 3.5948498873312963e-06, |
| "logits/chosen": -0.9815523028373718, |
| "logits/rejected": -0.6091595888137817, |
| "logps/chosen": -125.77652740478516, |
| "logps/rejected": -425.8006896972656, |
| "loss": 0.914, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.811987400054932, |
| "rewards/margins": 9.705245971679688, |
| "rewards/rejected": -3.893258810043335, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7190535985724671, |
| "grad_norm": 3.44484661738424, |
| "learning_rate": 3.5699039540660364e-06, |
| "logits/chosen": -0.9921701550483704, |
| "logits/rejected": -0.6261746883392334, |
| "logps/chosen": -127.05594635009766, |
| "logps/rejected": -426.45538330078125, |
| "loss": 0.9157, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.895692348480225, |
| "rewards/margins": 9.795554161071777, |
| "rewards/rejected": -3.8998615741729736, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.726103143656511, |
| "grad_norm": 3.4745813514606203, |
| "learning_rate": 3.5448269169883686e-06, |
| "logits/chosen": -0.9629077911376953, |
| "logits/rejected": -0.562868058681488, |
| "logps/chosen": -125.16448211669922, |
| "logps/rejected": -427.8833923339844, |
| "loss": 0.896, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.892127990722656, |
| "rewards/margins": 9.806340217590332, |
| "rewards/rejected": -3.9142112731933594, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7331526887405547, |
| "grad_norm": 3.424852141033239, |
| "learning_rate": 3.519621848986428e-06, |
| "logits/chosen": -0.9507054686546326, |
| "logits/rejected": -0.5376430749893188, |
| "logps/chosen": -128.2971954345703, |
| "logps/rejected": -428.9288024902344, |
| "loss": 0.9258, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.92517614364624, |
| "rewards/margins": 9.849757194519043, |
| "rewards/rejected": -3.9245803356170654, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.7402022338245985, |
| "grad_norm": 3.5251677265369543, |
| "learning_rate": 3.4942918386369916e-06, |
| "logits/chosen": -0.9696516394615173, |
| "logits/rejected": -0.5560165643692017, |
| "logps/chosen": -128.28604125976562, |
| "logps/rejected": -429.8753967285156, |
| "loss": 0.923, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.882317543029785, |
| "rewards/margins": 9.816309928894043, |
| "rewards/rejected": -3.9339921474456787, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7472517789086423, |
| "grad_norm": 3.4170327558831333, |
| "learning_rate": 3.468839989827014e-06, |
| "logits/chosen": -0.9457041025161743, |
| "logits/rejected": -0.5628274083137512, |
| "logps/chosen": -127.13785552978516, |
| "logps/rejected": -429.71923828125, |
| "loss": 0.9242, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.836861610412598, |
| "rewards/margins": 9.769371032714844, |
| "rewards/rejected": -3.932508945465088, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.7543013239926861, |
| "grad_norm": 3.6140577579219695, |
| "learning_rate": 3.443269421373282e-06, |
| "logits/chosen": -0.9552961587905884, |
| "logits/rejected": -0.5585734844207764, |
| "logps/chosen": -125.71052551269531, |
| "logps/rejected": -431.2530822753906, |
| "loss": 0.9171, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.742475509643555, |
| "rewards/margins": 9.690230369567871, |
| "rewards/rejected": -3.9477555751800537, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.7613508690767299, |
| "grad_norm": 3.3716609559874087, |
| "learning_rate": 3.41758326664024e-06, |
| "logits/chosen": -0.9388333559036255, |
| "logits/rejected": -0.5697802901268005, |
| "logps/chosen": -125.131103515625, |
| "logps/rejected": -432.1859436035156, |
| "loss": 0.9039, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.860114097595215, |
| "rewards/margins": 9.817200660705566, |
| "rewards/rejected": -3.9570865631103516, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.7684004141607736, |
| "grad_norm": 3.647585572092197, |
| "learning_rate": 3.391784673156038e-06, |
| "logits/chosen": -0.9397434592247009, |
| "logits/rejected": -0.5741142630577087, |
| "logps/chosen": -129.66160583496094, |
| "logps/rejected": -433.1015319824219, |
| "loss": 0.924, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.916826248168945, |
| "rewards/margins": 9.883145332336426, |
| "rewards/rejected": -3.966318130493164, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7754499592448175, |
| "grad_norm": 3.2724520562704438, |
| "learning_rate": 3.365876802226833e-06, |
| "logits/chosen": -0.9321144223213196, |
| "logits/rejected": -0.5627609491348267, |
| "logps/chosen": -124.29692840576172, |
| "logps/rejected": -433.846923828125, |
| "loss": 0.8999, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.887063026428223, |
| "rewards/margins": 9.86075496673584, |
| "rewards/rejected": -3.9736931324005127, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7824995043288613, |
| "grad_norm": 3.458299468379887, |
| "learning_rate": 3.3398628285494193e-06, |
| "logits/chosen": -0.9217039346694946, |
| "logits/rejected": -0.5298233032226562, |
| "logps/chosen": -126.9094009399414, |
| "logps/rejected": -434.9250183105469, |
| "loss": 0.9202, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.8385796546936035, |
| "rewards/margins": 9.823028564453125, |
| "rewards/rejected": -3.9844493865966797, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.789549049412905, |
| "grad_norm": 3.5226619493219804, |
| "learning_rate": 3.3137459398221984e-06, |
| "logits/chosen": -0.8872036337852478, |
| "logits/rejected": -0.498056560754776, |
| "logps/chosen": -127.6511459350586, |
| "logps/rejected": -435.1647033691406, |
| "loss": 0.9222, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.896953105926514, |
| "rewards/margins": 9.883868217468262, |
| "rewards/rejected": -3.9869160652160645, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.7965985944969489, |
| "grad_norm": 3.5412559165567745, |
| "learning_rate": 3.2875293363545692e-06, |
| "logits/chosen": -0.876203715801239, |
| "logits/rejected": -0.49807339906692505, |
| "logps/chosen": -128.12411499023438, |
| "logps/rejected": -435.5065002441406, |
| "loss": 0.9134, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.977075576782227, |
| "rewards/margins": 9.967486381530762, |
| "rewards/rejected": -3.9904098510742188, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8036481395809927, |
| "grad_norm": 3.620961758496946, |
| "learning_rate": 3.261216230674768e-06, |
| "logits/chosen": -0.8721205592155457, |
| "logits/rejected": -0.46372947096824646, |
| "logps/chosen": -127.4286880493164, |
| "logps/rejected": -436.84674072265625, |
| "loss": 0.9183, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.863312244415283, |
| "rewards/margins": 9.866989135742188, |
| "rewards/rejected": -4.0036773681640625, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8106976846650364, |
| "grad_norm": 3.4592850588257718, |
| "learning_rate": 3.2348098471362132e-06, |
| "logits/chosen": -0.8742119073867798, |
| "logits/rejected": -0.4837910234928131, |
| "logps/chosen": -128.3339080810547, |
| "logps/rejected": -437.7392578125, |
| "loss": 0.9189, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.9210028648376465, |
| "rewards/margins": 9.933659553527832, |
| "rewards/rejected": -4.0126566886901855, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.8177472297490802, |
| "grad_norm": 3.5572863657837153, |
| "learning_rate": 3.208313421522397e-06, |
| "logits/chosen": -0.8337503671646118, |
| "logits/rejected": -0.47826629877090454, |
| "logps/chosen": -127.3692398071289, |
| "logps/rejected": -437.60162353515625, |
| "loss": 0.9134, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.904942512512207, |
| "rewards/margins": 9.916193962097168, |
| "rewards/rejected": -4.011251449584961, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.8247967748331241, |
| "grad_norm": 3.8162541172864333, |
| "learning_rate": 3.1817302006503835e-06, |
| "logits/chosen": -0.8695448040962219, |
| "logits/rejected": -0.5193209052085876, |
| "logps/chosen": -127.39215087890625, |
| "logps/rejected": -438.3293151855469, |
| "loss": 0.9169, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.902700901031494, |
| "rewards/margins": 9.921298027038574, |
| "rewards/rejected": -4.018597602844238, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.8318463199171678, |
| "grad_norm": 3.339934252923497, |
| "learning_rate": 3.1550634419729443e-06, |
| "logits/chosen": -0.9015306830406189, |
| "logits/rejected": -0.533819854259491, |
| "logps/chosen": -127.2344970703125, |
| "logps/rejected": -438.45684814453125, |
| "loss": 0.9114, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.944534778594971, |
| "rewards/margins": 9.964533805847168, |
| "rewards/rejected": -4.019999980926514, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.8388958650012116, |
| "grad_norm": 3.271467539577929, |
| "learning_rate": 3.128316413179403e-06, |
| "logits/chosen": -0.8996972441673279, |
| "logits/rejected": -0.48372983932495117, |
| "logps/chosen": -132.72434997558594, |
| "logps/rejected": -438.97900390625, |
| "loss": 0.9392, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.998867988586426, |
| "rewards/margins": 10.023943901062012, |
| "rewards/rejected": -4.025076389312744, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.8459454100852555, |
| "grad_norm": 3.347650161642198, |
| "learning_rate": 3.101492391795215e-06, |
| "logits/chosen": -0.923694908618927, |
| "logits/rejected": -0.521001935005188, |
| "logps/chosen": -124.9764633178711, |
| "logps/rejected": -439.68194580078125, |
| "loss": 0.9067, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.902924537658691, |
| "rewards/margins": 9.934977531433105, |
| "rewards/rejected": -4.032052516937256, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.8529949551692992, |
| "grad_norm": 3.5949752114784594, |
| "learning_rate": 3.0745946647803506e-06, |
| "logits/chosen": -0.8853136301040649, |
| "logits/rejected": -0.5156723260879517, |
| "logps/chosen": -127.0760726928711, |
| "logps/rejected": -440.3129577636719, |
| "loss": 0.9012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.988919258117676, |
| "rewards/margins": 10.027365684509277, |
| "rewards/rejected": -4.038445949554443, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.860044500253343, |
| "grad_norm": 3.4036399165146873, |
| "learning_rate": 3.0476265281265136e-06, |
| "logits/chosen": -0.8544861078262329, |
| "logits/rejected": -0.5153040885925293, |
| "logps/chosen": -126.94913482666016, |
| "logps/rejected": -440.95733642578125, |
| "loss": 0.9124, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.942080020904541, |
| "rewards/margins": 9.986968040466309, |
| "rewards/rejected": -4.044888496398926, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.8670940453373869, |
| "grad_norm": 3.441908586206191, |
| "learning_rate": 3.0205912864532582e-06, |
| "logits/chosen": -0.8383521437644958, |
| "logits/rejected": -0.4816366732120514, |
| "logps/chosen": -118.70378875732422, |
| "logps/rejected": -442.1132507324219, |
| "loss": 0.8588, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.885745048522949, |
| "rewards/margins": 9.942169189453125, |
| "rewards/rejected": -4.056425094604492, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.8741435904214306, |
| "grad_norm": 3.4985409509349084, |
| "learning_rate": 2.9934922526030507e-06, |
| "logits/chosen": -0.8542296290397644, |
| "logits/rejected": -0.4874165654182434, |
| "logps/chosen": -126.6626205444336, |
| "logps/rejected": -442.2069396972656, |
| "loss": 0.9089, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.916454315185547, |
| "rewards/margins": 9.973841667175293, |
| "rewards/rejected": -4.057387351989746, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.8811931355054744, |
| "grad_norm": 3.498245870729679, |
| "learning_rate": 2.966332747235318e-06, |
| "logits/chosen": -0.8780538439750671, |
| "logits/rejected": -0.4646807610988617, |
| "logps/chosen": -128.723876953125, |
| "logps/rejected": -442.9123229980469, |
| "loss": 0.9113, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.958227634429932, |
| "rewards/margins": 10.022747993469238, |
| "rewards/rejected": -4.064519882202148, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.8882426805895182, |
| "grad_norm": 3.591985658151727, |
| "learning_rate": 2.939116098419538e-06, |
| "logits/chosen": -0.8663986325263977, |
| "logits/rejected": -0.4639105796813965, |
| "logps/chosen": -128.5757293701172, |
| "logps/rejected": -443.9267578125, |
| "loss": 0.9148, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.976249694824219, |
| "rewards/margins": 10.05082893371582, |
| "rewards/rejected": -4.074578762054443, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.895292225673562, |
| "grad_norm": 3.722094728579672, |
| "learning_rate": 2.9118456412274348e-06, |
| "logits/chosen": -0.8501833081245422, |
| "logits/rejected": -0.45762744545936584, |
| "logps/chosen": -125.53106689453125, |
| "logps/rejected": -444.51739501953125, |
| "loss": 0.9058, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.896867275238037, |
| "rewards/margins": 9.977302551269531, |
| "rewards/rejected": -4.080434322357178, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.9023417707576058, |
| "grad_norm": 3.2456180119701377, |
| "learning_rate": 2.8845247173242923e-06, |
| "logits/chosen": -0.8174735903739929, |
| "logits/rejected": -0.44560080766677856, |
| "logps/chosen": -123.5894546508789, |
| "logps/rejected": -445.1806640625, |
| "loss": 0.8893, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.879065990447998, |
| "rewards/margins": 9.966200828552246, |
| "rewards/rejected": -4.087134838104248, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.9093913158416496, |
| "grad_norm": 3.5653205016331357, |
| "learning_rate": 2.8571566745594857e-06, |
| "logits/chosen": -0.8129379153251648, |
| "logits/rejected": -0.45377278327941895, |
| "logps/chosen": -124.4063949584961, |
| "logps/rejected": -445.7228698730469, |
| "loss": 0.8938, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.926529407501221, |
| "rewards/margins": 10.0189790725708, |
| "rewards/rejected": -4.0924506187438965, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.9164408609256934, |
| "grad_norm": 3.5526191928837303, |
| "learning_rate": 2.829744866556236e-06, |
| "logits/chosen": -0.7916550636291504, |
| "logits/rejected": -0.40035343170166016, |
| "logps/chosen": -124.70146942138672, |
| "logps/rejected": -446.320068359375, |
| "loss": 0.8904, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.9350385665893555, |
| "rewards/margins": 10.033607482910156, |
| "rewards/rejected": -4.098568916320801, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.9234904060097372, |
| "grad_norm": 3.434227344310014, |
| "learning_rate": 2.8022926523006644e-06, |
| "logits/chosen": -0.8052730560302734, |
| "logits/rejected": -0.4015885293483734, |
| "logps/chosen": -124.60784912109375, |
| "logps/rejected": -446.3960876464844, |
| "loss": 0.8916, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.919919967651367, |
| "rewards/margins": 10.019259452819824, |
| "rewards/rejected": -4.099339008331299, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.930539951093781, |
| "grad_norm": 3.3926060029845986, |
| "learning_rate": 2.774803395730194e-06, |
| "logits/chosen": -0.8420242667198181, |
| "logits/rejected": -0.41102123260498047, |
| "logps/chosen": -126.0671615600586, |
| "logps/rejected": -446.8029479980469, |
| "loss": 0.9077, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.900039196014404, |
| "rewards/margins": 10.003410339355469, |
| "rewards/rejected": -4.103371620178223, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.9375894961778247, |
| "grad_norm": 3.357665650193962, |
| "learning_rate": 2.747280465321332e-06, |
| "logits/chosen": -0.8105039000511169, |
| "logits/rejected": -0.3818654417991638, |
| "logps/chosen": -126.90190887451172, |
| "logps/rejected": -447.68603515625, |
| "loss": 0.9119, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.911780834197998, |
| "rewards/margins": 10.023977279663086, |
| "rewards/rejected": -4.1121954917907715, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.9446390412618686, |
| "grad_norm": 3.32023322129936, |
| "learning_rate": 2.7197272336769114e-06, |
| "logits/chosen": -0.8306812644004822, |
| "logits/rejected": -0.38350504636764526, |
| "logps/chosen": -123.29646301269531, |
| "logps/rejected": -448.5968933105469, |
| "loss": 0.8882, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.89159631729126, |
| "rewards/margins": 10.012816429138184, |
| "rewards/rejected": -4.121219635009766, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.9516885863459124, |
| "grad_norm": 3.5798392363898803, |
| "learning_rate": 2.692147077112815e-06, |
| "logits/chosen": -0.8416573405265808, |
| "logits/rejected": -0.40649136900901794, |
| "logps/chosen": -128.4845428466797, |
| "logps/rejected": -448.6703186035156, |
| "loss": 0.9173, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.955367565155029, |
| "rewards/margins": 10.07739543914795, |
| "rewards/rejected": -4.122028350830078, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.9587381314299561, |
| "grad_norm": 3.5837421382411807, |
| "learning_rate": 2.6645433752442474e-06, |
| "logits/chosen": -0.8264873623847961, |
| "logits/rejected": -0.39921826124191284, |
| "logps/chosen": -123.0130386352539, |
| "logps/rejected": -449.3578796386719, |
| "loss": 0.8858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.90243673324585, |
| "rewards/margins": 10.031229972839355, |
| "rewards/rejected": -4.128793239593506, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.965787676514, |
| "grad_norm": 3.436374551260621, |
| "learning_rate": 2.6369195105716087e-06, |
| "logits/chosen": -0.8044994473457336, |
| "logits/rejected": -0.3846818506717682, |
| "logps/chosen": -124.25041961669922, |
| "logps/rejected": -449.81298828125, |
| "loss": 0.8951, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.933631420135498, |
| "rewards/margins": 10.06702995300293, |
| "rewards/rejected": -4.133397579193115, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.9728372215980438, |
| "grad_norm": 3.3164716286577227, |
| "learning_rate": 2.609278868066007e-06, |
| "logits/chosen": -0.8319023251533508, |
| "logits/rejected": -0.4071156978607178, |
| "logps/chosen": -125.79058074951172, |
| "logps/rejected": -449.4647521972656, |
| "loss": 0.8959, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.004671573638916, |
| "rewards/margins": 10.134662628173828, |
| "rewards/rejected": -4.129991054534912, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.9798867666820875, |
| "grad_norm": 3.4420462182992106, |
| "learning_rate": 2.581624834754469e-06, |
| "logits/chosen": -0.7811191082000732, |
| "logits/rejected": -0.35583096742630005, |
| "logps/chosen": -125.7827377319336, |
| "logps/rejected": -448.8321228027344, |
| "loss": 0.9017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.957380771636963, |
| "rewards/margins": 10.080957412719727, |
| "rewards/rejected": -4.123576641082764, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.9869363117661314, |
| "grad_norm": 3.4368002175698105, |
| "learning_rate": 2.553960799304905e-06, |
| "logits/chosen": -0.7721039056777954, |
| "logits/rejected": -0.35003602504730225, |
| "logps/chosen": -126.46416473388672, |
| "logps/rejected": -450.1924743652344, |
| "loss": 0.9023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.950610637664795, |
| "rewards/margins": 10.08786392211914, |
| "rewards/rejected": -4.137254238128662, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9939858568501752, |
| "grad_norm": 3.3094259982405445, |
| "learning_rate": 2.526290151610865e-06, |
| "logits/chosen": -0.7781057357788086, |
| "logits/rejected": -0.3507390320301056, |
| "logps/chosen": -123.1922836303711, |
| "logps/rejected": -450.4504089355469, |
| "loss": 0.8887, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.893508434295654, |
| "rewards/margins": 10.033327102661133, |
| "rewards/rejected": -4.13981819152832, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.0007049545084044, |
| "grad_norm": 3.3806657760520955, |
| "learning_rate": 2.4986162823761513e-06, |
| "logits/chosen": -0.8073826432228088, |
| "logits/rejected": -0.3557504415512085, |
| "logps/chosen": -121.75922393798828, |
| "logps/rejected": -450.6792907714844, |
| "loss": 0.8728, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.962116718292236, |
| "rewards/margins": 10.104215621948242, |
| "rewards/rejected": -4.1421003341674805, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.0077544995924481, |
| "grad_norm": 3.3516929443658294, |
| "learning_rate": 2.4709425826993274e-06, |
| "logits/chosen": -0.8178227543830872, |
| "logits/rejected": -0.3367989957332611, |
| "logps/chosen": -112.90401458740234, |
| "logps/rejected": -450.7651062011719, |
| "loss": 0.8123, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.030337810516357, |
| "rewards/margins": 10.173301696777344, |
| "rewards/rejected": -4.1429643630981445, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.0148040446764919, |
| "grad_norm": 3.4921890072506168, |
| "learning_rate": 2.443272443658177e-06, |
| "logits/chosen": -0.8200713992118835, |
| "logits/rejected": -0.3234314024448395, |
| "logps/chosen": -112.2830810546875, |
| "logps/rejected": -451.45068359375, |
| "loss": 0.8067, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.004191875457764, |
| "rewards/margins": 10.153976440429688, |
| "rewards/rejected": -4.149785041809082, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.0218535897605359, |
| "grad_norm": 3.589176883533258, |
| "learning_rate": 2.415609255894173e-06, |
| "logits/chosen": -0.8283817172050476, |
| "logits/rejected": -0.35207217931747437, |
| "logps/chosen": -116.6107177734375, |
| "logps/rejected": -452.73248291015625, |
| "loss": 0.8273, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.116900444030762, |
| "rewards/margins": 10.279582023620605, |
| "rewards/rejected": -4.16268253326416, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.0289031348445796, |
| "grad_norm": 3.3300561164770324, |
| "learning_rate": 2.3879564091969936e-06, |
| "logits/chosen": -0.7885460257530212, |
| "logits/rejected": -0.3300701081752777, |
| "logps/chosen": -111.32238006591797, |
| "logps/rejected": -452.65673828125, |
| "loss": 0.7952, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.027840614318848, |
| "rewards/margins": 10.189643859863281, |
| "rewards/rejected": -4.16180419921875, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.0359526799286234, |
| "grad_norm": 3.5974185568251706, |
| "learning_rate": 2.360317292089142e-06, |
| "logits/chosen": -0.7770583033561707, |
| "logits/rejected": -0.30780109763145447, |
| "logps/chosen": -113.72601318359375, |
| "logps/rejected": -452.1132507324219, |
| "loss": 0.815, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.973827362060547, |
| "rewards/margins": 10.13033390045166, |
| "rewards/rejected": -4.156505584716797, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.0430022250126672, |
| "grad_norm": 3.94979541076315, |
| "learning_rate": 2.3326952914107272e-06, |
| "logits/chosen": -0.7770149111747742, |
| "logits/rejected": -0.32130613923072815, |
| "logps/chosen": -109.78759765625, |
| "logps/rejected": -452.708984375, |
| "loss": 0.8016, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.9994988441467285, |
| "rewards/margins": 10.161717414855957, |
| "rewards/rejected": -4.162219047546387, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.050051770096711, |
| "grad_norm": 3.416366688616228, |
| "learning_rate": 2.3050937919044476e-06, |
| "logits/chosen": -0.7574631571769714, |
| "logits/rejected": -0.3230978548526764, |
| "logps/chosen": -112.95440673828125, |
| "logps/rejected": -453.5429382324219, |
| "loss": 0.8057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.04919958114624, |
| "rewards/margins": 10.219966888427734, |
| "rewards/rejected": -4.170767307281494, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.0571013151807547, |
| "grad_norm": 3.5726005824550033, |
| "learning_rate": 2.277516175800829e-06, |
| "logits/chosen": -0.7706856727600098, |
| "logits/rejected": -0.3133412003517151, |
| "logps/chosen": -108.99263000488281, |
| "logps/rejected": -454.4375, |
| "loss": 0.7875, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.972984790802002, |
| "rewards/margins": 10.152580261230469, |
| "rewards/rejected": -4.179595470428467, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.0641508602647984, |
| "grad_norm": 3.644467271254268, |
| "learning_rate": 2.249965822403773e-06, |
| "logits/chosen": -0.785606324672699, |
| "logits/rejected": -0.30744919180870056, |
| "logps/chosen": -110.87646484375, |
| "logps/rejected": -454.81695556640625, |
| "loss": 0.8022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.023101806640625, |
| "rewards/margins": 10.206517219543457, |
| "rewards/rejected": -4.183415412902832, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.0712004053488424, |
| "grad_norm": 3.5762988132387528, |
| "learning_rate": 2.2224461076764703e-06, |
| "logits/chosen": -0.7679362893104553, |
| "logits/rejected": -0.2972542941570282, |
| "logps/chosen": -112.6702880859375, |
| "logps/rejected": -454.98028564453125, |
| "loss": 0.8051, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.082765102386475, |
| "rewards/margins": 10.267889976501465, |
| "rewards/rejected": -4.18512487411499, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.0782499504328862, |
| "grad_norm": 3.765453501473496, |
| "learning_rate": 2.1949604038277085e-06, |
| "logits/chosen": -0.7607068419456482, |
| "logits/rejected": -0.29195815324783325, |
| "logps/chosen": -108.87700653076172, |
| "logps/rejected": -455.62042236328125, |
| "loss": 0.7928, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.96248197555542, |
| "rewards/margins": 10.153925895690918, |
| "rewards/rejected": -4.191443920135498, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.08529949551693, |
| "grad_norm": 3.675075817780507, |
| "learning_rate": 2.1675120788986524e-06, |
| "logits/chosen": -0.7517310380935669, |
| "logits/rejected": -0.2829591631889343, |
| "logps/chosen": -113.5805435180664, |
| "logps/rejected": -456.20068359375, |
| "loss": 0.8091, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.096705913543701, |
| "rewards/margins": 10.29398250579834, |
| "rewards/rejected": -4.197276592254639, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.0923490406009737, |
| "grad_norm": 3.510939604019646, |
| "learning_rate": 2.1401044963501353e-06, |
| "logits/chosen": -0.7697411775588989, |
| "logits/rejected": -0.28985849022865295, |
| "logps/chosen": -109.6702651977539, |
| "logps/rejected": -456.3376159667969, |
| "loss": 0.7883, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.059773921966553, |
| "rewards/margins": 10.258381843566895, |
| "rewards/rejected": -4.1986083984375, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.0993985856850175, |
| "grad_norm": 3.78162571016694, |
| "learning_rate": 2.1127410146505006e-06, |
| "logits/chosen": -0.7736045122146606, |
| "logits/rejected": -0.2909747064113617, |
| "logps/chosen": -111.208251953125, |
| "logps/rejected": -456.29913330078125, |
| "loss": 0.8031, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.034745216369629, |
| "rewards/margins": 10.23304271697998, |
| "rewards/rejected": -4.198297023773193, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.1064481307690612, |
| "grad_norm": 3.446885891663511, |
| "learning_rate": 2.0854249868640653e-06, |
| "logits/chosen": -0.7775657176971436, |
| "logits/rejected": -0.28920355439186096, |
| "logps/chosen": -111.19278717041016, |
| "logps/rejected": -456.51739501953125, |
| "loss": 0.8033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.995384216308594, |
| "rewards/margins": 10.195777893066406, |
| "rewards/rejected": -4.2003936767578125, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.1134976758531052, |
| "grad_norm": 3.756805627719574, |
| "learning_rate": 2.0581597602402425e-06, |
| "logits/chosen": -0.7638501524925232, |
| "logits/rejected": -0.2594032287597656, |
| "logps/chosen": -115.24254608154297, |
| "logps/rejected": -456.53448486328125, |
| "loss": 0.8195, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.091201305389404, |
| "rewards/margins": 10.291844367980957, |
| "rewards/rejected": -4.200643062591553, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.120547220937149, |
| "grad_norm": 3.812655052084755, |
| "learning_rate": 2.0309486758033777e-06, |
| "logits/chosen": -0.7667771577835083, |
| "logits/rejected": -0.23339663445949554, |
| "logps/chosen": -112.5297622680664, |
| "logps/rejected": -456.4374084472656, |
| "loss": 0.8099, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.06128454208374, |
| "rewards/margins": 10.260945320129395, |
| "rewards/rejected": -4.199659824371338, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.1275967660211927, |
| "grad_norm": 3.9420982726472773, |
| "learning_rate": 2.0037950679433425e-06, |
| "logits/chosen": -0.7690004706382751, |
| "logits/rejected": -0.2527337074279785, |
| "logps/chosen": -113.76244354248047, |
| "logps/rejected": -456.77178955078125, |
| "loss": 0.8198, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.039876461029053, |
| "rewards/margins": 10.242877006530762, |
| "rewards/rejected": -4.203000545501709, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.1346463111052365, |
| "grad_norm": 3.335775619061638, |
| "learning_rate": 1.9767022640069493e-06, |
| "logits/chosen": -0.7629236578941345, |
| "logits/rejected": -0.24979268014431, |
| "logps/chosen": -111.00728607177734, |
| "logps/rejected": -457.5516052246094, |
| "loss": 0.7979, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.058557510375977, |
| "rewards/margins": 10.269355773925781, |
| "rewards/rejected": -4.210798740386963, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.1416958561892803, |
| "grad_norm": 3.5494177496277683, |
| "learning_rate": 1.9496735838902254e-06, |
| "logits/chosen": -0.7654642462730408, |
| "logits/rejected": -0.24192610383033752, |
| "logps/chosen": -112.31331634521484, |
| "logps/rejected": -457.4699401855469, |
| "loss": 0.8023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.082005500793457, |
| "rewards/margins": 10.292023658752441, |
| "rewards/rejected": -4.210019111633301, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.148745401273324, |
| "grad_norm": 3.62036409136568, |
| "learning_rate": 1.922712339631595e-06, |
| "logits/chosen": -0.7688376903533936, |
| "logits/rejected": -0.2729661762714386, |
| "logps/chosen": -111.631591796875, |
| "logps/rejected": -457.541015625, |
| "loss": 0.8005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.04392671585083, |
| "rewards/margins": 10.254591941833496, |
| "rewards/rejected": -4.210665702819824, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.1557949463573678, |
| "grad_norm": 3.8845526177462775, |
| "learning_rate": 1.895821835006033e-06, |
| "logits/chosen": -0.7550647854804993, |
| "logits/rejected": -0.26288357377052307, |
| "logps/chosen": -112.28546142578125, |
| "logps/rejected": -458.4576721191406, |
| "loss": 0.8046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.07279634475708, |
| "rewards/margins": 10.29267406463623, |
| "rewards/rejected": -4.21987771987915, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.1628444914414118, |
| "grad_norm": 3.8168863101028276, |
| "learning_rate": 1.8690053651202278e-06, |
| "logits/chosen": -0.757368266582489, |
| "logits/rejected": -0.24931330978870392, |
| "logps/chosen": -110.4576187133789, |
| "logps/rejected": -458.091064453125, |
| "loss": 0.8051, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.992654323577881, |
| "rewards/margins": 10.208785057067871, |
| "rewards/rejected": -4.216129302978516, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.1698940365254555, |
| "grad_norm": 3.8170872257133874, |
| "learning_rate": 1.842266216008804e-06, |
| "logits/chosen": -0.7819269299507141, |
| "logits/rejected": -0.2846717834472656, |
| "logps/chosen": -110.04841613769531, |
| "logps/rejected": -458.6672058105469, |
| "loss": 0.8002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.956142425537109, |
| "rewards/margins": 10.178072929382324, |
| "rewards/rejected": -4.221930980682373, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.1769435816094993, |
| "grad_norm": 3.623556472869695, |
| "learning_rate": 1.8156076642316566e-06, |
| "logits/chosen": -0.7666402459144592, |
| "logits/rejected": -0.27397018671035767, |
| "logps/chosen": -116.65936279296875, |
| "logps/rejected": -459.2618103027344, |
| "loss": 0.826, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.08809757232666, |
| "rewards/margins": 10.316171646118164, |
| "rewards/rejected": -4.228073596954346, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.183993126693543, |
| "grad_norm": 3.6258528177441933, |
| "learning_rate": 1.7890329764724522e-06, |
| "logits/chosen": -0.7601782083511353, |
| "logits/rejected": -0.2446957379579544, |
| "logps/chosen": -111.53218841552734, |
| "logps/rejected": -459.0636291503906, |
| "loss": 0.804, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.022862911224365, |
| "rewards/margins": 10.248769760131836, |
| "rewards/rejected": -4.225907325744629, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.1910426717775868, |
| "grad_norm": 3.737614878414752, |
| "learning_rate": 1.7625454091383348e-06, |
| "logits/chosen": -0.7394657135009766, |
| "logits/rejected": -0.24199071526527405, |
| "logps/chosen": -109.6101303100586, |
| "logps/rejected": -458.5634765625, |
| "loss": 0.7923, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.999507904052734, |
| "rewards/margins": 10.220422744750977, |
| "rewards/rejected": -4.220914363861084, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.1980922168616306, |
| "grad_norm": 3.6014758695213773, |
| "learning_rate": 1.7361482079608916e-06, |
| "logits/chosen": -0.7469202876091003, |
| "logits/rejected": -0.24424724280834198, |
| "logps/chosen": -111.15461730957031, |
| "logps/rejected": -459.6180725097656, |
| "loss": 0.8001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.06093692779541, |
| "rewards/margins": 10.292411804199219, |
| "rewards/rejected": -4.231475353240967, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.2051417619456744, |
| "grad_norm": 3.7251487078814076, |
| "learning_rate": 1.7098446075984271e-06, |
| "logits/chosen": -0.7270140051841736, |
| "logits/rejected": -0.2223903387784958, |
| "logps/chosen": -110.1701431274414, |
| "logps/rejected": -460.193115234375, |
| "loss": 0.7937, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.080112934112549, |
| "rewards/margins": 10.3173246383667, |
| "rewards/rejected": -4.237212657928467, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.2121913070297183, |
| "grad_norm": 3.6824017522920425, |
| "learning_rate": 1.6836378312395985e-06, |
| "logits/chosen": -0.7231994271278381, |
| "logits/rejected": -0.2200583517551422, |
| "logps/chosen": -110.08992004394531, |
| "logps/rejected": -460.22802734375, |
| "loss": 0.7961, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.026852130889893, |
| "rewards/margins": 10.264342308044434, |
| "rewards/rejected": -4.237489223480225, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.219240852113762, |
| "grad_norm": 3.6649464025544725, |
| "learning_rate": 1.6575310902084486e-06, |
| "logits/chosen": -0.7727184295654297, |
| "logits/rejected": -0.2428928166627884, |
| "logps/chosen": -116.430419921875, |
| "logps/rejected": -460.3304748535156, |
| "loss": 0.8234, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.162951946258545, |
| "rewards/margins": 10.40161418914795, |
| "rewards/rejected": -4.238661766052246, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.2262903971978059, |
| "grad_norm": 3.516534580479379, |
| "learning_rate": 1.6315275835708968e-06, |
| "logits/chosen": -0.7715465426445007, |
| "logits/rejected": -0.2562185227870941, |
| "logps/chosen": -114.5073013305664, |
| "logps/rejected": -460.95611572265625, |
| "loss": 0.8189, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.068451881408691, |
| "rewards/margins": 10.313359260559082, |
| "rewards/rejected": -4.244908332824707, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.2333399422818496, |
| "grad_norm": 3.764498441728445, |
| "learning_rate": 1.6056304977427396e-06, |
| "logits/chosen": -0.7822647094726562, |
| "logits/rejected": -0.2754697799682617, |
| "logps/chosen": -108.8161849975586, |
| "logps/rejected": -460.80615234375, |
| "loss": 0.7835, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.099386692047119, |
| "rewards/margins": 10.342795372009277, |
| "rewards/rejected": -4.243407726287842, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.2403894873658934, |
| "grad_norm": 3.843560855128951, |
| "learning_rate": 1.579843006099182e-06, |
| "logits/chosen": -0.7731927633285522, |
| "logits/rejected": -0.2538703978061676, |
| "logps/chosen": -107.45731353759766, |
| "logps/rejected": -459.0361328125, |
| "loss": 0.7823, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.980363368988037, |
| "rewards/margins": 10.205938339233398, |
| "rewards/rejected": -4.225574493408203, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.2474390324499371, |
| "grad_norm": 3.852598571454623, |
| "learning_rate": 1.5541682685859877e-06, |
| "logits/chosen": -0.7549764513969421, |
| "logits/rejected": -0.23148897290229797, |
| "logps/chosen": -110.1994400024414, |
| "logps/rejected": -459.1859436035156, |
| "loss": 0.7968, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.038589000701904, |
| "rewards/margins": 10.265731811523438, |
| "rewards/rejected": -4.227142810821533, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.254488577533981, |
| "grad_norm": 3.7009734358554778, |
| "learning_rate": 1.5286094313322642e-06, |
| "logits/chosen": -0.784349799156189, |
| "logits/rejected": -0.25168928503990173, |
| "logps/chosen": -116.32988739013672, |
| "logps/rejected": -459.89044189453125, |
| "loss": 0.8291, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.14546537399292, |
| "rewards/margins": 10.379704475402832, |
| "rewards/rejected": -4.234239101409912, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.261538122618025, |
| "grad_norm": 3.6702836620131487, |
| "learning_rate": 1.5031696262649388e-06, |
| "logits/chosen": -0.7631133198738098, |
| "logits/rejected": -0.23987340927124023, |
| "logps/chosen": -108.21134185791016, |
| "logps/rejected": -460.1493225097656, |
| "loss": 0.7768, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.06735897064209, |
| "rewards/margins": 10.304133415222168, |
| "rewards/rejected": -4.236774444580078, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.2685876677020687, |
| "grad_norm": 3.7496096175509117, |
| "learning_rate": 1.4778519707249824e-06, |
| "logits/chosen": -0.7178513407707214, |
| "logits/rejected": -0.21321916580200195, |
| "logps/chosen": -108.5086669921875, |
| "logps/rejected": -460.8570251464844, |
| "loss": 0.7864, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.015305519104004, |
| "rewards/margins": 10.259174346923828, |
| "rewards/rejected": -4.243868827819824, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.2756372127861124, |
| "grad_norm": 3.5821190789210116, |
| "learning_rate": 1.452659567085416e-06, |
| "logits/chosen": -0.7405712008476257, |
| "logits/rejected": -0.22374410927295685, |
| "logps/chosen": -111.2579116821289, |
| "logps/rejected": -461.4320373535156, |
| "loss": 0.7931, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.068841934204102, |
| "rewards/margins": 10.318615913391113, |
| "rewards/rejected": -4.24977445602417, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.2826867578701562, |
| "grad_norm": 3.4834658925918993, |
| "learning_rate": 1.427595502371154e-06, |
| "logits/chosen": -0.7308667302131653, |
| "logits/rejected": -0.21432648599147797, |
| "logps/chosen": -112.19306945800781, |
| "logps/rejected": -461.59405517578125, |
| "loss": 0.8011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.054664134979248, |
| "rewards/margins": 10.305948257446289, |
| "rewards/rejected": -4.251284122467041, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.2897363029542, |
| "grad_norm": 3.6201120344067763, |
| "learning_rate": 1.4026628478807245e-06, |
| "logits/chosen": -0.7650494575500488, |
| "logits/rejected": -0.2274860143661499, |
| "logps/chosen": -108.77252197265625, |
| "logps/rejected": -461.6319274902344, |
| "loss": 0.7948, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.976110458374023, |
| "rewards/margins": 10.2275972366333, |
| "rewards/rejected": -4.251486778259277, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.2967858480382437, |
| "grad_norm": 3.7346652227529877, |
| "learning_rate": 1.3778646588099156e-06, |
| "logits/chosen": -0.7402178049087524, |
| "logits/rejected": -0.2060566246509552, |
| "logps/chosen": -112.3010025024414, |
| "logps/rejected": -461.74835205078125, |
| "loss": 0.7965, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0907111167907715, |
| "rewards/margins": 10.343549728393555, |
| "rewards/rejected": -4.252838134765625, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.3038353931222875, |
| "grad_norm": 3.768580088883866, |
| "learning_rate": 1.353203973877406e-06, |
| "logits/chosen": -0.7112205624580383, |
| "logits/rejected": -0.1953021138906479, |
| "logps/chosen": -110.20354461669922, |
| "logps/rejected": -461.6409606933594, |
| "loss": 0.7956, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.044336795806885, |
| "rewards/margins": 10.29607105255127, |
| "rewards/rejected": -4.251734256744385, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.3108849382063315, |
| "grad_norm": 3.4440781350790908, |
| "learning_rate": 1.328683814952401e-06, |
| "logits/chosen": -0.7040776014328003, |
| "logits/rejected": -0.18124286830425262, |
| "logps/chosen": -111.324462890625, |
| "logps/rejected": -461.9501037597656, |
| "loss": 0.7997, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.025957107543945, |
| "rewards/margins": 10.280824661254883, |
| "rewards/rejected": -4.254867076873779, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.3179344832903752, |
| "grad_norm": 3.657636143497304, |
| "learning_rate": 1.3043071866843393e-06, |
| "logits/chosen": -0.740128755569458, |
| "logits/rejected": -0.20634058117866516, |
| "logps/chosen": -116.50548553466797, |
| "logps/rejected": -462.2780456542969, |
| "loss": 0.8239, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.147490978240967, |
| "rewards/margins": 10.405622482299805, |
| "rewards/rejected": -4.258132457733154, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.324984028374419, |
| "grad_norm": 3.7645166487401998, |
| "learning_rate": 1.280077076134713e-06, |
| "logits/chosen": -0.7460483908653259, |
| "logits/rejected": -0.22827637195587158, |
| "logps/chosen": -111.3073959350586, |
| "logps/rejected": -462.13555908203125, |
| "loss": 0.8051, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.000453472137451, |
| "rewards/margins": 10.25708293914795, |
| "rewards/rejected": -4.256629467010498, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.3320335734584627, |
| "grad_norm": 3.6937410357737135, |
| "learning_rate": 1.2559964524110329e-06, |
| "logits/chosen": -0.7219620943069458, |
| "logits/rejected": -0.20633235573768616, |
| "logps/chosen": -111.94634246826172, |
| "logps/rejected": -462.30010986328125, |
| "loss": 0.8003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.105963230133057, |
| "rewards/margins": 10.364226341247559, |
| "rewards/rejected": -4.258262634277344, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.3390831185425065, |
| "grad_norm": 3.5068780936908346, |
| "learning_rate": 1.2320682663030067e-06, |
| "logits/chosen": -0.7292844653129578, |
| "logits/rejected": -0.2123355120420456, |
| "logps/chosen": -110.45084381103516, |
| "logps/rejected": -462.43505859375, |
| "loss": 0.788, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.103517532348633, |
| "rewards/margins": 10.363227844238281, |
| "rewards/rejected": -4.259710788726807, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.3461326636265505, |
| "grad_norm": 4.0334399408644614, |
| "learning_rate": 1.208295449920953e-06, |
| "logits/chosen": -0.7406136393547058, |
| "logits/rejected": -0.21345853805541992, |
| "logps/chosen": -111.7240982055664, |
| "logps/rejected": -462.9325866699219, |
| "loss": 0.7966, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.093398571014404, |
| "rewards/margins": 10.35802173614502, |
| "rewards/rejected": -4.264623165130615, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.353182208710594, |
| "grad_norm": 3.8844200352095095, |
| "learning_rate": 1.1846809163365053e-06, |
| "logits/chosen": -0.7404711246490479, |
| "logits/rejected": -0.21118001639842987, |
| "logps/chosen": -114.276123046875, |
| "logps/rejected": -463.1968688964844, |
| "loss": 0.8175, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.063361644744873, |
| "rewards/margins": 10.330649375915527, |
| "rewards/rejected": -4.267287731170654, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.360231753794638, |
| "grad_norm": 3.6024931838167005, |
| "learning_rate": 1.1612275592256505e-06, |
| "logits/chosen": -0.7284431457519531, |
| "logits/rejected": -0.2048410028219223, |
| "logps/chosen": -109.4595947265625, |
| "logps/rejected": -463.3080139160156, |
| "loss": 0.7931, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.021738052368164, |
| "rewards/margins": 10.290040016174316, |
| "rewards/rejected": -4.2683024406433105, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.3672812988786818, |
| "grad_norm": 3.616015891805895, |
| "learning_rate": 1.137938252514146e-06, |
| "logits/chosen": -0.7418842315673828, |
| "logits/rejected": -0.21675625443458557, |
| "logps/chosen": -112.6479721069336, |
| "logps/rejected": -463.680908203125, |
| "loss": 0.7983, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.131333351135254, |
| "rewards/margins": 10.403575897216797, |
| "rewards/rejected": -4.272243022918701, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.3743308439627255, |
| "grad_norm": 3.5890363099718887, |
| "learning_rate": 1.1148158500253528e-06, |
| "logits/chosen": -0.7448534965515137, |
| "logits/rejected": -0.20057539641857147, |
| "logps/chosen": -111.8428955078125, |
| "logps/rejected": -464.0297546386719, |
| "loss": 0.8019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.048543453216553, |
| "rewards/margins": 10.324110984802246, |
| "rewards/rejected": -4.275567531585693, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.3813803890467693, |
| "grad_norm": 3.840508286124758, |
| "learning_rate": 1.0918631851305357e-06, |
| "logits/chosen": -0.7599017024040222, |
| "logits/rejected": -0.20486782491207123, |
| "logps/chosen": -110.95333099365234, |
| "logps/rejected": -464.31695556640625, |
| "loss": 0.7993, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.010303020477295, |
| "rewards/margins": 10.288734436035156, |
| "rewards/rejected": -4.2784318923950195, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.388429934130813, |
| "grad_norm": 3.7059192007951807, |
| "learning_rate": 1.0690830704016624e-06, |
| "logits/chosen": -0.7338669300079346, |
| "logits/rejected": -0.19808827340602875, |
| "logps/chosen": -110.7686538696289, |
| "logps/rejected": -464.29425048828125, |
| "loss": 0.7951, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.024989128112793, |
| "rewards/margins": 10.303257942199707, |
| "rewards/rejected": -4.278269290924072, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.395479479214857, |
| "grad_norm": 3.7606971138852274, |
| "learning_rate": 1.0464782972667682e-06, |
| "logits/chosen": -0.7379679679870605, |
| "logits/rejected": -0.1948491781949997, |
| "logps/chosen": -108.2778091430664, |
| "logps/rejected": -464.478515625, |
| "loss": 0.7774, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.026111602783203, |
| "rewards/margins": 10.306151390075684, |
| "rewards/rejected": -4.280040740966797, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.4025290242989006, |
| "grad_norm": 3.816348534495336, |
| "learning_rate": 1.0240516356678853e-06, |
| "logits/chosen": -0.7188676595687866, |
| "logits/rejected": -0.20450268685817719, |
| "logps/chosen": -106.01759338378906, |
| "logps/rejected": -464.5401306152344, |
| "loss": 0.7649, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.007740020751953, |
| "rewards/margins": 10.288371086120605, |
| "rewards/rejected": -4.2806315422058105, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.4095785693829446, |
| "grad_norm": 3.612107514399364, |
| "learning_rate": 1.0018058337216327e-06, |
| "logits/chosen": -0.7266545295715332, |
| "logits/rejected": -0.19752562046051025, |
| "logps/chosen": -110.18603515625, |
| "logps/rejected": -464.22833251953125, |
| "loss": 0.7897, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.06878137588501, |
| "rewards/margins": 10.34636116027832, |
| "rewards/rejected": -4.277579307556152, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.4166281144669883, |
| "grad_norm": 3.7833872085921882, |
| "learning_rate": 9.797436173824606e-07, |
| "logits/chosen": -0.7136607766151428, |
| "logits/rejected": -0.1925588697195053, |
| "logps/chosen": -112.893310546875, |
| "logps/rejected": -463.73590087890625, |
| "loss": 0.8014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.104711055755615, |
| "rewards/margins": 10.377421379089355, |
| "rewards/rejected": -4.272710800170898, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.423677659551032, |
| "grad_norm": 3.7145911218808876, |
| "learning_rate": 9.578676901086213e-07, |
| "logits/chosen": -0.7393223643302917, |
| "logits/rejected": -0.20907925069332123, |
| "logps/chosen": -107.60369110107422, |
| "logps/rejected": -463.6431579589844, |
| "loss": 0.7849, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.018612384796143, |
| "rewards/margins": 10.290209770202637, |
| "rewards/rejected": -4.271597385406494, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.4307272046350759, |
| "grad_norm": 3.753197231623771, |
| "learning_rate": 9.361807325308861e-07, |
| "logits/chosen": -0.7394221425056458, |
| "logits/rejected": -0.2052200883626938, |
| "logps/chosen": -110.26177215576172, |
| "logps/rejected": -463.85186767578125, |
| "loss": 0.7936, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.044154644012451, |
| "rewards/margins": 10.317965507507324, |
| "rewards/rejected": -4.273810863494873, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.4377767497191196, |
| "grad_norm": 3.62410702725855, |
| "learning_rate": 9.146854021240795e-07, |
| "logits/chosen": -0.7467309832572937, |
| "logits/rejected": -0.20924925804138184, |
| "logps/chosen": -109.57176971435547, |
| "logps/rejected": -464.32281494140625, |
| "loss": 0.7825, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.084084987640381, |
| "rewards/margins": 10.362741470336914, |
| "rewards/rejected": -4.278656482696533, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.4448262948031636, |
| "grad_norm": 3.592620796932219, |
| "learning_rate": 8.933843328814224e-07, |
| "logits/chosen": -0.7418180704116821, |
| "logits/rejected": -0.2005995362997055, |
| "logps/chosen": -106.8327865600586, |
| "logps/rejected": -464.5583190917969, |
| "loss": 0.7709, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.014178276062012, |
| "rewards/margins": 10.294994354248047, |
| "rewards/rejected": -4.280816555023193, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.4518758398872071, |
| "grad_norm": 3.5324680986915196, |
| "learning_rate": 8.722801349917806e-07, |
| "logits/chosen": -0.7462785840034485, |
| "logits/rejected": -0.19319048523902893, |
| "logps/chosen": -104.92723846435547, |
| "logps/rejected": -464.9132995605469, |
| "loss": 0.7672, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.995131015777588, |
| "rewards/margins": 10.279424667358398, |
| "rewards/rejected": -4.2842936515808105, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.4589253849712511, |
| "grad_norm": 3.6252755412533073, |
| "learning_rate": 8.513753945198072e-07, |
| "logits/chosen": -0.7537373900413513, |
| "logits/rejected": -0.19358740746974945, |
| "logps/chosen": -110.26961517333984, |
| "logps/rejected": -465.21759033203125, |
| "loss": 0.789, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.088963031768799, |
| "rewards/margins": 10.37643814086914, |
| "rewards/rejected": -4.287475109100342, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.465974930055295, |
| "grad_norm": 3.872665997590583, |
| "learning_rate": 8.306726730890638e-07, |
| "logits/chosen": -0.7590047717094421, |
| "logits/rejected": -0.19472454488277435, |
| "logps/chosen": -110.58663177490234, |
| "logps/rejected": -465.0445861816406, |
| "loss": 0.7871, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.122416973114014, |
| "rewards/margins": 10.408243179321289, |
| "rewards/rejected": -4.285826206207275, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.4730244751393387, |
| "grad_norm": 3.7102846367736086, |
| "learning_rate": 8.101745075681106e-07, |
| "logits/chosen": -0.7329391837120056, |
| "logits/rejected": -0.1909240186214447, |
| "logps/chosen": -110.4598159790039, |
| "logps/rejected": -465.27642822265625, |
| "loss": 0.7896, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.111295223236084, |
| "rewards/margins": 10.399382591247559, |
| "rewards/rejected": -4.288087368011475, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.4800740202233824, |
| "grad_norm": 3.906032833336107, |
| "learning_rate": 7.898834097596553e-07, |
| "logits/chosen": -0.7385097742080688, |
| "logits/rejected": -0.19473496079444885, |
| "logps/chosen": -107.56981658935547, |
| "logps/rejected": -465.4288024902344, |
| "loss": 0.7791, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.026830196380615, |
| "rewards/margins": 10.316374778747559, |
| "rewards/rejected": -4.289544105529785, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.4871235653074262, |
| "grad_norm": 3.783514582812726, |
| "learning_rate": 7.698018660927562e-07, |
| "logits/chosen": -0.7368226647377014, |
| "logits/rejected": -0.19908547401428223, |
| "logps/chosen": -113.80726623535156, |
| "logps/rejected": -465.6442565917969, |
| "loss": 0.8112, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0709099769592285, |
| "rewards/margins": 10.362727165222168, |
| "rewards/rejected": -4.291816711425781, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.4941731103914702, |
| "grad_norm": 3.463238510062497, |
| "learning_rate": 7.499323373181394e-07, |
| "logits/chosen": -0.7456727027893066, |
| "logits/rejected": -0.2014772891998291, |
| "logps/chosen": -110.4579849243164, |
| "logps/rejected": -465.69384765625, |
| "loss": 0.7934, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0509185791015625, |
| "rewards/margins": 10.34317398071289, |
| "rewards/rejected": -4.29225492477417, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.5012226554755137, |
| "grad_norm": 3.64100892141048, |
| "learning_rate": 7.302772582066686e-07, |
| "logits/chosen": -0.7430599331855774, |
| "logits/rejected": -0.20189471542835236, |
| "logps/chosen": -107.80598449707031, |
| "logps/rejected": -465.7997741699219, |
| "loss": 0.7769, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.060474395751953, |
| "rewards/margins": 10.353759765625, |
| "rewards/rejected": -4.293285369873047, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.5082722005595577, |
| "grad_norm": 3.821344654148155, |
| "learning_rate": 7.108390372509894e-07, |
| "logits/chosen": -0.7576232552528381, |
| "logits/rejected": -0.20257721841335297, |
| "logps/chosen": -107.84185028076172, |
| "logps/rejected": -466.0832214355469, |
| "loss": 0.7835, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.016088008880615, |
| "rewards/margins": 10.312129020690918, |
| "rewards/rejected": -4.296041011810303, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.5153217456436014, |
| "grad_norm": 3.6781483222864537, |
| "learning_rate": 6.916200563703987e-07, |
| "logits/chosen": -0.7738979458808899, |
| "logits/rejected": -0.2092943638563156, |
| "logps/chosen": -110.91960906982422, |
| "logps/rejected": -466.02197265625, |
| "loss": 0.7967, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.113348007202148, |
| "rewards/margins": 10.408868789672852, |
| "rewards/rejected": -4.295520782470703, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.5223712907276452, |
| "grad_norm": 3.8463727860471097, |
| "learning_rate": 6.726226706189668e-07, |
| "logits/chosen": -0.7589637637138367, |
| "logits/rejected": -0.20386461913585663, |
| "logps/chosen": -108.56011962890625, |
| "logps/rejected": -466.29669189453125, |
| "loss": 0.7797, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.9991984367370605, |
| "rewards/margins": 10.297418594360352, |
| "rewards/rejected": -4.298220157623291, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.529420835811689, |
| "grad_norm": 3.7546467954048284, |
| "learning_rate": 6.538492078969611e-07, |
| "logits/chosen": -0.7622847557067871, |
| "logits/rejected": -0.2044745236635208, |
| "logps/chosen": -109.2506103515625, |
| "logps/rejected": -466.3732604980469, |
| "loss": 0.7839, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.045871257781982, |
| "rewards/margins": 10.344886779785156, |
| "rewards/rejected": -4.299015522003174, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.5364703808957327, |
| "grad_norm": 3.75404337894338, |
| "learning_rate": 6.353019686655806e-07, |
| "logits/chosen": -0.7753379940986633, |
| "logits/rejected": -0.20546992123126984, |
| "logps/chosen": -110.1863021850586, |
| "logps/rejected": -466.5959167480469, |
| "loss": 0.7903, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.081169128417969, |
| "rewards/margins": 10.382391929626465, |
| "rewards/rejected": -4.301222801208496, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.5435199259797767, |
| "grad_norm": 3.5216418635854274, |
| "learning_rate": 6.169832256650698e-07, |
| "logits/chosen": -0.7565549612045288, |
| "logits/rejected": -0.20086073875427246, |
| "logps/chosen": -104.84516906738281, |
| "logps/rejected": -466.4576721191406, |
| "loss": 0.7601, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.991360664367676, |
| "rewards/margins": 10.291136741638184, |
| "rewards/rejected": -4.299776554107666, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.5505694710638203, |
| "grad_norm": 3.5037838198076003, |
| "learning_rate": 5.988952236362153e-07, |
| "logits/chosen": -0.7642261385917664, |
| "logits/rejected": -0.2072605937719345, |
| "logps/chosen": -107.33918762207031, |
| "logps/rejected": -466.7052917480469, |
| "loss": 0.7674, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.061375617980957, |
| "rewards/margins": 10.363710403442383, |
| "rewards/rejected": -4.302334785461426, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.5576190161478642, |
| "grad_norm": 3.864008710250573, |
| "learning_rate": 5.810401790452888e-07, |
| "logits/chosen": -0.7727814316749573, |
| "logits/rejected": -0.21775054931640625, |
| "logps/chosen": -107.37860107421875, |
| "logps/rejected": -466.77545166015625, |
| "loss": 0.7793, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.998612403869629, |
| "rewards/margins": 10.301616668701172, |
| "rewards/rejected": -4.303005218505859, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.564668561231908, |
| "grad_norm": 3.809694724002222, |
| "learning_rate": 5.63420279812435e-07, |
| "logits/chosen": -0.7627360224723816, |
| "logits/rejected": -0.22790618240833282, |
| "logps/chosen": -106.657470703125, |
| "logps/rejected": -466.9906921386719, |
| "loss": 0.7697, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.044661998748779, |
| "rewards/margins": 10.349881172180176, |
| "rewards/rejected": -4.305219650268555, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.5717181063159518, |
| "grad_norm": 3.810418330921462, |
| "learning_rate": 5.460376850435775e-07, |
| "logits/chosen": -0.771653950214386, |
| "logits/rejected": -0.2361011505126953, |
| "logps/chosen": -112.78193664550781, |
| "logps/rejected": -467.257568359375, |
| "loss": 0.8037, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.097472667694092, |
| "rewards/margins": 10.40538501739502, |
| "rewards/rejected": -4.307912349700928, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.5787676513999958, |
| "grad_norm": 3.622523471695452, |
| "learning_rate": 5.288945247658411e-07, |
| "logits/chosen": -0.7521407008171082, |
| "logits/rejected": -0.22493302822113037, |
| "logps/chosen": -112.21357727050781, |
| "logps/rejected": -467.2759704589844, |
| "loss": 0.803, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.095398902893066, |
| "rewards/margins": 10.403505325317383, |
| "rewards/rejected": -4.308106899261475, |
| "step": 4480 |
| }, |
| { |
| "epoch": 1.5858171964840393, |
| "grad_norm": 3.891672529591952, |
| "learning_rate": 5.11992899666546e-07, |
| "logits/chosen": -0.7626200914382935, |
| "logits/rejected": -0.2184225618839264, |
| "logps/chosen": -107.69268035888672, |
| "logps/rejected": -466.9710998535156, |
| "loss": 0.7866, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.997985363006592, |
| "rewards/margins": 10.302816390991211, |
| "rewards/rejected": -4.3048319816589355, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.5928667415680833, |
| "grad_norm": 3.6231664217360335, |
| "learning_rate": 4.953348808357897e-07, |
| "logits/chosen": -0.7735008597373962, |
| "logits/rejected": -0.22704847157001495, |
| "logps/chosen": -108.8917007446289, |
| "logps/rejected": -466.9140319824219, |
| "loss": 0.7833, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.050678253173828, |
| "rewards/margins": 10.355135917663574, |
| "rewards/rejected": -4.304458141326904, |
| "step": 4520 |
| }, |
| { |
| "epoch": 1.5999162866521268, |
| "grad_norm": 3.503524696607652, |
| "learning_rate": 4.789225095126665e-07, |
| "logits/chosen": -0.7676795125007629, |
| "logits/rejected": -0.2262260913848877, |
| "logps/chosen": -110.2127456665039, |
| "logps/rejected": -467.002685546875, |
| "loss": 0.7868, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.059360027313232, |
| "rewards/margins": 10.364716529846191, |
| "rewards/rejected": -4.305356502532959, |
| "step": 4540 |
| }, |
| { |
| "epoch": 1.6069658317361708, |
| "grad_norm": 3.5468237051894795, |
| "learning_rate": 4.6275779683513044e-07, |
| "logits/chosen": -0.7663300633430481, |
| "logits/rejected": -0.22453832626342773, |
| "logps/chosen": -110.7569580078125, |
| "logps/rejected": -467.04058837890625, |
| "loss": 0.7952, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.11013650894165, |
| "rewards/margins": 10.415862083435059, |
| "rewards/rejected": -4.305725574493408, |
| "step": 4560 |
| }, |
| { |
| "epoch": 1.6140153768202146, |
| "grad_norm": 3.9447717460332745, |
| "learning_rate": 4.46842723593561e-07, |
| "logits/chosen": -0.7553516030311584, |
| "logits/rejected": -0.22027336061000824, |
| "logps/chosen": -108.72711181640625, |
| "logps/rejected": -467.15625, |
| "loss": 0.786, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.020168304443359, |
| "rewards/margins": 10.327046394348145, |
| "rewards/rejected": -4.306877613067627, |
| "step": 4580 |
| }, |
| { |
| "epoch": 1.6210649219042583, |
| "grad_norm": 3.642004095627008, |
| "learning_rate": 4.311792399880382e-07, |
| "logits/chosen": -0.7744750380516052, |
| "logits/rejected": -0.22373166680335999, |
| "logps/chosen": -105.97419738769531, |
| "logps/rejected": -467.371337890625, |
| "loss": 0.7647, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.069406509399414, |
| "rewards/margins": 10.37836742401123, |
| "rewards/rejected": -4.308960914611816, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.6281144669883023, |
| "grad_norm": 3.637753232067399, |
| "learning_rate": 4.1576926538936993e-07, |
| "logits/chosen": -0.7700881958007812, |
| "logits/rejected": -0.22813375294208527, |
| "logps/chosen": -107.51484680175781, |
| "logps/rejected": -467.3663635253906, |
| "loss": 0.7771, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.9894490242004395, |
| "rewards/margins": 10.298357963562012, |
| "rewards/rejected": -4.308909893035889, |
| "step": 4620 |
| }, |
| { |
| "epoch": 1.6351640120723459, |
| "grad_norm": 3.7321362303897905, |
| "learning_rate": 4.006146881038947e-07, |
| "logits/chosen": -0.7837023735046387, |
| "logits/rejected": -0.23206615447998047, |
| "logps/chosen": -109.95082092285156, |
| "logps/rejected": -467.3857116699219, |
| "loss": 0.7856, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.083016395568848, |
| "rewards/margins": 10.39222240447998, |
| "rewards/rejected": -4.309206008911133, |
| "step": 4640 |
| }, |
| { |
| "epoch": 1.6422135571563898, |
| "grad_norm": 3.761528498742703, |
| "learning_rate": 3.8571736514209477e-07, |
| "logits/chosen": -0.790883481502533, |
| "logits/rejected": -0.23554182052612305, |
| "logps/chosen": -108.13374328613281, |
| "logps/rejected": -467.5670471191406, |
| "loss": 0.7791, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0900559425354, |
| "rewards/margins": 10.40097713470459, |
| "rewards/rejected": -4.310921669006348, |
| "step": 4660 |
| }, |
| { |
| "epoch": 1.6492631022404336, |
| "grad_norm": 3.8270268639175664, |
| "learning_rate": 3.710791219910409e-07, |
| "logits/chosen": -0.7919120788574219, |
| "logits/rejected": -0.23967795073986053, |
| "logps/chosen": -108.26678466796875, |
| "logps/rejected": -467.712158203125, |
| "loss": 0.7764, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.098334789276123, |
| "rewards/margins": 10.41071605682373, |
| "rewards/rejected": -4.312382698059082, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.6563126473244774, |
| "grad_norm": 3.736170667753776, |
| "learning_rate": 3.567017523907018e-07, |
| "logits/chosen": -0.7862281203269958, |
| "logits/rejected": -0.2402174472808838, |
| "logps/chosen": -106.91987609863281, |
| "logps/rejected": -467.6073303222656, |
| "loss": 0.7763, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0048723220825195, |
| "rewards/margins": 10.316206932067871, |
| "rewards/rejected": -4.311334133148193, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.6633621924085211, |
| "grad_norm": 3.804063902145054, |
| "learning_rate": 3.425870181141394e-07, |
| "logits/chosen": -0.774181067943573, |
| "logits/rejected": -0.23379312455654144, |
| "logps/chosen": -109.593505859375, |
| "logps/rejected": -467.6258850097656, |
| "loss": 0.7874, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.03621768951416, |
| "rewards/margins": 10.347805976867676, |
| "rewards/rejected": -4.311588287353516, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.6704117374925649, |
| "grad_norm": 3.6994558144300425, |
| "learning_rate": 3.2873664875162997e-07, |
| "logits/chosen": -0.7831236720085144, |
| "logits/rejected": -0.22968469560146332, |
| "logps/chosen": -109.0361099243164, |
| "logps/rejected": -467.54327392578125, |
| "loss": 0.7804, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.145629405975342, |
| "rewards/margins": 10.456382751464844, |
| "rewards/rejected": -4.310754299163818, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.6774612825766089, |
| "grad_norm": 3.7255378247554765, |
| "learning_rate": 3.1515234149872123e-07, |
| "logits/chosen": -0.7805240154266357, |
| "logits/rejected": -0.2411789894104004, |
| "logps/chosen": -110.95159912109375, |
| "logps/rejected": -467.4502868652344, |
| "loss": 0.8005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.015338897705078, |
| "rewards/margins": 10.325087547302246, |
| "rewards/rejected": -4.30974817276001, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.6845108276606524, |
| "grad_norm": 3.8920066879065813, |
| "learning_rate": 3.018357609482603e-07, |
| "logits/chosen": -0.784945011138916, |
| "logits/rejected": -0.24280527234077454, |
| "logps/chosen": -108.68704986572266, |
| "logps/rejected": -467.4269104003906, |
| "loss": 0.7854, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.011569499969482, |
| "rewards/margins": 10.32103443145752, |
| "rewards/rejected": -4.309464931488037, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.6915603727446964, |
| "grad_norm": 3.72619208545696, |
| "learning_rate": 2.887885388864206e-07, |
| "logits/chosen": -0.7819967269897461, |
| "logits/rejected": -0.24671606719493866, |
| "logps/chosen": -109.5833511352539, |
| "logps/rejected": -467.4178161621094, |
| "loss": 0.7849, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.126197814941406, |
| "rewards/margins": 10.435675621032715, |
| "rewards/rejected": -4.309477806091309, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.6986099178287402, |
| "grad_norm": 3.7736322479661344, |
| "learning_rate": 2.760122740927429e-07, |
| "logits/chosen": -0.7813412547111511, |
| "logits/rejected": -0.24587781727313995, |
| "logps/chosen": -111.07955169677734, |
| "logps/rejected": -467.5440368652344, |
| "loss": 0.7956, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.071666717529297, |
| "rewards/margins": 10.382463455200195, |
| "rewards/rejected": -4.31079626083374, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.705659462912784, |
| "grad_norm": 3.7894965626963213, |
| "learning_rate": 2.6350853214422825e-07, |
| "logits/chosen": -0.7834030985832214, |
| "logits/rejected": -0.248988538980484, |
| "logps/chosen": -108.4309310913086, |
| "logps/rejected": -467.5397644042969, |
| "loss": 0.7789, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.071558952331543, |
| "rewards/margins": 10.382250785827637, |
| "rewards/rejected": -4.310691833496094, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.7127090079968277, |
| "grad_norm": 3.611336149501781, |
| "learning_rate": 2.512788452234921e-07, |
| "logits/chosen": -0.7827231287956238, |
| "logits/rejected": -0.24999628961086273, |
| "logps/chosen": -110.13957977294922, |
| "logps/rejected": -467.6318359375, |
| "loss": 0.7913, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0582194328308105, |
| "rewards/margins": 10.369796752929688, |
| "rewards/rejected": -4.311577796936035, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.7197585530808714, |
| "grad_norm": 3.5996393260710535, |
| "learning_rate": 2.3932471193101546e-07, |
| "logits/chosen": -0.7880483865737915, |
| "logits/rejected": -0.2517296075820923, |
| "logps/chosen": -110.41153717041016, |
| "logps/rejected": -467.8431091308594, |
| "loss": 0.7877, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.099769115447998, |
| "rewards/margins": 10.413488388061523, |
| "rewards/rejected": -4.313718318939209, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.7268080981649154, |
| "grad_norm": 3.705968217028357, |
| "learning_rate": 2.2764759710150768e-07, |
| "logits/chosen": -0.7876673936843872, |
| "logits/rejected": -0.24836230278015137, |
| "logps/chosen": -107.29426574707031, |
| "logps/rejected": -467.8483581542969, |
| "loss": 0.7781, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.002329349517822, |
| "rewards/margins": 10.31602954864502, |
| "rewards/rejected": -4.313700199127197, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.733857643248959, |
| "grad_norm": 3.9354283285524443, |
| "learning_rate": 2.1624893162441179e-07, |
| "logits/chosen": -0.7937143445014954, |
| "logits/rejected": -0.24714671075344086, |
| "logps/chosen": -111.57144927978516, |
| "logps/rejected": -467.8736267089844, |
| "loss": 0.7945, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.093102931976318, |
| "rewards/margins": 10.407225608825684, |
| "rewards/rejected": -4.314122200012207, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.740907188333003, |
| "grad_norm": 3.6111104668103904, |
| "learning_rate": 2.0513011226856338e-07, |
| "logits/chosen": -0.7964981198310852, |
| "logits/rejected": -0.24506263434886932, |
| "logps/chosen": -107.6418228149414, |
| "logps/rejected": -467.8564147949219, |
| "loss": 0.7846, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.011826992034912, |
| "rewards/margins": 10.3255615234375, |
| "rewards/rejected": -4.313735485076904, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.7479567334170467, |
| "grad_norm": 3.9897264942811193, |
| "learning_rate": 1.94292501511035e-07, |
| "logits/chosen": -0.7860663533210754, |
| "logits/rejected": -0.24424245953559875, |
| "logps/chosen": -109.5399398803711, |
| "logps/rejected": -467.9327697753906, |
| "loss": 0.7766, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.140701770782471, |
| "rewards/margins": 10.455405235290527, |
| "rewards/rejected": -4.31470251083374, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.7550062785010905, |
| "grad_norm": 3.6959213997496243, |
| "learning_rate": 1.8373742737017975e-07, |
| "logits/chosen": -0.7847122550010681, |
| "logits/rejected": -0.24023690819740295, |
| "logps/chosen": -105.888671875, |
| "logps/rejected": -467.9982604980469, |
| "loss": 0.7668, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.049968719482422, |
| "rewards/margins": 10.365229606628418, |
| "rewards/rejected": -4.315260410308838, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.7620558235851342, |
| "grad_norm": 3.7782181080865986, |
| "learning_rate": 1.7346618324290105e-07, |
| "logits/chosen": -0.7863363027572632, |
| "logits/rejected": -0.24366001784801483, |
| "logps/chosen": -108.9438247680664, |
| "logps/rejected": -468.1618347167969, |
| "loss": 0.778, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.094727039337158, |
| "rewards/margins": 10.411661148071289, |
| "rewards/rejected": -4.316934585571289, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.769105368669178, |
| "grad_norm": 3.483830010875064, |
| "learning_rate": 1.634800277461593e-07, |
| "logits/chosen": -0.7824100255966187, |
| "logits/rejected": -0.24446825683116913, |
| "logps/chosen": -110.3822021484375, |
| "logps/rejected": -468.2240295410156, |
| "loss": 0.7862, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0525736808776855, |
| "rewards/margins": 10.370158195495605, |
| "rewards/rejected": -4.31758451461792, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.776154913753222, |
| "grad_norm": 3.634525654068984, |
| "learning_rate": 1.5378018456274568e-07, |
| "logits/chosen": -0.7856306433677673, |
| "logits/rejected": -0.24479059875011444, |
| "logps/chosen": -109.49385070800781, |
| "logps/rejected": -468.20947265625, |
| "loss": 0.7786, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.074910640716553, |
| "rewards/margins": 10.392340660095215, |
| "rewards/rejected": -4.317430019378662, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.7832044588372655, |
| "grad_norm": 3.5533845133528943, |
| "learning_rate": 1.4436784229133444e-07, |
| "logits/chosen": -0.7944580912590027, |
| "logits/rejected": -0.24189452826976776, |
| "logps/chosen": -106.3443832397461, |
| "logps/rejected": -468.205078125, |
| "loss": 0.7666, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.092994213104248, |
| "rewards/margins": 10.410300254821777, |
| "rewards/rejected": -4.317306041717529, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.7902540039213095, |
| "grad_norm": 3.761310621400386, |
| "learning_rate": 1.35244154300834e-07, |
| "logits/chosen": -0.7947224974632263, |
| "logits/rejected": -0.24308596551418304, |
| "logps/chosen": -106.94145965576172, |
| "logps/rejected": -468.1993103027344, |
| "loss": 0.7762, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0086212158203125, |
| "rewards/margins": 10.32580280303955, |
| "rewards/rejected": -4.3171820640563965, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.7973035490053533, |
| "grad_norm": 3.8649700277917898, |
| "learning_rate": 1.2641023858905476e-07, |
| "logits/chosen": -0.7907156348228455, |
| "logits/rejected": -0.24166785180568695, |
| "logps/chosen": -110.41487121582031, |
| "logps/rejected": -468.1279296875, |
| "loss": 0.7962, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.092523097991943, |
| "rewards/margins": 10.40918254852295, |
| "rewards/rejected": -4.316659450531006, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.804353094089397, |
| "grad_norm": 3.8300285347879295, |
| "learning_rate": 1.1786717764571464e-07, |
| "logits/chosen": -0.7850465178489685, |
| "logits/rejected": -0.23768572509288788, |
| "logps/chosen": -107.66456604003906, |
| "logps/rejected": -468.2193908691406, |
| "loss": 0.7835, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.02831506729126, |
| "rewards/margins": 10.345748901367188, |
| "rewards/rejected": -4.317434310913086, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.8114026391734408, |
| "grad_norm": 3.729330289057156, |
| "learning_rate": 1.0961601831978946e-07, |
| "logits/chosen": -0.7787120938301086, |
| "logits/rejected": -0.242794468998909, |
| "logps/chosen": -110.7241439819336, |
| "logps/rejected": -468.2672424316406, |
| "loss": 0.7869, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.073513031005859, |
| "rewards/margins": 10.3915433883667, |
| "rewards/rejected": -4.318031311035156, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.8184521842574846, |
| "grad_norm": 3.765132850408836, |
| "learning_rate": 1.0165777169123703e-07, |
| "logits/chosen": -0.7908723950386047, |
| "logits/rejected": -0.2450651377439499, |
| "logps/chosen": -107.59815979003906, |
| "logps/rejected": -468.2784118652344, |
| "loss": 0.7814, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.020245552062988, |
| "rewards/margins": 10.338305473327637, |
| "rewards/rejected": -4.318060398101807, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.8255017293415285, |
| "grad_norm": 3.8602406164077854, |
| "learning_rate": 9.399341294709957e-08, |
| "logits/chosen": -0.7792836427688599, |
| "logits/rejected": -0.24651508033275604, |
| "logps/chosen": -108.40166473388672, |
| "logps/rejected": -468.3190612792969, |
| "loss": 0.7774, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.076695919036865, |
| "rewards/margins": 10.395309448242188, |
| "rewards/rejected": -4.318613529205322, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.832551274425572, |
| "grad_norm": 3.700379682071922, |
| "learning_rate": 8.662388126200877e-08, |
| "logits/chosen": -0.7963529825210571, |
| "logits/rejected": -0.2502332627773285, |
| "logps/chosen": -110.1925277709961, |
| "logps/rejected": -468.2157287597656, |
| "loss": 0.782, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.111256122589111, |
| "rewards/margins": 10.428763389587402, |
| "rewards/rejected": -4.317507266998291, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.839600819509616, |
| "grad_norm": 3.933863492006353, |
| "learning_rate": 7.955007968309835e-08, |
| "logits/chosen": -0.7933685779571533, |
| "logits/rejected": -0.24959242343902588, |
| "logps/chosen": -109.4911117553711, |
| "logps/rejected": -468.35675048828125, |
| "loss": 0.7803, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.192666053771973, |
| "rewards/margins": 10.511618614196777, |
| "rewards/rejected": -4.318951606750488, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.8466503645936598, |
| "grad_norm": 3.7362448675611453, |
| "learning_rate": 7.277287501934794e-08, |
| "logits/chosen": -0.7918945550918579, |
| "logits/rejected": -0.24759231507778168, |
| "logps/chosen": -107.86492156982422, |
| "logps/rejected": -468.30352783203125, |
| "loss": 0.7757, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.056914806365967, |
| "rewards/margins": 10.37521743774414, |
| "rewards/rejected": -4.31830358505249, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.8536999096777036, |
| "grad_norm": 3.888128836007745, |
| "learning_rate": 6.629309773536735e-08, |
| "logits/chosen": -0.7919878363609314, |
| "logits/rejected": -0.24335601925849915, |
| "logps/chosen": -111.12847137451172, |
| "logps/rejected": -468.2657165527344, |
| "loss": 0.7844, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.172073841094971, |
| "rewards/margins": 10.490161895751953, |
| "rewards/rejected": -4.318088531494141, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.8607494547617474, |
| "grad_norm": 3.952148287689068, |
| "learning_rate": 6.011154184963092e-08, |
| "logits/chosen": -0.7919384241104126, |
| "logits/rejected": -0.23931005597114563, |
| "logps/chosen": -105.4823226928711, |
| "logps/rejected": -468.26806640625, |
| "loss": 0.7644, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.060011863708496, |
| "rewards/margins": 10.377952575683594, |
| "rewards/rejected": -4.317941188812256, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.8677989998457911, |
| "grad_norm": 3.748301603574638, |
| "learning_rate": 5.422896483718077e-08, |
| "logits/chosen": -0.796483039855957, |
| "logits/rejected": -0.24163733422756195, |
| "logps/chosen": -108.889892578125, |
| "logps/rejected": -468.3476257324219, |
| "loss": 0.786, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.064844608306885, |
| "rewards/margins": 10.383631706237793, |
| "rewards/rejected": -4.318787574768066, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.874848544929835, |
| "grad_norm": 3.7690566013848623, |
| "learning_rate": 4.864608753680861e-08, |
| "logits/chosen": -0.7904632091522217, |
| "logits/rejected": -0.23976294696331024, |
| "logps/chosen": -109.89836883544922, |
| "logps/rejected": -468.2937927246094, |
| "loss": 0.7855, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.115835666656494, |
| "rewards/margins": 10.434123039245605, |
| "rewards/rejected": -4.318286895751953, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.8818980900138786, |
| "grad_norm": 3.7426789606880466, |
| "learning_rate": 4.3363594062724444e-08, |
| "logits/chosen": -0.7964286804199219, |
| "logits/rejected": -0.2432793378829956, |
| "logps/chosen": -112.6116714477539, |
| "logps/rejected": -468.3102722167969, |
| "loss": 0.7944, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.161302089691162, |
| "rewards/margins": 10.479842185974121, |
| "rewards/rejected": -4.318540096282959, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.8889476350979226, |
| "grad_norm": 3.7547604537404653, |
| "learning_rate": 3.838213172072669e-08, |
| "logits/chosen": -0.7900466322898865, |
| "logits/rejected": -0.24007217586040497, |
| "logps/chosen": -112.05464172363281, |
| "logps/rejected": -468.3132019042969, |
| "loss": 0.7994, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.090831756591797, |
| "rewards/margins": 10.40927791595459, |
| "rewards/rejected": -4.318446636199951, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.8959971801819664, |
| "grad_norm": 3.8913448082346416, |
| "learning_rate": 3.370231092888365e-08, |
| "logits/chosen": -0.7889196872711182, |
| "logits/rejected": -0.23926615715026855, |
| "logps/chosen": -113.01226043701172, |
| "logps/rejected": -468.3741760253906, |
| "loss": 0.8007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.118028163909912, |
| "rewards/margins": 10.437150955200195, |
| "rewards/rejected": -4.319122314453125, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.9030467252660102, |
| "grad_norm": 3.9082849434048708, |
| "learning_rate": 2.9324705142732557e-08, |
| "logits/chosen": -0.7887745499610901, |
| "logits/rejected": -0.24104784429073334, |
| "logps/chosen": -108.8088150024414, |
| "logps/rejected": -468.35888671875, |
| "loss": 0.7816, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0397725105285645, |
| "rewards/margins": 10.358664512634277, |
| "rewards/rejected": -4.318892002105713, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.9100962703500541, |
| "grad_norm": 3.774912938041044, |
| "learning_rate": 2.5249850785010743e-08, |
| "logits/chosen": -0.7838408350944519, |
| "logits/rejected": -0.24242563545703888, |
| "logps/chosen": -109.24266815185547, |
| "logps/rejected": -468.3182067871094, |
| "loss": 0.7807, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.064825534820557, |
| "rewards/margins": 10.383365631103516, |
| "rewards/rejected": -4.318539619445801, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.9171458154340977, |
| "grad_norm": 3.690894820094116, |
| "learning_rate": 2.1478247179923527e-08, |
| "logits/chosen": -0.7918921113014221, |
| "logits/rejected": -0.24019639194011688, |
| "logps/chosen": -108.5796890258789, |
| "logps/rejected": -468.2789611816406, |
| "loss": 0.7817, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.0598978996276855, |
| "rewards/margins": 10.377920150756836, |
| "rewards/rejected": -4.318021297454834, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.9241953605181417, |
| "grad_norm": 3.8565463435143443, |
| "learning_rate": 1.8010356491957038e-08, |
| "logits/chosen": -0.7949087023735046, |
| "logits/rejected": -0.2375962734222412, |
| "logps/chosen": -105.0801773071289, |
| "logps/rejected": -468.4278259277344, |
| "loss": 0.7635, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.9956889152526855, |
| "rewards/margins": 10.31513500213623, |
| "rewards/rejected": -4.319445610046387, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.9312449056021852, |
| "grad_norm": 3.7684906309960877, |
| "learning_rate": 1.484660366924684e-08, |
| "logits/chosen": -0.794578492641449, |
| "logits/rejected": -0.23898427188396454, |
| "logps/chosen": -109.71996307373047, |
| "logps/rejected": -468.4088439941406, |
| "loss": 0.7866, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.100671291351318, |
| "rewards/margins": 10.420153617858887, |
| "rewards/rejected": -4.319482326507568, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.9382944506862292, |
| "grad_norm": 3.719249551184422, |
| "learning_rate": 1.1987376391504601e-08, |
| "logits/chosen": -0.7956030964851379, |
| "logits/rejected": -0.240992933511734, |
| "logps/chosen": -108.1368408203125, |
| "logps/rejected": -468.4326171875, |
| "loss": 0.7728, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.102418422698975, |
| "rewards/margins": 10.422101020812988, |
| "rewards/rejected": -4.319683074951172, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.945343995770273, |
| "grad_norm": 3.6257082177562685, |
| "learning_rate": 9.433025022513309e-09, |
| "logits/chosen": -0.8015807271003723, |
| "logits/rejected": -0.24043157696723938, |
| "logps/chosen": -108.60029602050781, |
| "logps/rejected": -468.419921875, |
| "loss": 0.7834, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.090754985809326, |
| "rewards/margins": 10.410210609436035, |
| "rewards/rejected": -4.319455146789551, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.9523935408543167, |
| "grad_norm": 3.5711504630557824, |
| "learning_rate": 7.183862567194111e-09, |
| "logits/chosen": -0.792350709438324, |
| "logits/rejected": -0.23879127204418182, |
| "logps/chosen": -107.3970718383789, |
| "logps/rejected": -468.49072265625, |
| "loss": 0.7745, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.004356384277344, |
| "rewards/margins": 10.32451057434082, |
| "rewards/rejected": -4.320154190063477, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.9594430859383607, |
| "grad_norm": 3.9728705486510756, |
| "learning_rate": 5.2401646332508884e-09, |
| "logits/chosen": -0.7877883315086365, |
| "logits/rejected": -0.24027414619922638, |
| "logps/chosen": -108.71870422363281, |
| "logps/rejected": -468.4919128417969, |
| "loss": 0.7832, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.043579578399658, |
| "rewards/margins": 10.363776206970215, |
| "rewards/rejected": -4.320196628570557, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.9664926310224042, |
| "grad_norm": 3.826376351654586, |
| "learning_rate": 3.6021693973992135e-09, |
| "logits/chosen": -0.7913434505462646, |
| "logits/rejected": -0.23852479457855225, |
| "logps/chosen": -110.75666809082031, |
| "logps/rejected": -468.4978942871094, |
| "loss": 0.7869, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.171904563903809, |
| "rewards/margins": 10.492271423339844, |
| "rewards/rejected": -4.320366382598877, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.9735421761064482, |
| "grad_norm": 3.5362233127077127, |
| "learning_rate": 2.2700775761791416e-09, |
| "logits/chosen": -0.7925167083740234, |
| "logits/rejected": -0.23842616379261017, |
| "logps/chosen": -113.69185638427734, |
| "logps/rejected": -468.528564453125, |
| "loss": 0.8076, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.163191318511963, |
| "rewards/margins": 10.483903884887695, |
| "rewards/rejected": -4.320712566375732, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.9805917211904918, |
| "grad_norm": 4.004084496355029, |
| "learning_rate": 1.2440524013607181e-09, |
| "logits/chosen": -0.7958860397338867, |
| "logits/rejected": -0.23961055278778076, |
| "logps/chosen": -114.5505599975586, |
| "logps/rejected": -468.49609375, |
| "loss": 0.8084, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.199494361877441, |
| "rewards/margins": 10.519908905029297, |
| "rewards/rejected": -4.320415019989014, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.9876412662745357, |
| "grad_norm": 3.907140951530193, |
| "learning_rate": 5.242195999421995e-10, |
| "logits/chosen": -0.8031083941459656, |
| "logits/rejected": -0.2381582111120224, |
| "logps/chosen": -110.63336181640625, |
| "logps/rejected": -468.484375, |
| "loss": 0.7892, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.1121954917907715, |
| "rewards/margins": 10.432402610778809, |
| "rewards/rejected": -4.320207118988037, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.9946908113585795, |
| "grad_norm": 3.7397525668561244, |
| "learning_rate": 1.1066737874210199e-10, |
| "logits/chosen": -0.7896060943603516, |
| "logits/rejected": -0.23894786834716797, |
| "logps/chosen": -103.44597625732422, |
| "logps/rejected": -468.4378967285156, |
| "loss": 0.746, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.9833083152771, |
| "rewards/margins": 10.302966117858887, |
| "rewards/rejected": -4.319657802581787, |
| "step": 5660 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 5676, |
| "total_flos": 60766247190528.0, |
| "train_loss": 0.8989143536240387, |
| "train_runtime": 34286.54, |
| "train_samples_per_second": 5.296, |
| "train_steps_per_second": 0.166 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 5676, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 60766247190528.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|