| { |
| "best_global_step": 2800, |
| "best_metric": 0.4922027885913849, |
| "best_model_checkpoint": "/workspace/adversarial-rlhf/runs/dpo-mistral-7b-sft-20251109-1358/checkpoints/checkpoint-2800", |
| "epoch": 1.0, |
| "eval_steps": 400, |
| "global_step": 3821, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0013085579691180318, |
| "grad_norm": 8.714019775390625, |
| "learning_rate": 1.9979063072494113e-05, |
| "logits/chosen": -3.054150104522705, |
| "logits/rejected": -2.987114429473877, |
| "logps/chosen": -285.14349365234375, |
| "logps/rejected": -257.39349365234375, |
| "loss": 0.6783, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.7019270062446594, |
| "rewards/margins": 0.2313508689403534, |
| "rewards/rejected": -0.9332779049873352, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0026171159382360636, |
| "grad_norm": 9.763724327087402, |
| "learning_rate": 1.995289191311175e-05, |
| "logits/chosen": -3.0378708839416504, |
| "logits/rejected": -3.088167667388916, |
| "logps/chosen": -266.4122619628906, |
| "logps/rejected": -260.312255859375, |
| "loss": 0.6071, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.1473495066165924, |
| "rewards/margins": 0.6945368647575378, |
| "rewards/rejected": -0.8418864011764526, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.003925673907354096, |
| "grad_norm": 7.875804901123047, |
| "learning_rate": 1.9926720753729393e-05, |
| "logits/chosen": -2.932835102081299, |
| "logits/rejected": -2.925532341003418, |
| "logps/chosen": -251.9230499267578, |
| "logps/rejected": -243.9307861328125, |
| "loss": 0.5651, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.1294022798538208, |
| "rewards/margins": 0.73015958070755, |
| "rewards/rejected": -0.8595618009567261, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.005234231876472127, |
| "grad_norm": 12.238419532775879, |
| "learning_rate": 1.990054959434703e-05, |
| "logits/chosen": -2.963040828704834, |
| "logits/rejected": -2.9602060317993164, |
| "logps/chosen": -282.7713623046875, |
| "logps/rejected": -284.95208740234375, |
| "loss": 0.7029, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.3746412992477417, |
| "rewards/margins": 0.4599940776824951, |
| "rewards/rejected": -0.8346353769302368, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00654278984559016, |
| "grad_norm": 22.53215217590332, |
| "learning_rate": 1.987437843496467e-05, |
| "logits/chosen": -2.866917848587036, |
| "logits/rejected": -3.069469928741455, |
| "logps/chosen": -254.5303192138672, |
| "logps/rejected": -276.4834289550781, |
| "loss": 0.7576, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.47834309935569763, |
| "rewards/margins": 0.296063095331192, |
| "rewards/rejected": -0.7744062542915344, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.007851347814708191, |
| "grad_norm": 12.339269638061523, |
| "learning_rate": 1.9848207275582308e-05, |
| "logits/chosen": -2.9572787284851074, |
| "logits/rejected": -2.9429330825805664, |
| "logps/chosen": -357.49896240234375, |
| "logps/rejected": -307.7606201171875, |
| "loss": 0.6489, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.26222458481788635, |
| "rewards/margins": 0.576259970664978, |
| "rewards/rejected": -0.31403541564941406, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.009159905783826224, |
| "grad_norm": 14.245732307434082, |
| "learning_rate": 1.982203611619995e-05, |
| "logits/chosen": -2.901461362838745, |
| "logits/rejected": -2.9073104858398438, |
| "logps/chosen": -296.9358215332031, |
| "logps/rejected": -288.05694580078125, |
| "loss": 0.5916, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.48122453689575195, |
| "rewards/margins": 0.6745239496231079, |
| "rewards/rejected": -0.19329944252967834, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.010468463752944255, |
| "grad_norm": 9.731245040893555, |
| "learning_rate": 1.979586495681759e-05, |
| "logits/chosen": -3.002631425857544, |
| "logits/rejected": -2.911168336868286, |
| "logps/chosen": -294.17767333984375, |
| "logps/rejected": -266.30242919921875, |
| "loss": 0.6211, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.7887242436408997, |
| "rewards/margins": 0.6125372052192688, |
| "rewards/rejected": 0.17618700861930847, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.011777021722062287, |
| "grad_norm": 10.440970420837402, |
| "learning_rate": 1.9769693797435227e-05, |
| "logits/chosen": -2.9317564964294434, |
| "logits/rejected": -2.990628719329834, |
| "logps/chosen": -270.26019287109375, |
| "logps/rejected": -251.9040985107422, |
| "loss": 0.687, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.7482277154922485, |
| "rewards/margins": 0.3524876832962036, |
| "rewards/rejected": 0.39574000239372253, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.01308557969118032, |
| "grad_norm": 31.515274047851562, |
| "learning_rate": 1.974352263805287e-05, |
| "logits/chosen": -3.0300424098968506, |
| "logits/rejected": -2.9904026985168457, |
| "logps/chosen": -274.59783935546875, |
| "logps/rejected": -304.90802001953125, |
| "loss": 0.6521, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.6415464878082275, |
| "rewards/margins": 0.4436315596103668, |
| "rewards/rejected": 0.19791492819786072, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.014394137660298352, |
| "grad_norm": 11.78197956085205, |
| "learning_rate": 1.9717351478670507e-05, |
| "logits/chosen": -2.8813371658325195, |
| "logits/rejected": -2.969799757003784, |
| "logps/chosen": -311.2370300292969, |
| "logps/rejected": -267.30950927734375, |
| "loss": 0.7051, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.6925315856933594, |
| "rewards/margins": 0.44889575242996216, |
| "rewards/rejected": 0.243635892868042, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.015702695629416383, |
| "grad_norm": 10.35225772857666, |
| "learning_rate": 1.9691180319288145e-05, |
| "logits/chosen": -3.0078067779541016, |
| "logits/rejected": -2.9517364501953125, |
| "logps/chosen": -283.88946533203125, |
| "logps/rejected": -305.6519775390625, |
| "loss": 0.6004, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.9307526350021362, |
| "rewards/margins": 0.6657913327217102, |
| "rewards/rejected": 0.2649613320827484, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.017011253598534413, |
| "grad_norm": 8.242342948913574, |
| "learning_rate": 1.9665009159905787e-05, |
| "logits/chosen": -3.02073335647583, |
| "logits/rejected": -3.0664925575256348, |
| "logps/chosen": -242.3530731201172, |
| "logps/rejected": -191.35472106933594, |
| "loss": 0.5493, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.9005786180496216, |
| "rewards/margins": 0.8057304620742798, |
| "rewards/rejected": 0.09484807401895523, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.018319811567652448, |
| "grad_norm": 9.840733528137207, |
| "learning_rate": 1.9638838000523425e-05, |
| "logits/chosen": -2.8799917697906494, |
| "logits/rejected": -3.000739812850952, |
| "logps/chosen": -234.18637084960938, |
| "logps/rejected": -286.62249755859375, |
| "loss": 0.4875, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.5168167352676392, |
| "rewards/margins": 1.134171724319458, |
| "rewards/rejected": -0.6173551678657532, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01962836953677048, |
| "grad_norm": 10.049373626708984, |
| "learning_rate": 1.9612666841141064e-05, |
| "logits/chosen": -3.011906862258911, |
| "logits/rejected": -3.059084892272949, |
| "logps/chosen": -290.17071533203125, |
| "logps/rejected": -269.0260009765625, |
| "loss": 0.6103, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.2669380307197571, |
| "rewards/margins": 0.9674070477485657, |
| "rewards/rejected": -0.700468897819519, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.02093692750588851, |
| "grad_norm": 11.49927806854248, |
| "learning_rate": 1.9586495681758702e-05, |
| "logits/chosen": -3.0820119380950928, |
| "logits/rejected": -3.016047954559326, |
| "logps/chosen": -276.62841796875, |
| "logps/rejected": -263.7878723144531, |
| "loss": 0.6079, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.20246413350105286, |
| "rewards/margins": 1.208956003189087, |
| "rewards/rejected": -1.4114201068878174, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.022245485475006543, |
| "grad_norm": 9.35627269744873, |
| "learning_rate": 1.9560324522376344e-05, |
| "logits/chosen": -2.9503092765808105, |
| "logits/rejected": -2.965207576751709, |
| "logps/chosen": -276.2810974121094, |
| "logps/rejected": -255.14111328125, |
| "loss": 0.5773, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.09604328870773315, |
| "rewards/margins": 1.1082497835159302, |
| "rewards/rejected": -1.0122063159942627, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.023554043444124574, |
| "grad_norm": 8.25130844116211, |
| "learning_rate": 1.9534153362993982e-05, |
| "logits/chosen": -3.0043704509735107, |
| "logits/rejected": -3.0347070693969727, |
| "logps/chosen": -252.2435302734375, |
| "logps/rejected": -264.5854797363281, |
| "loss": 0.5873, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.10906165838241577, |
| "rewards/margins": 0.7212487459182739, |
| "rewards/rejected": -0.6121870279312134, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.02486260141324261, |
| "grad_norm": 16.03078842163086, |
| "learning_rate": 1.950798220361162e-05, |
| "logits/chosen": -3.1061108112335205, |
| "logits/rejected": -3.0602822303771973, |
| "logps/chosen": -257.32159423828125, |
| "logps/rejected": -257.50579833984375, |
| "loss": 0.5985, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.06869806349277496, |
| "rewards/margins": 0.6951006650924683, |
| "rewards/rejected": -0.6264026165008545, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.02617115938236064, |
| "grad_norm": 14.365320205688477, |
| "learning_rate": 1.9481811044229262e-05, |
| "logits/chosen": -2.9251151084899902, |
| "logits/rejected": -2.903024196624756, |
| "logps/chosen": -272.07733154296875, |
| "logps/rejected": -242.03759765625, |
| "loss": 0.6797, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.20394983887672424, |
| "rewards/margins": 0.6385009288787842, |
| "rewards/rejected": -0.43455109000205994, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02747971735147867, |
| "grad_norm": 7.471345901489258, |
| "learning_rate": 1.94556398848469e-05, |
| "logits/chosen": -2.9216761589050293, |
| "logits/rejected": -2.9945147037506104, |
| "logps/chosen": -263.98724365234375, |
| "logps/rejected": -267.64422607421875, |
| "loss": 0.5756, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.037400662899017334, |
| "rewards/margins": 0.7236355543136597, |
| "rewards/rejected": -0.7610361576080322, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.028788275320596704, |
| "grad_norm": 11.088753700256348, |
| "learning_rate": 1.942946872546454e-05, |
| "logits/chosen": -2.968885898590088, |
| "logits/rejected": -3.0910255908966064, |
| "logps/chosen": -284.25360107421875, |
| "logps/rejected": -248.49545288085938, |
| "loss": 0.6703, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.018188726156949997, |
| "rewards/margins": 0.46626266837120056, |
| "rewards/rejected": -0.48445138335227966, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.030096833289714735, |
| "grad_norm": 8.927261352539062, |
| "learning_rate": 1.940329756608218e-05, |
| "logits/chosen": -2.9310824871063232, |
| "logits/rejected": -3.0046865940093994, |
| "logps/chosen": -266.04168701171875, |
| "logps/rejected": -239.96011352539062, |
| "loss": 0.4949, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.1395626664161682, |
| "rewards/margins": 1.0587072372436523, |
| "rewards/rejected": -1.1982699632644653, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.031405391258832765, |
| "grad_norm": 9.871482849121094, |
| "learning_rate": 1.937712640669982e-05, |
| "logits/chosen": -3.0028929710388184, |
| "logits/rejected": -2.9868929386138916, |
| "logps/chosen": -302.8767395019531, |
| "logps/rejected": -311.38995361328125, |
| "loss": 0.6321, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.15047518908977509, |
| "rewards/margins": 0.8277810215950012, |
| "rewards/rejected": -0.9782562255859375, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.032713949227950796, |
| "grad_norm": 9.17164421081543, |
| "learning_rate": 1.9350955247317458e-05, |
| "logits/chosen": -2.9586594104766846, |
| "logits/rejected": -2.9300732612609863, |
| "logps/chosen": -258.01080322265625, |
| "logps/rejected": -308.2096862792969, |
| "loss": 0.5535, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.07981396466493607, |
| "rewards/margins": 1.1827664375305176, |
| "rewards/rejected": -1.2625802755355835, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.03402250719706883, |
| "grad_norm": 10.289148330688477, |
| "learning_rate": 1.93247840879351e-05, |
| "logits/chosen": -2.9960761070251465, |
| "logits/rejected": -3.059537172317505, |
| "logps/chosen": -254.6339874267578, |
| "logps/rejected": -250.0092315673828, |
| "loss": 0.6809, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.0245519932359457, |
| "rewards/margins": 0.7872114181518555, |
| "rewards/rejected": -0.8117634654045105, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.035331065166186865, |
| "grad_norm": 7.545676231384277, |
| "learning_rate": 1.9298612928552734e-05, |
| "logits/chosen": -2.88844633102417, |
| "logits/rejected": -2.9349312782287598, |
| "logps/chosen": -257.82183837890625, |
| "logps/rejected": -272.8002014160156, |
| "loss": 0.5251, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.11282087862491608, |
| "rewards/margins": 1.0913177728652954, |
| "rewards/rejected": -0.9784967303276062, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.036639623135304895, |
| "grad_norm": 9.456621170043945, |
| "learning_rate": 1.9272441769170376e-05, |
| "logits/chosen": -2.9461770057678223, |
| "logits/rejected": -3.001709461212158, |
| "logps/chosen": -276.93646240234375, |
| "logps/rejected": -288.7925109863281, |
| "loss": 0.5606, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.20001547038555145, |
| "rewards/margins": 1.2281715869903564, |
| "rewards/rejected": -1.0281562805175781, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.037948181104422926, |
| "grad_norm": 8.707345962524414, |
| "learning_rate": 1.9246270609788015e-05, |
| "logits/chosen": -3.082422971725464, |
| "logits/rejected": -3.10573673248291, |
| "logps/chosen": -270.7345275878906, |
| "logps/rejected": -260.58013916015625, |
| "loss": 0.6272, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.25665849447250366, |
| "rewards/margins": 1.013261079788208, |
| "rewards/rejected": -1.2699196338653564, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.03925673907354096, |
| "grad_norm": 7.2488203048706055, |
| "learning_rate": 1.9220099450405653e-05, |
| "logits/chosen": -2.9218993186950684, |
| "logits/rejected": -2.962679386138916, |
| "logps/chosen": -242.43215942382812, |
| "logps/rejected": -234.44113159179688, |
| "loss": 0.5128, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.46268120408058167, |
| "rewards/margins": 1.0108039379119873, |
| "rewards/rejected": -1.4734852313995361, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04056529704265899, |
| "grad_norm": 6.86571741104126, |
| "learning_rate": 1.9193928291023295e-05, |
| "logits/chosen": -2.927309513092041, |
| "logits/rejected": -3.0099613666534424, |
| "logps/chosen": -274.22613525390625, |
| "logps/rejected": -243.80191040039062, |
| "loss": 0.4807, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.6335469484329224, |
| "rewards/margins": 1.2637865543365479, |
| "rewards/rejected": -1.8973333835601807, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.04187385501177702, |
| "grad_norm": 6.156006336212158, |
| "learning_rate": 1.9167757131640933e-05, |
| "logits/chosen": -2.88226580619812, |
| "logits/rejected": -2.913170099258423, |
| "logps/chosen": -277.84765625, |
| "logps/rejected": -283.1264343261719, |
| "loss": 0.549, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.8374231457710266, |
| "rewards/margins": 1.1124131679534912, |
| "rewards/rejected": -1.9498363733291626, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.043182412980895056, |
| "grad_norm": 13.543861389160156, |
| "learning_rate": 1.914158597225857e-05, |
| "logits/chosen": -3.1270055770874023, |
| "logits/rejected": -3.066056728363037, |
| "logps/chosen": -258.69879150390625, |
| "logps/rejected": -248.0587921142578, |
| "loss": 0.5938, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.8121991157531738, |
| "rewards/margins": 0.9060171246528625, |
| "rewards/rejected": -1.7182163000106812, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.04449097095001309, |
| "grad_norm": 11.699880599975586, |
| "learning_rate": 1.9115414812876213e-05, |
| "logits/chosen": -2.890881299972534, |
| "logits/rejected": -2.9870123863220215, |
| "logps/chosen": -280.4810485839844, |
| "logps/rejected": -268.2397155761719, |
| "loss": 0.5342, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.5369232892990112, |
| "rewards/margins": 1.2705774307250977, |
| "rewards/rejected": -1.8075008392333984, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04579952891913112, |
| "grad_norm": 6.853356838226318, |
| "learning_rate": 1.908924365349385e-05, |
| "logits/chosen": -2.9231629371643066, |
| "logits/rejected": -2.9792449474334717, |
| "logps/chosen": -299.10687255859375, |
| "logps/rejected": -262.38604736328125, |
| "loss": 0.385, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.3845639228820801, |
| "rewards/margins": 1.6383775472640991, |
| "rewards/rejected": -2.0229413509368896, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.04710808688824915, |
| "grad_norm": 13.25985050201416, |
| "learning_rate": 1.906307249411149e-05, |
| "logits/chosen": -3.018812894821167, |
| "logits/rejected": -3.0761470794677734, |
| "logps/chosen": -262.8817443847656, |
| "logps/rejected": -241.5967559814453, |
| "loss": 0.7499, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7485982775688171, |
| "rewards/margins": 0.9674726724624634, |
| "rewards/rejected": -1.7160708904266357, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.04841664485736718, |
| "grad_norm": 8.176429748535156, |
| "learning_rate": 1.9036901334729128e-05, |
| "logits/chosen": -2.7412192821502686, |
| "logits/rejected": -2.875842571258545, |
| "logps/chosen": -287.76531982421875, |
| "logps/rejected": -286.1810302734375, |
| "loss": 0.5304, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.9107913970947266, |
| "rewards/margins": 1.261719822883606, |
| "rewards/rejected": -2.172511339187622, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.04972520282648522, |
| "grad_norm": 6.8885416984558105, |
| "learning_rate": 1.901073017534677e-05, |
| "logits/chosen": -2.882361888885498, |
| "logits/rejected": -2.919163227081299, |
| "logps/chosen": -283.5059509277344, |
| "logps/rejected": -279.5335998535156, |
| "loss": 0.539, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.9788363575935364, |
| "rewards/margins": 1.1429237127304077, |
| "rewards/rejected": -2.1217598915100098, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.05103376079560325, |
| "grad_norm": 7.295929908752441, |
| "learning_rate": 1.898455901596441e-05, |
| "logits/chosen": -2.9931766986846924, |
| "logits/rejected": -3.0010311603546143, |
| "logps/chosen": -278.93463134765625, |
| "logps/rejected": -269.3666076660156, |
| "loss": 0.4351, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.090458631515503, |
| "rewards/margins": 1.1227349042892456, |
| "rewards/rejected": -2.213193893432617, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.05234231876472128, |
| "grad_norm": 7.319633483886719, |
| "learning_rate": 1.8958387856582047e-05, |
| "logits/chosen": -2.991227626800537, |
| "logits/rejected": -2.9963135719299316, |
| "logps/chosen": -261.43463134765625, |
| "logps/rejected": -284.7808837890625, |
| "loss": 0.479, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.3308995962142944, |
| "rewards/margins": 1.1909126043319702, |
| "rewards/rejected": -2.5218122005462646, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05365087673383931, |
| "grad_norm": 8.167856216430664, |
| "learning_rate": 1.893221669719969e-05, |
| "logits/chosen": -3.0124361515045166, |
| "logits/rejected": -3.044581890106201, |
| "logps/chosen": -263.484619140625, |
| "logps/rejected": -291.4689636230469, |
| "loss": 0.6073, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.635348916053772, |
| "rewards/margins": 0.6897547245025635, |
| "rewards/rejected": -2.325103521347046, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.05495943470295734, |
| "grad_norm": 10.466708183288574, |
| "learning_rate": 1.8906045537817327e-05, |
| "logits/chosen": -2.9249165058135986, |
| "logits/rejected": -3.049833059310913, |
| "logps/chosen": -294.9049072265625, |
| "logps/rejected": -312.395263671875, |
| "loss": 0.5377, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.1728683710098267, |
| "rewards/margins": 1.2662220001220703, |
| "rewards/rejected": -2.4390902519226074, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.05626799267207537, |
| "grad_norm": 11.2478666305542, |
| "learning_rate": 1.8879874378434965e-05, |
| "logits/chosen": -2.9483745098114014, |
| "logits/rejected": -2.929673194885254, |
| "logps/chosen": -274.67230224609375, |
| "logps/rejected": -265.0277404785156, |
| "loss": 0.748, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -1.8842582702636719, |
| "rewards/margins": 0.5387625694274902, |
| "rewards/rejected": -2.423020839691162, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.05757655064119341, |
| "grad_norm": 9.180577278137207, |
| "learning_rate": 1.8853703219052607e-05, |
| "logits/chosen": -2.966609477996826, |
| "logits/rejected": -3.0011610984802246, |
| "logps/chosen": -294.72418212890625, |
| "logps/rejected": -295.13134765625, |
| "loss": 0.4851, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5100880861282349, |
| "rewards/margins": 1.0742136240005493, |
| "rewards/rejected": -2.5843019485473633, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.05888510861031144, |
| "grad_norm": 9.102991104125977, |
| "learning_rate": 1.8827532059670245e-05, |
| "logits/chosen": -3.008378028869629, |
| "logits/rejected": -3.0497703552246094, |
| "logps/chosen": -292.7835388183594, |
| "logps/rejected": -260.39215087890625, |
| "loss": 0.526, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0618221759796143, |
| "rewards/margins": 1.2599637508392334, |
| "rewards/rejected": -2.3217859268188477, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.06019366657942947, |
| "grad_norm": 7.269233226776123, |
| "learning_rate": 1.8801360900287884e-05, |
| "logits/chosen": -2.9787087440490723, |
| "logits/rejected": -3.0631184577941895, |
| "logps/chosen": -322.45257568359375, |
| "logps/rejected": -326.454345703125, |
| "loss": 0.5484, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.6545368432998657, |
| "rewards/margins": 1.1309837102890015, |
| "rewards/rejected": -1.7855205535888672, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0615022245485475, |
| "grad_norm": 10.517141342163086, |
| "learning_rate": 1.8775189740905526e-05, |
| "logits/chosen": -2.922563076019287, |
| "logits/rejected": -2.997480869293213, |
| "logps/chosen": -262.8779602050781, |
| "logps/rejected": -304.7298278808594, |
| "loss": 0.5244, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.6560412645339966, |
| "rewards/margins": 1.1671555042266846, |
| "rewards/rejected": -1.8231967687606812, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.06281078251766553, |
| "grad_norm": 6.655497074127197, |
| "learning_rate": 1.8749018581523164e-05, |
| "logits/chosen": -2.994204521179199, |
| "logits/rejected": -3.054076671600342, |
| "logps/chosen": -240.95181274414062, |
| "logps/rejected": -258.10052490234375, |
| "loss": 0.5248, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.3981846868991852, |
| "rewards/margins": 1.1931384801864624, |
| "rewards/rejected": -1.5913231372833252, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.06411934048678357, |
| "grad_norm": 13.732154846191406, |
| "learning_rate": 1.8722847422140802e-05, |
| "logits/chosen": -2.8550612926483154, |
| "logits/rejected": -3.0134494304656982, |
| "logps/chosen": -261.8082580566406, |
| "logps/rejected": -251.3233184814453, |
| "loss": 0.576, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.26595190167427063, |
| "rewards/margins": 1.036379337310791, |
| "rewards/rejected": -1.3023312091827393, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.06542789845590159, |
| "grad_norm": 6.477427005767822, |
| "learning_rate": 1.869667626275844e-05, |
| "logits/chosen": -2.8698863983154297, |
| "logits/rejected": -3.006420850753784, |
| "logps/chosen": -277.37860107421875, |
| "logps/rejected": -293.40899658203125, |
| "loss": 0.4292, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.00966604333370924, |
| "rewards/margins": 1.26931631565094, |
| "rewards/rejected": -1.278982400894165, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.06673645642501963, |
| "grad_norm": 4.57761812210083, |
| "learning_rate": 1.8670505103376082e-05, |
| "logits/chosen": -3.044351100921631, |
| "logits/rejected": -3.0704281330108643, |
| "logps/chosen": -306.3280029296875, |
| "logps/rejected": -274.84344482421875, |
| "loss": 0.5014, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.05158301070332527, |
| "rewards/margins": 1.2034839391708374, |
| "rewards/rejected": -1.1519008874893188, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.06804501439413765, |
| "grad_norm": 10.730956077575684, |
| "learning_rate": 1.864433394399372e-05, |
| "logits/chosen": -2.7571253776550293, |
| "logits/rejected": -2.827652931213379, |
| "logps/chosen": -326.50445556640625, |
| "logps/rejected": -288.9650573730469, |
| "loss": 0.4991, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.46068209409713745, |
| "rewards/margins": 1.4844446182250977, |
| "rewards/rejected": -1.9451268911361694, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.06935357236325569, |
| "grad_norm": 7.003467082977295, |
| "learning_rate": 1.861816278461136e-05, |
| "logits/chosen": -2.933865547180176, |
| "logits/rejected": -3.005654811859131, |
| "logps/chosen": -247.2929229736328, |
| "logps/rejected": -237.02456665039062, |
| "loss": 0.5385, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.32561081647872925, |
| "rewards/margins": 1.441275954246521, |
| "rewards/rejected": -1.7668869495391846, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.07066213033237373, |
| "grad_norm": 9.393101692199707, |
| "learning_rate": 1.8591991625229e-05, |
| "logits/chosen": -2.863271713256836, |
| "logits/rejected": -2.964902877807617, |
| "logps/chosen": -292.7139892578125, |
| "logps/rejected": -270.59466552734375, |
| "loss": 0.5143, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.8659523129463196, |
| "rewards/margins": 1.305040717124939, |
| "rewards/rejected": -2.1709930896759033, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.07197068830149175, |
| "grad_norm": 7.726457595825195, |
| "learning_rate": 1.856582046584664e-05, |
| "logits/chosen": -2.9840664863586426, |
| "logits/rejected": -3.0086734294891357, |
| "logps/chosen": -323.1566467285156, |
| "logps/rejected": -292.984375, |
| "loss": 0.488, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.8014192581176758, |
| "rewards/margins": 1.3756850957870483, |
| "rewards/rejected": -2.1771044731140137, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.07327924627060979, |
| "grad_norm": 11.760858535766602, |
| "learning_rate": 1.8539649306464278e-05, |
| "logits/chosen": -3.0070672035217285, |
| "logits/rejected": -3.066682815551758, |
| "logps/chosen": -269.5534362792969, |
| "logps/rejected": -247.11062622070312, |
| "loss": 0.5206, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.8253080248832703, |
| "rewards/margins": 1.3371561765670776, |
| "rewards/rejected": -2.1624643802642822, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.07458780423972781, |
| "grad_norm": 8.769774436950684, |
| "learning_rate": 1.851347814708192e-05, |
| "logits/chosen": -2.873136281967163, |
| "logits/rejected": -2.9881954193115234, |
| "logps/chosen": -270.26043701171875, |
| "logps/rejected": -258.38629150390625, |
| "loss": 0.5568, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.9168025851249695, |
| "rewards/margins": 1.2144734859466553, |
| "rewards/rejected": -2.1312763690948486, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.07589636220884585, |
| "grad_norm": 9.61667251586914, |
| "learning_rate": 1.8487306987699554e-05, |
| "logits/chosen": -2.9167819023132324, |
| "logits/rejected": -2.8649868965148926, |
| "logps/chosen": -285.44305419921875, |
| "logps/rejected": -276.2369384765625, |
| "loss": 0.5117, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.127011775970459, |
| "rewards/margins": 1.2268073558807373, |
| "rewards/rejected": -2.3538193702697754, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.07720492017796389, |
| "grad_norm": 9.920550346374512, |
| "learning_rate": 1.8461135828317196e-05, |
| "logits/chosen": -2.9163198471069336, |
| "logits/rejected": -2.803847074508667, |
| "logps/chosen": -303.9400939941406, |
| "logps/rejected": -318.45843505859375, |
| "loss": 0.6123, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.005907654762268, |
| "rewards/margins": 0.9369108080863953, |
| "rewards/rejected": -1.9428186416625977, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.07851347814708191, |
| "grad_norm": 8.180704116821289, |
| "learning_rate": 1.8434964668934835e-05, |
| "logits/chosen": -2.8901426792144775, |
| "logits/rejected": -2.998375415802002, |
| "logps/chosen": -291.26788330078125, |
| "logps/rejected": -256.09100341796875, |
| "loss": 0.6608, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.0462286472320557, |
| "rewards/margins": 0.6313639879226685, |
| "rewards/rejected": -1.6775926351547241, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07982203611619995, |
| "grad_norm": 8.73984146118164, |
| "learning_rate": 1.8408793509552473e-05, |
| "logits/chosen": -2.893742322921753, |
| "logits/rejected": -2.846432685852051, |
| "logps/chosen": -326.13775634765625, |
| "logps/rejected": -305.88873291015625, |
| "loss": 0.5684, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.74317467212677, |
| "rewards/margins": 0.9149740934371948, |
| "rewards/rejected": -1.658149003982544, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.08113059408531798, |
| "grad_norm": 10.25295639038086, |
| "learning_rate": 1.8382622350170115e-05, |
| "logits/chosen": -2.9782931804656982, |
| "logits/rejected": -3.011655330657959, |
| "logps/chosen": -317.3922119140625, |
| "logps/rejected": -312.5651550292969, |
| "loss": 0.6222, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.720477283000946, |
| "rewards/margins": 1.068196415901184, |
| "rewards/rejected": -1.7886736392974854, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.08243915205443601, |
| "grad_norm": 5.97982120513916, |
| "learning_rate": 1.8356451190787753e-05, |
| "logits/chosen": -2.924208879470825, |
| "logits/rejected": -2.9455435276031494, |
| "logps/chosen": -293.01611328125, |
| "logps/rejected": -308.312255859375, |
| "loss": 0.442, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.49260687828063965, |
| "rewards/margins": 1.3362938165664673, |
| "rewards/rejected": -1.8289005756378174, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.08374771002355404, |
| "grad_norm": 10.534534454345703, |
| "learning_rate": 1.833028003140539e-05, |
| "logits/chosen": -2.9009976387023926, |
| "logits/rejected": -2.8593392372131348, |
| "logps/chosen": -241.1309356689453, |
| "logps/rejected": -300.59051513671875, |
| "loss": 0.5321, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.9334520101547241, |
| "rewards/margins": 1.1237605810165405, |
| "rewards/rejected": -2.0572123527526855, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.08505626799267207, |
| "grad_norm": 5.939411163330078, |
| "learning_rate": 1.8304108872023033e-05, |
| "logits/chosen": -2.9997639656066895, |
| "logits/rejected": -3.0202760696411133, |
| "logps/chosen": -269.6910400390625, |
| "logps/rejected": -314.3893127441406, |
| "loss": 0.5945, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.7280455827713013, |
| "rewards/margins": 1.2291756868362427, |
| "rewards/rejected": -1.957221269607544, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.08636482596179011, |
| "grad_norm": 11.953298568725586, |
| "learning_rate": 1.827793771264067e-05, |
| "logits/chosen": -2.9474053382873535, |
| "logits/rejected": -2.8666484355926514, |
| "logps/chosen": -305.43157958984375, |
| "logps/rejected": -280.44232177734375, |
| "loss": 0.6738, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.7594338059425354, |
| "rewards/margins": 1.0998165607452393, |
| "rewards/rejected": -1.8592504262924194, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.08767338393090814, |
| "grad_norm": 5.373962879180908, |
| "learning_rate": 1.825176655325831e-05, |
| "logits/chosen": -2.947441816329956, |
| "logits/rejected": -3.092377185821533, |
| "logps/chosen": -309.4542236328125, |
| "logps/rejected": -287.61309814453125, |
| "loss": 0.5345, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.5140593647956848, |
| "rewards/margins": 1.1911535263061523, |
| "rewards/rejected": -1.705212950706482, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.08898194190002617, |
| "grad_norm": 7.115630149841309, |
| "learning_rate": 1.822559539387595e-05, |
| "logits/chosen": -2.958962917327881, |
| "logits/rejected": -3.1099159717559814, |
| "logps/chosen": -279.24053955078125, |
| "logps/rejected": -241.4726104736328, |
| "loss": 0.6099, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.852770984172821, |
| "rewards/margins": 0.7456468343734741, |
| "rewards/rejected": -1.59841787815094, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0902904998691442, |
| "grad_norm": 10.483302116394043, |
| "learning_rate": 1.819942423449359e-05, |
| "logits/chosen": -2.8903632164001465, |
| "logits/rejected": -2.9951415061950684, |
| "logps/chosen": -306.63531494140625, |
| "logps/rejected": -260.45281982421875, |
| "loss": 0.4238, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.48541754484176636, |
| "rewards/margins": 1.6238024234771729, |
| "rewards/rejected": -2.109220027923584, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.09159905783826224, |
| "grad_norm": 5.090464115142822, |
| "learning_rate": 1.817325307511123e-05, |
| "logits/chosen": -2.9965295791625977, |
| "logits/rejected": -3.0568604469299316, |
| "logps/chosen": -282.74853515625, |
| "logps/rejected": -271.244384765625, |
| "loss": 0.4535, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.5810127258300781, |
| "rewards/margins": 1.4132329225540161, |
| "rewards/rejected": -1.9942457675933838, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.09290761580738027, |
| "grad_norm": 9.859427452087402, |
| "learning_rate": 1.8147081915728867e-05, |
| "logits/chosen": -2.8435680866241455, |
| "logits/rejected": -2.930812120437622, |
| "logps/chosen": -268.23638916015625, |
| "logps/rejected": -276.4360046386719, |
| "loss": 0.5494, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8862309455871582, |
| "rewards/margins": 1.2033555507659912, |
| "rewards/rejected": -2.0895867347717285, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.0942161737764983, |
| "grad_norm": 6.459297180175781, |
| "learning_rate": 1.812091075634651e-05, |
| "logits/chosen": -2.76271915435791, |
| "logits/rejected": -2.815265417098999, |
| "logps/chosen": -275.8890380859375, |
| "logps/rejected": -268.1244201660156, |
| "loss": 0.4645, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.765187680721283, |
| "rewards/margins": 1.524695634841919, |
| "rewards/rejected": -2.289883613586426, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.09552473174561633, |
| "grad_norm": 5.793435096740723, |
| "learning_rate": 1.8094739596964147e-05, |
| "logits/chosen": -2.9098358154296875, |
| "logits/rejected": -2.8405723571777344, |
| "logps/chosen": -273.9772033691406, |
| "logps/rejected": -330.71588134765625, |
| "loss": 0.5838, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.3435543179512024, |
| "rewards/margins": 1.3868069648742676, |
| "rewards/rejected": -1.7303612232208252, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.09683328971473436, |
| "grad_norm": 7.4615159034729, |
| "learning_rate": 1.8068568437581785e-05, |
| "logits/chosen": -3.007171392440796, |
| "logits/rejected": -2.9444351196289062, |
| "logps/chosen": -317.48297119140625, |
| "logps/rejected": -327.6475830078125, |
| "loss": 0.5003, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.00199909508228302, |
| "rewards/margins": 1.4891326427459717, |
| "rewards/rejected": -1.4871336221694946, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0981418476838524, |
| "grad_norm": 12.441634178161621, |
| "learning_rate": 1.8042397278199427e-05, |
| "logits/chosen": -2.8079023361206055, |
| "logits/rejected": -2.825000286102295, |
| "logps/chosen": -310.08551025390625, |
| "logps/rejected": -294.376220703125, |
| "loss": 0.657, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.27615275979042053, |
| "rewards/margins": 1.0713586807250977, |
| "rewards/rejected": -1.3475112915039062, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.09945040565297043, |
| "grad_norm": 7.3414764404296875, |
| "learning_rate": 1.8016226118817065e-05, |
| "logits/chosen": -2.9461495876312256, |
| "logits/rejected": -3.022108554840088, |
| "logps/chosen": -261.1568908691406, |
| "logps/rejected": -277.7593994140625, |
| "loss": 0.5084, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.24008110165596008, |
| "rewards/margins": 1.1185321807861328, |
| "rewards/rejected": -0.8784511685371399, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.10075896362208846, |
| "grad_norm": 6.003385066986084, |
| "learning_rate": 1.7990054959434704e-05, |
| "logits/chosen": -2.936765193939209, |
| "logits/rejected": -2.9935214519500732, |
| "logps/chosen": -303.5323791503906, |
| "logps/rejected": -254.99526977539062, |
| "loss": 0.4317, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.10325083881616592, |
| "rewards/margins": 1.691622018814087, |
| "rewards/rejected": -1.7948728799819946, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.1020675215912065, |
| "grad_norm": 11.351863861083984, |
| "learning_rate": 1.7963883800052346e-05, |
| "logits/chosen": -2.944807529449463, |
| "logits/rejected": -2.961432933807373, |
| "logps/chosen": -303.90130615234375, |
| "logps/rejected": -256.1380310058594, |
| "loss": 0.5825, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.37615787982940674, |
| "rewards/margins": 1.2104028463363647, |
| "rewards/rejected": -1.586560606956482, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.10337607956032452, |
| "grad_norm": 5.774983882904053, |
| "learning_rate": 1.7937712640669984e-05, |
| "logits/chosen": -2.9214158058166504, |
| "logits/rejected": -2.9814064502716064, |
| "logps/chosen": -239.17587280273438, |
| "logps/rejected": -241.3479766845703, |
| "loss": 0.4779, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.5130228400230408, |
| "rewards/margins": 1.39810311794281, |
| "rewards/rejected": -1.911125898361206, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.10468463752944256, |
| "grad_norm": 7.241608142852783, |
| "learning_rate": 1.7911541481287622e-05, |
| "logits/chosen": -2.7809369564056396, |
| "logits/rejected": -2.818983554840088, |
| "logps/chosen": -297.4482116699219, |
| "logps/rejected": -268.3238220214844, |
| "loss": 0.426, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.6951895356178284, |
| "rewards/margins": 1.5675487518310547, |
| "rewards/rejected": -2.2627382278442383, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.10468463752944256, |
| "eval_logits/chosen": -2.9021153450012207, |
| "eval_logits/rejected": -2.9161083698272705, |
| "eval_logps/chosen": -294.0876159667969, |
| "eval_logps/rejected": -286.1116027832031, |
| "eval_loss": 0.5800156593322754, |
| "eval_rewards/accuracies": 0.7139999866485596, |
| "eval_rewards/chosen": -1.085422396659851, |
| "eval_rewards/margins": 1.1322746276855469, |
| "eval_rewards/rejected": -2.2176966667175293, |
| "eval_runtime": 763.418, |
| "eval_samples_per_second": 2.62, |
| "eval_steps_per_second": 0.327, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.10599319549856058, |
| "grad_norm": 7.2183356285095215, |
| "learning_rate": 1.788537032190526e-05, |
| "logits/chosen": -2.7751684188842773, |
| "logits/rejected": -2.7491390705108643, |
| "logps/chosen": -269.92083740234375, |
| "logps/rejected": -294.2773742675781, |
| "loss": 0.5384, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.0581508874893188, |
| "rewards/margins": 1.3911430835723877, |
| "rewards/rejected": -2.449293851852417, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.10730175346767862, |
| "grad_norm": 6.803300857543945, |
| "learning_rate": 1.7859199162522902e-05, |
| "logits/chosen": -2.8796913623809814, |
| "logits/rejected": -2.933354616165161, |
| "logps/chosen": -330.7289123535156, |
| "logps/rejected": -279.0171813964844, |
| "loss": 0.5261, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.6245895624160767, |
| "rewards/margins": 1.040086030960083, |
| "rewards/rejected": -2.664675235748291, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.10861031143679666, |
| "grad_norm": 10.539965629577637, |
| "learning_rate": 1.783302800314054e-05, |
| "logits/chosen": -2.959507942199707, |
| "logits/rejected": -3.0030369758605957, |
| "logps/chosen": -310.83880615234375, |
| "logps/rejected": -307.89508056640625, |
| "loss": 0.5226, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.2687180042266846, |
| "rewards/margins": 1.3614869117736816, |
| "rewards/rejected": -2.630204916000366, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.10991886940591468, |
| "grad_norm": 4.8235626220703125, |
| "learning_rate": 1.780685684375818e-05, |
| "logits/chosen": -3.0115878582000732, |
| "logits/rejected": -3.0866780281066895, |
| "logps/chosen": -313.1065673828125, |
| "logps/rejected": -276.90252685546875, |
| "loss": 0.3901, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.8931556940078735, |
| "rewards/margins": 1.7108110189437866, |
| "rewards/rejected": -3.6039669513702393, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.11122742737503272, |
| "grad_norm": 7.7981390953063965, |
| "learning_rate": 1.778068568437582e-05, |
| "logits/chosen": -2.925339937210083, |
| "logits/rejected": -2.993244171142578, |
| "logps/chosen": -278.63165283203125, |
| "logps/rejected": -298.1899108886719, |
| "loss": 0.5171, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.9866344928741455, |
| "rewards/margins": 1.4827523231506348, |
| "rewards/rejected": -3.469386577606201, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.11253598534415074, |
| "grad_norm": 8.36640739440918, |
| "learning_rate": 1.775451452499346e-05, |
| "logits/chosen": -2.83201265335083, |
| "logits/rejected": -2.883605480194092, |
| "logps/chosen": -292.6142883300781, |
| "logps/rejected": -290.2992248535156, |
| "loss": 0.6229, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -2.152479410171509, |
| "rewards/margins": 1.368148922920227, |
| "rewards/rejected": -3.5206284523010254, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.11384454331326878, |
| "grad_norm": 8.783513069152832, |
| "learning_rate": 1.7728343365611098e-05, |
| "logits/chosen": -2.845114231109619, |
| "logits/rejected": -2.8904240131378174, |
| "logps/chosen": -291.27093505859375, |
| "logps/rejected": -297.92047119140625, |
| "loss": 0.5382, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.8989918231964111, |
| "rewards/margins": 1.32305908203125, |
| "rewards/rejected": -3.222050905227661, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.11515310128238682, |
| "grad_norm": 3.8981902599334717, |
| "learning_rate": 1.770217220622874e-05, |
| "logits/chosen": -2.915076494216919, |
| "logits/rejected": -2.996788263320923, |
| "logps/chosen": -285.4013977050781, |
| "logps/rejected": -287.89410400390625, |
| "loss": 0.4423, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.5970932245254517, |
| "rewards/margins": 1.7360804080963135, |
| "rewards/rejected": -3.3331737518310547, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.11646165925150484, |
| "grad_norm": 8.515145301818848, |
| "learning_rate": 1.7676001046846374e-05, |
| "logits/chosen": -2.9851067066192627, |
| "logits/rejected": -2.9686992168426514, |
| "logps/chosen": -326.7468566894531, |
| "logps/rejected": -294.19927978515625, |
| "loss": 0.5876, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -2.047184467315674, |
| "rewards/margins": 1.3992159366607666, |
| "rewards/rejected": -3.4464004039764404, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.11777021722062288, |
| "grad_norm": 10.733110427856445, |
| "learning_rate": 1.7649829887464016e-05, |
| "logits/chosen": -2.961892604827881, |
| "logits/rejected": -3.0297560691833496, |
| "logps/chosen": -320.75885009765625, |
| "logps/rejected": -314.3056640625, |
| "loss": 0.7165, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -2.082216501235962, |
| "rewards/margins": 0.8804648518562317, |
| "rewards/rejected": -2.962681293487549, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1190787751897409, |
| "grad_norm": 8.966882705688477, |
| "learning_rate": 1.7623658728081658e-05, |
| "logits/chosen": -2.9676575660705566, |
| "logits/rejected": -3.0381922721862793, |
| "logps/chosen": -338.7352600097656, |
| "logps/rejected": -317.4922790527344, |
| "loss": 0.6433, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.862184762954712, |
| "rewards/margins": 1.0298666954040527, |
| "rewards/rejected": -2.8920512199401855, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.12038733315885894, |
| "grad_norm": 5.6694865226745605, |
| "learning_rate": 1.7597487568699293e-05, |
| "logits/chosen": -2.8035426139831543, |
| "logits/rejected": -2.9555842876434326, |
| "logps/chosen": -264.0455322265625, |
| "logps/rejected": -252.0572967529297, |
| "loss": 0.5003, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.9079868793487549, |
| "rewards/margins": 1.1119083166122437, |
| "rewards/rejected": -3.019895076751709, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.12169589112797696, |
| "grad_norm": 7.887356758117676, |
| "learning_rate": 1.7571316409316935e-05, |
| "logits/chosen": -3.0163044929504395, |
| "logits/rejected": -3.06129789352417, |
| "logps/chosen": -325.7791748046875, |
| "logps/rejected": -265.0768127441406, |
| "loss": 0.4852, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4090251922607422, |
| "rewards/margins": 1.3811366558074951, |
| "rewards/rejected": -2.7901618480682373, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.123004449097095, |
| "grad_norm": 7.931512832641602, |
| "learning_rate": 1.7545145249934573e-05, |
| "logits/chosen": -2.864602565765381, |
| "logits/rejected": -2.876664161682129, |
| "logps/chosen": -297.8940734863281, |
| "logps/rejected": -293.0927429199219, |
| "loss": 0.3987, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.5729058980941772, |
| "rewards/margins": 1.4379829168319702, |
| "rewards/rejected": -3.0108885765075684, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.12431300706621304, |
| "grad_norm": 5.773271083831787, |
| "learning_rate": 1.751897409055221e-05, |
| "logits/chosen": -2.9812722206115723, |
| "logits/rejected": -2.986359119415283, |
| "logps/chosen": -307.86053466796875, |
| "logps/rejected": -298.8404541015625, |
| "loss": 0.5465, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.1548845767974854, |
| "rewards/margins": 1.2083876132965088, |
| "rewards/rejected": -2.363272190093994, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.12562156503533106, |
| "grad_norm": 9.784249305725098, |
| "learning_rate": 1.7492802931169853e-05, |
| "logits/chosen": -2.872807025909424, |
| "logits/rejected": -2.967390298843384, |
| "logps/chosen": -277.0250244140625, |
| "logps/rejected": -278.3902893066406, |
| "loss": 0.6129, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.5591986179351807, |
| "rewards/margins": 1.1454228162765503, |
| "rewards/rejected": -2.7046213150024414, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.12693012300444909, |
| "grad_norm": 8.737171173095703, |
| "learning_rate": 1.746663177178749e-05, |
| "logits/chosen": -2.9234728813171387, |
| "logits/rejected": -2.9303388595581055, |
| "logps/chosen": -334.78704833984375, |
| "logps/rejected": -279.1730651855469, |
| "loss": 0.5064, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.4824053049087524, |
| "rewards/margins": 1.2012741565704346, |
| "rewards/rejected": -2.6836795806884766, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.12823868097356714, |
| "grad_norm": 6.572619915008545, |
| "learning_rate": 1.744046061240513e-05, |
| "logits/chosen": -2.853210926055908, |
| "logits/rejected": -2.9061577320098877, |
| "logps/chosen": -311.03680419921875, |
| "logps/rejected": -303.3858947753906, |
| "loss": 0.4983, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.1631245613098145, |
| "rewards/margins": 1.1259291172027588, |
| "rewards/rejected": -2.289053440093994, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.12954723894268516, |
| "grad_norm": 9.202119827270508, |
| "learning_rate": 1.741428945302277e-05, |
| "logits/chosen": -2.9796509742736816, |
| "logits/rejected": -3.069392204284668, |
| "logps/chosen": -335.2552185058594, |
| "logps/rejected": -280.06353759765625, |
| "loss": 0.6068, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.4664899110794067, |
| "rewards/margins": 0.9758504629135132, |
| "rewards/rejected": -2.442340135574341, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.13085579691180318, |
| "grad_norm": 12.070145606994629, |
| "learning_rate": 1.738811829364041e-05, |
| "logits/chosen": -2.8466262817382812, |
| "logits/rejected": -2.9542205333709717, |
| "logps/chosen": -297.8977966308594, |
| "logps/rejected": -246.33316040039062, |
| "loss": 0.5466, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8024070262908936, |
| "rewards/margins": 1.1843513250350952, |
| "rewards/rejected": -2.9867584705352783, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.13216435488092124, |
| "grad_norm": 5.3597331047058105, |
| "learning_rate": 1.736194713425805e-05, |
| "logits/chosen": -2.9541573524475098, |
| "logits/rejected": -2.9777073860168457, |
| "logps/chosen": -330.4364013671875, |
| "logps/rejected": -300.50970458984375, |
| "loss": 0.4779, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.542964220046997, |
| "rewards/margins": 1.2471535205841064, |
| "rewards/rejected": -2.7901177406311035, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.13347291285003926, |
| "grad_norm": 6.6305060386657715, |
| "learning_rate": 1.7335775974875687e-05, |
| "logits/chosen": -2.8647067546844482, |
| "logits/rejected": -2.9916813373565674, |
| "logps/chosen": -320.9283752441406, |
| "logps/rejected": -306.7779541015625, |
| "loss": 0.4591, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.785896897315979, |
| "rewards/margins": 1.2584455013275146, |
| "rewards/rejected": -3.044342517852783, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.13478147081915728, |
| "grad_norm": 9.145403861999512, |
| "learning_rate": 1.730960481549333e-05, |
| "logits/chosen": -2.910454750061035, |
| "logits/rejected": -2.956942081451416, |
| "logps/chosen": -306.08074951171875, |
| "logps/rejected": -320.6488952636719, |
| "loss": 0.6325, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -2.1406688690185547, |
| "rewards/margins": 0.9226516485214233, |
| "rewards/rejected": -3.0633203983306885, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.1360900287882753, |
| "grad_norm": 6.541450023651123, |
| "learning_rate": 1.7283433656110967e-05, |
| "logits/chosen": -2.9004507064819336, |
| "logits/rejected": -2.8952460289001465, |
| "logps/chosen": -241.9619140625, |
| "logps/rejected": -298.10394287109375, |
| "loss": 0.5313, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.0367915630340576, |
| "rewards/margins": 1.4055403470993042, |
| "rewards/rejected": -3.4423320293426514, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.13739858675739336, |
| "grad_norm": 7.66124963760376, |
| "learning_rate": 1.7257262496728605e-05, |
| "logits/chosen": -3.003178596496582, |
| "logits/rejected": -3.0736184120178223, |
| "logps/chosen": -271.2985534667969, |
| "logps/rejected": -264.1616516113281, |
| "loss": 0.6178, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.4967647790908813, |
| "rewards/margins": 0.7704972624778748, |
| "rewards/rejected": -2.2672622203826904, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.13870714472651138, |
| "grad_norm": 6.042169570922852, |
| "learning_rate": 1.7231091337346247e-05, |
| "logits/chosen": -2.7864420413970947, |
| "logits/rejected": -2.8688900470733643, |
| "logps/chosen": -328.5660095214844, |
| "logps/rejected": -304.4143981933594, |
| "loss": 0.475, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.1899480819702148, |
| "rewards/margins": 1.6948280334472656, |
| "rewards/rejected": -2.8847761154174805, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1400157026956294, |
| "grad_norm": 7.055963039398193, |
| "learning_rate": 1.7204920177963885e-05, |
| "logits/chosen": -2.9281864166259766, |
| "logits/rejected": -3.05684232711792, |
| "logps/chosen": -321.26922607421875, |
| "logps/rejected": -301.43731689453125, |
| "loss": 0.4043, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.954908549785614, |
| "rewards/margins": 1.4210506677627563, |
| "rewards/rejected": -2.375959634780884, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.14132426066474746, |
| "grad_norm": 10.064764976501465, |
| "learning_rate": 1.7178749018581524e-05, |
| "logits/chosen": -2.8885293006896973, |
| "logits/rejected": -2.951124668121338, |
| "logps/chosen": -265.4410400390625, |
| "logps/rejected": -301.2450866699219, |
| "loss": 0.5462, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.6822071075439453, |
| "rewards/margins": 1.1935334205627441, |
| "rewards/rejected": -2.8757405281066895, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.14263281863386548, |
| "grad_norm": 9.2780122756958, |
| "learning_rate": 1.7152577859199166e-05, |
| "logits/chosen": -3.063042640686035, |
| "logits/rejected": -3.0384624004364014, |
| "logps/chosen": -240.93661499023438, |
| "logps/rejected": -255.59017944335938, |
| "loss": 0.5752, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.7355148792266846, |
| "rewards/margins": 1.3015474081039429, |
| "rewards/rejected": -3.037062406539917, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.1439413766029835, |
| "grad_norm": 9.463279724121094, |
| "learning_rate": 1.7126406699816804e-05, |
| "logits/chosen": -2.814814567565918, |
| "logits/rejected": -2.95234751701355, |
| "logps/chosen": -288.8094177246094, |
| "logps/rejected": -302.408447265625, |
| "loss": 0.6, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.7943483591079712, |
| "rewards/margins": 1.192277193069458, |
| "rewards/rejected": -2.9866251945495605, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.14524993457210156, |
| "grad_norm": 9.526763916015625, |
| "learning_rate": 1.7100235540434442e-05, |
| "logits/chosen": -3.0983662605285645, |
| "logits/rejected": -3.1188201904296875, |
| "logps/chosen": -273.67181396484375, |
| "logps/rejected": -264.828369140625, |
| "loss": 0.5915, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.6144587993621826, |
| "rewards/margins": 1.0379388332366943, |
| "rewards/rejected": -2.652397632598877, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.14655849254121958, |
| "grad_norm": 5.127192497253418, |
| "learning_rate": 1.707406438105208e-05, |
| "logits/chosen": -2.8516077995300293, |
| "logits/rejected": -2.908602476119995, |
| "logps/chosen": -267.1623229980469, |
| "logps/rejected": -243.82681274414062, |
| "loss": 0.4718, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.203351616859436, |
| "rewards/margins": 1.4361515045166016, |
| "rewards/rejected": -2.639503002166748, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.1478670505103376, |
| "grad_norm": 13.090658187866211, |
| "learning_rate": 1.7047893221669722e-05, |
| "logits/chosen": -2.9258511066436768, |
| "logits/rejected": -2.9709384441375732, |
| "logps/chosen": -276.4967346191406, |
| "logps/rejected": -312.01348876953125, |
| "loss": 0.6656, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.651240587234497, |
| "rewards/margins": 1.1099951267242432, |
| "rewards/rejected": -2.7612357139587402, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.14917560847945563, |
| "grad_norm": 7.876342296600342, |
| "learning_rate": 1.702172206228736e-05, |
| "logits/chosen": -2.9246954917907715, |
| "logits/rejected": -2.9212677478790283, |
| "logps/chosen": -297.02996826171875, |
| "logps/rejected": -288.9739074707031, |
| "loss": 0.4067, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.2050755023956299, |
| "rewards/margins": 1.5973553657531738, |
| "rewards/rejected": -2.8024308681488037, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.15048416644857368, |
| "grad_norm": 7.590128421783447, |
| "learning_rate": 1.6995550902905e-05, |
| "logits/chosen": -2.969716787338257, |
| "logits/rejected": -2.8748910427093506, |
| "logps/chosen": -319.4613342285156, |
| "logps/rejected": -333.3774719238281, |
| "loss": 0.5323, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.351219892501831, |
| "rewards/margins": 1.1748894453048706, |
| "rewards/rejected": -2.526109218597412, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.1517927244176917, |
| "grad_norm": 10.58719539642334, |
| "learning_rate": 1.696937974352264e-05, |
| "logits/chosen": -3.0049643516540527, |
| "logits/rejected": -3.038872241973877, |
| "logps/chosen": -276.7691650390625, |
| "logps/rejected": -242.3045654296875, |
| "loss": 0.4836, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.9573993682861328, |
| "rewards/margins": 1.4744302034378052, |
| "rewards/rejected": -2.4318299293518066, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.15310128238680973, |
| "grad_norm": 5.224278926849365, |
| "learning_rate": 1.694320858414028e-05, |
| "logits/chosen": -3.024735689163208, |
| "logits/rejected": -2.978595733642578, |
| "logps/chosen": -285.96759033203125, |
| "logps/rejected": -268.8092041015625, |
| "loss": 0.5066, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.8840298652648926, |
| "rewards/margins": 1.2104597091674805, |
| "rewards/rejected": -2.094489574432373, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.15440984035592778, |
| "grad_norm": 5.459282398223877, |
| "learning_rate": 1.6917037424757918e-05, |
| "logits/chosen": -2.83048415184021, |
| "logits/rejected": -2.8999111652374268, |
| "logps/chosen": -262.3257751464844, |
| "logps/rejected": -246.4630584716797, |
| "loss": 0.5232, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9185107946395874, |
| "rewards/margins": 1.1749193668365479, |
| "rewards/rejected": -2.0934300422668457, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1557183983250458, |
| "grad_norm": 10.723037719726562, |
| "learning_rate": 1.689086626537556e-05, |
| "logits/chosen": -2.9317269325256348, |
| "logits/rejected": -2.9059762954711914, |
| "logps/chosen": -254.2577667236328, |
| "logps/rejected": -269.9380798339844, |
| "loss": 0.4659, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.6864464282989502, |
| "rewards/margins": 1.4080874919891357, |
| "rewards/rejected": -2.094534158706665, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.15702695629416383, |
| "grad_norm": 4.004018306732178, |
| "learning_rate": 1.6864695105993198e-05, |
| "logits/chosen": -2.922083616256714, |
| "logits/rejected": -3.023949146270752, |
| "logps/chosen": -296.1883544921875, |
| "logps/rejected": -307.1870422363281, |
| "loss": 0.544, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.974421501159668, |
| "rewards/margins": 1.086411714553833, |
| "rewards/rejected": -2.06083345413208, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.15833551426328185, |
| "grad_norm": 6.0055623054504395, |
| "learning_rate": 1.6838523946610836e-05, |
| "logits/chosen": -2.6909618377685547, |
| "logits/rejected": -2.8049652576446533, |
| "logps/chosen": -254.7784881591797, |
| "logps/rejected": -252.23828125, |
| "loss": 0.6389, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.2264916896820068, |
| "rewards/margins": 1.0717966556549072, |
| "rewards/rejected": -2.298288106918335, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.1596440722323999, |
| "grad_norm": 5.655783176422119, |
| "learning_rate": 1.6812352787228478e-05, |
| "logits/chosen": -2.911292552947998, |
| "logits/rejected": -2.9565579891204834, |
| "logps/chosen": -319.2688293457031, |
| "logps/rejected": -331.54315185546875, |
| "loss": 0.6664, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.3095886707305908, |
| "rewards/margins": 1.2178889513015747, |
| "rewards/rejected": -2.527477502822876, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.16095263020151793, |
| "grad_norm": 9.346418380737305, |
| "learning_rate": 1.6786181627846113e-05, |
| "logits/chosen": -2.83856463432312, |
| "logits/rejected": -2.893974781036377, |
| "logps/chosen": -323.4009704589844, |
| "logps/rejected": -296.84783935546875, |
| "loss": 0.468, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.903913140296936, |
| "rewards/margins": 1.8124818801879883, |
| "rewards/rejected": -2.7163949012756348, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.16226118817063595, |
| "grad_norm": 7.140948295593262, |
| "learning_rate": 1.6760010468463755e-05, |
| "logits/chosen": -3.0348329544067383, |
| "logits/rejected": -2.941676378250122, |
| "logps/chosen": -276.62664794921875, |
| "logps/rejected": -271.2499084472656, |
| "loss": 0.4796, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1876803636550903, |
| "rewards/margins": 1.2585867643356323, |
| "rewards/rejected": -2.4462671279907227, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.163569746139754, |
| "grad_norm": 5.661865711212158, |
| "learning_rate": 1.6733839309081393e-05, |
| "logits/chosen": -2.9504222869873047, |
| "logits/rejected": -3.029871940612793, |
| "logps/chosen": -366.68597412109375, |
| "logps/rejected": -306.84674072265625, |
| "loss": 0.4189, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.0696208477020264, |
| "rewards/margins": 1.6653257608413696, |
| "rewards/rejected": -2.7349467277526855, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.16487830410887203, |
| "grad_norm": 11.919709205627441, |
| "learning_rate": 1.670766814969903e-05, |
| "logits/chosen": -2.9389662742614746, |
| "logits/rejected": -2.9783272743225098, |
| "logps/chosen": -321.91021728515625, |
| "logps/rejected": -358.46343994140625, |
| "loss": 0.4938, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.0782363414764404, |
| "rewards/margins": 1.602421522140503, |
| "rewards/rejected": -2.6806578636169434, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.16618686207799005, |
| "grad_norm": 6.807126998901367, |
| "learning_rate": 1.6681496990316673e-05, |
| "logits/chosen": -2.7277305126190186, |
| "logits/rejected": -2.8635544776916504, |
| "logps/chosen": -276.2630615234375, |
| "logps/rejected": -287.2210388183594, |
| "loss": 0.5649, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.179365634918213, |
| "rewards/margins": 1.416500449180603, |
| "rewards/rejected": -2.5958662033081055, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.16749542004710807, |
| "grad_norm": 5.412283897399902, |
| "learning_rate": 1.665532583093431e-05, |
| "logits/chosen": -2.9597675800323486, |
| "logits/rejected": -2.948847532272339, |
| "logps/chosen": -269.4257507324219, |
| "logps/rejected": -257.947021484375, |
| "loss": 0.4222, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.7173232436180115, |
| "rewards/margins": 1.5117555856704712, |
| "rewards/rejected": -2.229078769683838, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.16880397801622612, |
| "grad_norm": 8.06993293762207, |
| "learning_rate": 1.662915467155195e-05, |
| "logits/chosen": -2.844407558441162, |
| "logits/rejected": -2.8795688152313232, |
| "logps/chosen": -237.12722778320312, |
| "logps/rejected": -251.5007781982422, |
| "loss": 0.4479, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.5313388705253601, |
| "rewards/margins": 1.5623525381088257, |
| "rewards/rejected": -2.09369158744812, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.17011253598534415, |
| "grad_norm": 8.971646308898926, |
| "learning_rate": 1.660298351216959e-05, |
| "logits/chosen": -2.9815077781677246, |
| "logits/rejected": -2.9581987857818604, |
| "logps/chosen": -240.3534698486328, |
| "logps/rejected": -255.795166015625, |
| "loss": 0.6226, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.010803407058119774, |
| "rewards/margins": 1.1583298444747925, |
| "rewards/rejected": -1.147526502609253, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.17142109395446217, |
| "grad_norm": 8.448005676269531, |
| "learning_rate": 1.657681235278723e-05, |
| "logits/chosen": -2.9446420669555664, |
| "logits/rejected": -2.990194797515869, |
| "logps/chosen": -240.1005859375, |
| "logps/rejected": -285.579345703125, |
| "loss": 0.501, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.36709222197532654, |
| "rewards/margins": 1.4954938888549805, |
| "rewards/rejected": -1.1284016370773315, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.17272965192358022, |
| "grad_norm": 7.867677688598633, |
| "learning_rate": 1.655064119340487e-05, |
| "logits/chosen": -2.9015254974365234, |
| "logits/rejected": -2.964930772781372, |
| "logps/chosen": -318.57244873046875, |
| "logps/rejected": -296.36236572265625, |
| "loss": 0.6188, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.12054802477359772, |
| "rewards/margins": 1.1496269702911377, |
| "rewards/rejected": -1.2701750993728638, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.17403820989269825, |
| "grad_norm": 6.955239772796631, |
| "learning_rate": 1.6524470034022507e-05, |
| "logits/chosen": -2.933335781097412, |
| "logits/rejected": -2.9919040203094482, |
| "logps/chosen": -257.498779296875, |
| "logps/rejected": -255.42098999023438, |
| "loss": 0.4597, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.10350354015827179, |
| "rewards/margins": 1.4865562915802002, |
| "rewards/rejected": -1.5900598764419556, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.17534676786181627, |
| "grad_norm": 4.296855449676514, |
| "learning_rate": 1.649829887464015e-05, |
| "logits/chosen": -2.9155821800231934, |
| "logits/rejected": -2.9966659545898438, |
| "logps/chosen": -281.1880187988281, |
| "logps/rejected": -312.45404052734375, |
| "loss": 0.5136, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.11840929836034775, |
| "rewards/margins": 1.6070137023925781, |
| "rewards/rejected": -1.7254230976104736, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.17665532583093432, |
| "grad_norm": 9.262114524841309, |
| "learning_rate": 1.6472127715257787e-05, |
| "logits/chosen": -2.967158317565918, |
| "logits/rejected": -2.952332019805908, |
| "logps/chosen": -253.4345245361328, |
| "logps/rejected": -320.2381591796875, |
| "loss": 0.5256, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.6660485863685608, |
| "rewards/margins": 1.4134299755096436, |
| "rewards/rejected": -2.0794787406921387, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.17796388380005235, |
| "grad_norm": 10.10685920715332, |
| "learning_rate": 1.6445956555875425e-05, |
| "logits/chosen": -2.8754544258117676, |
| "logits/rejected": -2.988997459411621, |
| "logps/chosen": -271.6341552734375, |
| "logps/rejected": -260.13323974609375, |
| "loss": 0.5957, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.0736002922058105, |
| "rewards/margins": 1.3498318195343018, |
| "rewards/rejected": -2.4234321117401123, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.17927244176917037, |
| "grad_norm": 8.677872657775879, |
| "learning_rate": 1.6419785396493067e-05, |
| "logits/chosen": -2.8993847370147705, |
| "logits/rejected": -2.964925527572632, |
| "logps/chosen": -289.35986328125, |
| "logps/rejected": -288.6343078613281, |
| "loss": 0.6984, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.4944241046905518, |
| "rewards/margins": 1.0044472217559814, |
| "rewards/rejected": -2.498871326446533, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.1805809997382884, |
| "grad_norm": 8.92746353149414, |
| "learning_rate": 1.6393614237110705e-05, |
| "logits/chosen": -2.9617457389831543, |
| "logits/rejected": -2.9055941104888916, |
| "logps/chosen": -218.28567504882812, |
| "logps/rejected": -230.8361053466797, |
| "loss": 0.6033, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.201545000076294, |
| "rewards/margins": 1.0764273405075073, |
| "rewards/rejected": -2.277972459793091, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.18188955770740645, |
| "grad_norm": 8.533876419067383, |
| "learning_rate": 1.6367443077728344e-05, |
| "logits/chosen": -2.8661141395568848, |
| "logits/rejected": -3.0062363147735596, |
| "logps/chosen": -310.6184997558594, |
| "logps/rejected": -263.51409912109375, |
| "loss": 0.525, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.0192677974700928, |
| "rewards/margins": 1.2579354047775269, |
| "rewards/rejected": -2.277203321456909, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.18319811567652447, |
| "grad_norm": 8.52597713470459, |
| "learning_rate": 1.6341271918345986e-05, |
| "logits/chosen": -2.9457011222839355, |
| "logits/rejected": -2.949756145477295, |
| "logps/chosen": -312.6112060546875, |
| "logps/rejected": -284.3760986328125, |
| "loss": 0.6767, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.9324936866760254, |
| "rewards/margins": 0.9121745824813843, |
| "rewards/rejected": -1.8446681499481201, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.1845066736456425, |
| "grad_norm": 10.356490135192871, |
| "learning_rate": 1.6315100758963624e-05, |
| "logits/chosen": -2.836061477661133, |
| "logits/rejected": -2.839916229248047, |
| "logps/chosen": -278.6529235839844, |
| "logps/rejected": -308.8656921386719, |
| "loss": 0.553, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.8688641786575317, |
| "rewards/margins": 1.2454078197479248, |
| "rewards/rejected": -2.114271879196167, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.18581523161476055, |
| "grad_norm": 7.835634231567383, |
| "learning_rate": 1.6288929599581262e-05, |
| "logits/chosen": -2.892359495162964, |
| "logits/rejected": -2.923079490661621, |
| "logps/chosen": -360.86151123046875, |
| "logps/rejected": -314.2693786621094, |
| "loss": 0.5471, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9443033337593079, |
| "rewards/margins": 1.0037552118301392, |
| "rewards/rejected": -1.9480584859848022, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.18712378958387857, |
| "grad_norm": 12.043888092041016, |
| "learning_rate": 1.6262758440198904e-05, |
| "logits/chosen": -2.9467732906341553, |
| "logits/rejected": -2.9871439933776855, |
| "logps/chosen": -285.0309753417969, |
| "logps/rejected": -277.4990539550781, |
| "loss": 0.6576, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.7473507523536682, |
| "rewards/margins": 0.9092914462089539, |
| "rewards/rejected": -1.6566423177719116, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.1884323475529966, |
| "grad_norm": 7.492431163787842, |
| "learning_rate": 1.6236587280816542e-05, |
| "logits/chosen": -2.9607863426208496, |
| "logits/rejected": -2.986611843109131, |
| "logps/chosen": -304.36065673828125, |
| "logps/rejected": -293.6368103027344, |
| "loss": 0.524, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.8549752235412598, |
| "rewards/margins": 1.1655179262161255, |
| "rewards/rejected": -2.0204930305480957, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.18974090552211462, |
| "grad_norm": 7.2737274169921875, |
| "learning_rate": 1.621041612143418e-05, |
| "logits/chosen": -2.8971705436706543, |
| "logits/rejected": -2.9472594261169434, |
| "logps/chosen": -296.6607971191406, |
| "logps/rejected": -282.5213928222656, |
| "loss": 0.4271, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.9705104827880859, |
| "rewards/margins": 1.591907262802124, |
| "rewards/rejected": -2.56241774559021, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.19104946349123267, |
| "grad_norm": 8.259176254272461, |
| "learning_rate": 1.618424496205182e-05, |
| "logits/chosen": -3.0036911964416504, |
| "logits/rejected": -3.027312755584717, |
| "logps/chosen": -242.5350799560547, |
| "logps/rejected": -247.4747772216797, |
| "loss": 0.4891, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.1831446886062622, |
| "rewards/margins": 1.270116925239563, |
| "rewards/rejected": -2.453261613845825, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.1923580214603507, |
| "grad_norm": 6.371599197387695, |
| "learning_rate": 1.615807380266946e-05, |
| "logits/chosen": -3.0131289958953857, |
| "logits/rejected": -3.0459389686584473, |
| "logps/chosen": -289.00909423828125, |
| "logps/rejected": -269.96038818359375, |
| "loss": 0.6098, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.491188883781433, |
| "rewards/margins": 1.4899593591690063, |
| "rewards/rejected": -2.9811482429504395, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.19366657942946872, |
| "grad_norm": 8.84294605255127, |
| "learning_rate": 1.61319026432871e-05, |
| "logits/chosen": -2.978916645050049, |
| "logits/rejected": -3.041909694671631, |
| "logps/chosen": -317.71484375, |
| "logps/rejected": -323.35638427734375, |
| "loss": 0.5665, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.6817235946655273, |
| "rewards/margins": 1.4670751094818115, |
| "rewards/rejected": -3.148798942565918, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.19497513739858677, |
| "grad_norm": 8.727971076965332, |
| "learning_rate": 1.6105731483904738e-05, |
| "logits/chosen": -2.951904773712158, |
| "logits/rejected": -2.9564967155456543, |
| "logps/chosen": -327.0289611816406, |
| "logps/rejected": -340.25408935546875, |
| "loss": 0.4344, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -2.040027141571045, |
| "rewards/margins": 1.7067238092422485, |
| "rewards/rejected": -3.746751070022583, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.1962836953677048, |
| "grad_norm": 7.898123264312744, |
| "learning_rate": 1.607956032452238e-05, |
| "logits/chosen": -2.6445517539978027, |
| "logits/rejected": -2.82399845123291, |
| "logps/chosen": -311.4280090332031, |
| "logps/rejected": -291.5944519042969, |
| "loss": 0.5745, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -2.266785144805908, |
| "rewards/margins": 1.2938086986541748, |
| "rewards/rejected": -3.560593843460083, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.19759225333682282, |
| "grad_norm": 10.65861701965332, |
| "learning_rate": 1.6053389165140018e-05, |
| "logits/chosen": -2.853435516357422, |
| "logits/rejected": -2.9262051582336426, |
| "logps/chosen": -313.5797119140625, |
| "logps/rejected": -305.50750732421875, |
| "loss": 0.6388, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.798832893371582, |
| "rewards/margins": 1.235661268234253, |
| "rewards/rejected": -3.034493923187256, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.19890081130594087, |
| "grad_norm": 6.924118518829346, |
| "learning_rate": 1.6027218005757656e-05, |
| "logits/chosen": -2.8775856494903564, |
| "logits/rejected": -2.9751439094543457, |
| "logps/chosen": -257.1426086425781, |
| "logps/rejected": -287.93536376953125, |
| "loss": 0.6444, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -2.176374912261963, |
| "rewards/margins": 1.34031081199646, |
| "rewards/rejected": -3.516685962677002, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2002093692750589, |
| "grad_norm": 7.09359073638916, |
| "learning_rate": 1.6001046846375298e-05, |
| "logits/chosen": -2.904832363128662, |
| "logits/rejected": -2.8512656688690186, |
| "logps/chosen": -313.9578552246094, |
| "logps/rejected": -327.7096862792969, |
| "loss": 0.5336, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.0932674407958984, |
| "rewards/margins": 1.5766878128051758, |
| "rewards/rejected": -3.669955015182495, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.20151792724417691, |
| "grad_norm": 10.785176277160645, |
| "learning_rate": 1.5974875686992933e-05, |
| "logits/chosen": -2.8301949501037598, |
| "logits/rejected": -2.922959566116333, |
| "logps/chosen": -298.0000915527344, |
| "logps/rejected": -271.7386474609375, |
| "loss": 0.6569, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -2.130873441696167, |
| "rewards/margins": 1.5558631420135498, |
| "rewards/rejected": -3.686736583709717, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.20282648521329494, |
| "grad_norm": 6.668851375579834, |
| "learning_rate": 1.5948704527610575e-05, |
| "logits/chosen": -2.9211020469665527, |
| "logits/rejected": -3.0021309852600098, |
| "logps/chosen": -292.0265197753906, |
| "logps/rejected": -315.9195251464844, |
| "loss": 0.4928, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.8320987224578857, |
| "rewards/margins": 1.6694583892822266, |
| "rewards/rejected": -3.5015571117401123, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.204135043182413, |
| "grad_norm": 5.2693586349487305, |
| "learning_rate": 1.5922533368228213e-05, |
| "logits/chosen": -2.7756457328796387, |
| "logits/rejected": -2.890991687774658, |
| "logps/chosen": -311.8628845214844, |
| "logps/rejected": -321.1475524902344, |
| "loss": 0.4051, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.8370088338851929, |
| "rewards/margins": 1.7155002355575562, |
| "rewards/rejected": -3.552509307861328, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.205443601151531, |
| "grad_norm": 3.826359987258911, |
| "learning_rate": 1.589636220884585e-05, |
| "logits/chosen": -2.9222168922424316, |
| "logits/rejected": -3.0205399990081787, |
| "logps/chosen": -267.26727294921875, |
| "logps/rejected": -281.6890869140625, |
| "loss": 0.511, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.8738367557525635, |
| "rewards/margins": 1.5201383829116821, |
| "rewards/rejected": -3.393975019454956, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.20675215912064904, |
| "grad_norm": 5.202337741851807, |
| "learning_rate": 1.5870191049463493e-05, |
| "logits/chosen": -2.9837005138397217, |
| "logits/rejected": -2.9869465827941895, |
| "logps/chosen": -303.33209228515625, |
| "logps/rejected": -274.7121276855469, |
| "loss": 0.485, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.9972702264785767, |
| "rewards/margins": 1.458465337753296, |
| "rewards/rejected": -3.455735683441162, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.2080607170897671, |
| "grad_norm": 7.0012006759643555, |
| "learning_rate": 1.584401989008113e-05, |
| "logits/chosen": -2.9645447731018066, |
| "logits/rejected": -3.017319917678833, |
| "logps/chosen": -259.61328125, |
| "logps/rejected": -239.63583374023438, |
| "loss": 0.6769, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -2.050851345062256, |
| "rewards/margins": 0.9884527325630188, |
| "rewards/rejected": -3.039304256439209, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.2093692750588851, |
| "grad_norm": 5.7104997634887695, |
| "learning_rate": 1.581784873069877e-05, |
| "logits/chosen": -2.81542706489563, |
| "logits/rejected": -2.957751750946045, |
| "logps/chosen": -299.0074462890625, |
| "logps/rejected": -303.9309997558594, |
| "loss": 0.4352, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.7871536016464233, |
| "rewards/margins": 1.4007420539855957, |
| "rewards/rejected": -3.1878955364227295, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2093692750588851, |
| "eval_logits/chosen": -2.9802186489105225, |
| "eval_logits/rejected": -2.9955716133117676, |
| "eval_logps/chosen": -301.6160888671875, |
| "eval_logps/rejected": -295.63427734375, |
| "eval_loss": 0.5342944860458374, |
| "eval_rewards/accuracies": 0.7335000038146973, |
| "eval_rewards/chosen": -1.8382680416107178, |
| "eval_rewards/margins": 1.3316991329193115, |
| "eval_rewards/rejected": -3.16996693611145, |
| "eval_runtime": 762.4379, |
| "eval_samples_per_second": 2.623, |
| "eval_steps_per_second": 0.328, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.21067783302800314, |
| "grad_norm": 11.304238319396973, |
| "learning_rate": 1.579167757131641e-05, |
| "logits/chosen": -2.933737277984619, |
| "logits/rejected": -3.042451858520508, |
| "logps/chosen": -332.7558898925781, |
| "logps/rejected": -288.9327087402344, |
| "loss": 0.5466, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.800733208656311, |
| "rewards/margins": 1.2069900035858154, |
| "rewards/rejected": -3.007722854614258, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.21198639099712116, |
| "grad_norm": 9.210540771484375, |
| "learning_rate": 1.576550641193405e-05, |
| "logits/chosen": -2.868955135345459, |
| "logits/rejected": -2.874704122543335, |
| "logps/chosen": -280.4917907714844, |
| "logps/rejected": -300.5773010253906, |
| "loss": 0.4237, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -2.1213161945343018, |
| "rewards/margins": 1.7197173833847046, |
| "rewards/rejected": -3.841033458709717, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.2132949489662392, |
| "grad_norm": 8.686461448669434, |
| "learning_rate": 1.573933525255169e-05, |
| "logits/chosen": -2.9677047729492188, |
| "logits/rejected": -2.894029378890991, |
| "logps/chosen": -287.06121826171875, |
| "logps/rejected": -304.01068115234375, |
| "loss": 0.4436, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.7622085809707642, |
| "rewards/margins": 1.928234338760376, |
| "rewards/rejected": -3.6904425621032715, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.21460350693535724, |
| "grad_norm": 14.101777076721191, |
| "learning_rate": 1.571316409316933e-05, |
| "logits/chosen": -2.800630807876587, |
| "logits/rejected": -2.8711049556732178, |
| "logps/chosen": -312.0852966308594, |
| "logps/rejected": -299.11114501953125, |
| "loss": 0.5871, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.8398411273956299, |
| "rewards/margins": 1.5045777559280396, |
| "rewards/rejected": -3.34441876411438, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.21591206490447526, |
| "grad_norm": 7.489319801330566, |
| "learning_rate": 1.568699293378697e-05, |
| "logits/chosen": -3.0238049030303955, |
| "logits/rejected": -3.048868417739868, |
| "logps/chosen": -312.80792236328125, |
| "logps/rejected": -281.77056884765625, |
| "loss": 0.5911, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.506508469581604, |
| "rewards/margins": 1.415809988975525, |
| "rewards/rejected": -2.922318458557129, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.2172206228735933, |
| "grad_norm": 6.227260112762451, |
| "learning_rate": 1.5660821774404607e-05, |
| "logits/chosen": -3.0102522373199463, |
| "logits/rejected": -3.1055586338043213, |
| "logps/chosen": -323.2419128417969, |
| "logps/rejected": -277.99188232421875, |
| "loss": 0.5915, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2429955005645752, |
| "rewards/margins": 1.4162633419036865, |
| "rewards/rejected": -2.6592588424682617, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.21852918084271133, |
| "grad_norm": 7.631740093231201, |
| "learning_rate": 1.5634650615022245e-05, |
| "logits/chosen": -2.921370029449463, |
| "logits/rejected": -2.9625651836395264, |
| "logps/chosen": -267.8898010253906, |
| "logps/rejected": -277.86322021484375, |
| "loss": 0.5891, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.3811520338058472, |
| "rewards/margins": 1.4056289196014404, |
| "rewards/rejected": -2.786780834197998, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.21983773881182936, |
| "grad_norm": 8.195059776306152, |
| "learning_rate": 1.5608479455639887e-05, |
| "logits/chosen": -2.9599719047546387, |
| "logits/rejected": -2.9905598163604736, |
| "logps/chosen": -273.840576171875, |
| "logps/rejected": -254.96310424804688, |
| "loss": 0.5733, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.299445390701294, |
| "rewards/margins": 1.0443214178085327, |
| "rewards/rejected": -2.343766927719116, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.22114629678094738, |
| "grad_norm": 10.264986038208008, |
| "learning_rate": 1.5582308296257525e-05, |
| "logits/chosen": -2.9382224082946777, |
| "logits/rejected": -2.994401454925537, |
| "logps/chosen": -305.3935546875, |
| "logps/rejected": -325.06201171875, |
| "loss": 0.6201, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.1238162517547607, |
| "rewards/margins": 0.8402196764945984, |
| "rewards/rejected": -1.964035987854004, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.22245485475006543, |
| "grad_norm": 4.49017858505249, |
| "learning_rate": 1.5556137136875164e-05, |
| "logits/chosen": -2.9530601501464844, |
| "logits/rejected": -2.9725661277770996, |
| "logps/chosen": -247.1348114013672, |
| "logps/rejected": -260.9407653808594, |
| "loss": 0.5108, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.8431808352470398, |
| "rewards/margins": 1.3323025703430176, |
| "rewards/rejected": -2.1754837036132812, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.22376341271918346, |
| "grad_norm": 8.495135307312012, |
| "learning_rate": 1.5529965977492806e-05, |
| "logits/chosen": -3.0015053749084473, |
| "logits/rejected": -2.9371116161346436, |
| "logps/chosen": -293.3446350097656, |
| "logps/rejected": -295.124755859375, |
| "loss": 0.5258, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7353643774986267, |
| "rewards/margins": 0.9804586172103882, |
| "rewards/rejected": -1.7158231735229492, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.22507197068830148, |
| "grad_norm": 6.842737674713135, |
| "learning_rate": 1.5503794818110444e-05, |
| "logits/chosen": -2.9119091033935547, |
| "logits/rejected": -2.943918228149414, |
| "logps/chosen": -296.51153564453125, |
| "logps/rejected": -366.8010559082031, |
| "loss": 0.5271, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.7795664668083191, |
| "rewards/margins": 1.3591724634170532, |
| "rewards/rejected": -2.1387391090393066, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.22638052865741953, |
| "grad_norm": 7.191964149475098, |
| "learning_rate": 1.5477623658728082e-05, |
| "logits/chosen": -2.9336752891540527, |
| "logits/rejected": -3.028198719024658, |
| "logps/chosen": -345.4011535644531, |
| "logps/rejected": -314.87322998046875, |
| "loss": 0.4365, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0388554334640503, |
| "rewards/margins": 1.4215975999832153, |
| "rewards/rejected": -2.4604530334472656, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.22768908662653756, |
| "grad_norm": 6.653241157531738, |
| "learning_rate": 1.5451452499345724e-05, |
| "logits/chosen": -2.873534917831421, |
| "logits/rejected": -2.9371185302734375, |
| "logps/chosen": -257.2475891113281, |
| "logps/rejected": -265.35906982421875, |
| "loss": 0.5493, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.2732057571411133, |
| "rewards/margins": 1.3971422910690308, |
| "rewards/rejected": -2.6703481674194336, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.22899764459565558, |
| "grad_norm": 7.598719120025635, |
| "learning_rate": 1.5425281339963362e-05, |
| "logits/chosen": -2.9009196758270264, |
| "logits/rejected": -3.0005578994750977, |
| "logps/chosen": -316.24188232421875, |
| "logps/rejected": -311.5219421386719, |
| "loss": 0.4582, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.5340157747268677, |
| "rewards/margins": 1.5491135120391846, |
| "rewards/rejected": -3.083129405975342, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.23030620256477363, |
| "grad_norm": 5.385990142822266, |
| "learning_rate": 1.5399110180581e-05, |
| "logits/chosen": -2.87817120552063, |
| "logits/rejected": -2.9192519187927246, |
| "logps/chosen": -280.2408752441406, |
| "logps/rejected": -259.8953857421875, |
| "loss": 0.2971, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.4804073572158813, |
| "rewards/margins": 2.1618080139160156, |
| "rewards/rejected": -3.6422152519226074, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.23161476053389166, |
| "grad_norm": 9.287992477416992, |
| "learning_rate": 1.537293902119864e-05, |
| "logits/chosen": -3.0223453044891357, |
| "logits/rejected": -3.060854434967041, |
| "logps/chosen": -325.62457275390625, |
| "logps/rejected": -286.83599853515625, |
| "loss": 0.4514, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.2279030084609985, |
| "rewards/margins": 1.9207531213760376, |
| "rewards/rejected": -3.1486563682556152, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.23292331850300968, |
| "grad_norm": 6.760774612426758, |
| "learning_rate": 1.534676786181628e-05, |
| "logits/chosen": -2.885204553604126, |
| "logits/rejected": -2.9592041969299316, |
| "logps/chosen": -306.09747314453125, |
| "logps/rejected": -291.3219299316406, |
| "loss": 0.5089, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0230804681777954, |
| "rewards/margins": 1.92485773563385, |
| "rewards/rejected": -2.9479384422302246, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.2342318764721277, |
| "grad_norm": 6.3509521484375, |
| "learning_rate": 1.532059670243392e-05, |
| "logits/chosen": -3.0189361572265625, |
| "logits/rejected": -3.0262978076934814, |
| "logps/chosen": -272.2768859863281, |
| "logps/rejected": -317.0359802246094, |
| "loss": 0.5246, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.8292263746261597, |
| "rewards/margins": 1.4462980031967163, |
| "rewards/rejected": -2.275524377822876, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.23554043444124576, |
| "grad_norm": 7.06498908996582, |
| "learning_rate": 1.5294425543051558e-05, |
| "logits/chosen": -2.9776549339294434, |
| "logits/rejected": -3.023850202560425, |
| "logps/chosen": -232.8544158935547, |
| "logps/rejected": -277.17779541015625, |
| "loss": 0.3988, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.6818987727165222, |
| "rewards/margins": 2.0731732845306396, |
| "rewards/rejected": -2.7550721168518066, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.23684899241036378, |
| "grad_norm": 10.507072448730469, |
| "learning_rate": 1.52682543836692e-05, |
| "logits/chosen": -2.891435384750366, |
| "logits/rejected": -2.8695714473724365, |
| "logps/chosen": -305.70037841796875, |
| "logps/rejected": -349.24957275390625, |
| "loss": 0.4944, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0776326656341553, |
| "rewards/margins": 1.8689689636230469, |
| "rewards/rejected": -2.9466018676757812, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.2381575503794818, |
| "grad_norm": 6.330070495605469, |
| "learning_rate": 1.5242083224286836e-05, |
| "logits/chosen": -3.0263266563415527, |
| "logits/rejected": -3.0248868465423584, |
| "logps/chosen": -348.4727478027344, |
| "logps/rejected": -341.72454833984375, |
| "loss": 0.5856, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.3683652877807617, |
| "rewards/margins": 1.5177398920059204, |
| "rewards/rejected": -2.8861050605773926, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.23946610834859985, |
| "grad_norm": 10.05215072631836, |
| "learning_rate": 1.5215912064904476e-05, |
| "logits/chosen": -3.062643051147461, |
| "logits/rejected": -3.0395474433898926, |
| "logps/chosen": -288.37176513671875, |
| "logps/rejected": -341.9781188964844, |
| "loss": 0.5938, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.2461017370224, |
| "rewards/margins": 1.4891635179519653, |
| "rewards/rejected": -2.7352652549743652, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.24077466631771788, |
| "grad_norm": 5.560635566711426, |
| "learning_rate": 1.5189740905522116e-05, |
| "logits/chosen": -2.876089096069336, |
| "logits/rejected": -2.888617753982544, |
| "logps/chosen": -309.76898193359375, |
| "logps/rejected": -306.857421875, |
| "loss": 0.5375, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.8361619114875793, |
| "rewards/margins": 1.5215009450912476, |
| "rewards/rejected": -2.3576629161834717, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2420832242868359, |
| "grad_norm": 6.002974510192871, |
| "learning_rate": 1.5163569746139755e-05, |
| "logits/chosen": -2.9480457305908203, |
| "logits/rejected": -2.96376895904541, |
| "logps/chosen": -240.8140106201172, |
| "logps/rejected": -266.04876708984375, |
| "loss": 0.4586, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0198220014572144, |
| "rewards/margins": 1.3288692235946655, |
| "rewards/rejected": -2.348691463470459, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.24339178225595393, |
| "grad_norm": 9.024993896484375, |
| "learning_rate": 1.5137398586757395e-05, |
| "logits/chosen": -2.9898533821105957, |
| "logits/rejected": -3.0677971839904785, |
| "logps/chosen": -277.70867919921875, |
| "logps/rejected": -248.4989776611328, |
| "loss": 0.5183, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9063308835029602, |
| "rewards/margins": 1.4718434810638428, |
| "rewards/rejected": -2.3781745433807373, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.24470034022507198, |
| "grad_norm": 7.851377010345459, |
| "learning_rate": 1.5111227427375035e-05, |
| "logits/chosen": -3.0159552097320557, |
| "logits/rejected": -3.011725902557373, |
| "logps/chosen": -317.4552001953125, |
| "logps/rejected": -295.1143798828125, |
| "loss": 0.5266, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.4428759813308716, |
| "rewards/margins": 0.9541980028152466, |
| "rewards/rejected": -2.397073984146118, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.24600889819419, |
| "grad_norm": 8.614380836486816, |
| "learning_rate": 1.5085056267992673e-05, |
| "logits/chosen": -3.000847578048706, |
| "logits/rejected": -2.930838108062744, |
| "logps/chosen": -258.88580322265625, |
| "logps/rejected": -253.4350128173828, |
| "loss": 0.4261, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.52020263671875, |
| "rewards/margins": 1.513375997543335, |
| "rewards/rejected": -3.033578634262085, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.24731745616330802, |
| "grad_norm": 6.095282554626465, |
| "learning_rate": 1.5058885108610313e-05, |
| "logits/chosen": -2.8448147773742676, |
| "logits/rejected": -3.0329713821411133, |
| "logps/chosen": -328.8890075683594, |
| "logps/rejected": -273.93780517578125, |
| "loss": 0.6222, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.6622378826141357, |
| "rewards/margins": 1.036049723625183, |
| "rewards/rejected": -2.6982877254486084, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.24862601413242608, |
| "grad_norm": 7.413900852203369, |
| "learning_rate": 1.5032713949227953e-05, |
| "logits/chosen": -3.015958547592163, |
| "logits/rejected": -3.03420090675354, |
| "logps/chosen": -283.6328430175781, |
| "logps/rejected": -314.6015625, |
| "loss": 0.5066, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.7141430377960205, |
| "rewards/margins": 1.4117655754089355, |
| "rewards/rejected": -3.125908613204956, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.2499345721015441, |
| "grad_norm": 9.373759269714355, |
| "learning_rate": 1.500654278984559e-05, |
| "logits/chosen": -3.109154224395752, |
| "logits/rejected": -3.100782871246338, |
| "logps/chosen": -352.9435729980469, |
| "logps/rejected": -289.6947326660156, |
| "loss": 0.662, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -2.0580523014068604, |
| "rewards/margins": 1.144476294517517, |
| "rewards/rejected": -3.202528715133667, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.2512431300706621, |
| "grad_norm": 7.236301898956299, |
| "learning_rate": 1.498037163046323e-05, |
| "logits/chosen": -3.0416085720062256, |
| "logits/rejected": -2.942519426345825, |
| "logps/chosen": -284.47186279296875, |
| "logps/rejected": -271.6186218261719, |
| "loss": 0.5027, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -2.4203197956085205, |
| "rewards/margins": 1.4996702671051025, |
| "rewards/rejected": -3.919990062713623, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.2525516880397802, |
| "grad_norm": 8.457993507385254, |
| "learning_rate": 1.495420047108087e-05, |
| "logits/chosen": -3.023918628692627, |
| "logits/rejected": -3.0495004653930664, |
| "logps/chosen": -327.3899841308594, |
| "logps/rejected": -347.85992431640625, |
| "loss": 0.4475, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -2.4540486335754395, |
| "rewards/margins": 1.992296814918518, |
| "rewards/rejected": -4.446345329284668, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.25386024600889817, |
| "grad_norm": 4.348357677459717, |
| "learning_rate": 1.4928029311698508e-05, |
| "logits/chosen": -3.017470359802246, |
| "logits/rejected": -3.073237895965576, |
| "logps/chosen": -384.67791748046875, |
| "logps/rejected": -363.98028564453125, |
| "loss": 0.4786, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.603308916091919, |
| "rewards/margins": 1.511370301246643, |
| "rewards/rejected": -4.114678859710693, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.2551688039780162, |
| "grad_norm": 8.246343612670898, |
| "learning_rate": 1.4901858152316149e-05, |
| "logits/chosen": -3.0291495323181152, |
| "logits/rejected": -3.1045758724212646, |
| "logps/chosen": -331.7242736816406, |
| "logps/rejected": -312.28240966796875, |
| "loss": 0.3814, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -2.5157318115234375, |
| "rewards/margins": 1.9870193004608154, |
| "rewards/rejected": -4.502751350402832, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.2564773619471343, |
| "grad_norm": 5.465985298156738, |
| "learning_rate": 1.4875686992933789e-05, |
| "logits/chosen": -2.9358432292938232, |
| "logits/rejected": -2.9799187183380127, |
| "logps/chosen": -305.95867919921875, |
| "logps/rejected": -268.3275146484375, |
| "loss": 0.5552, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -2.049027681350708, |
| "rewards/margins": 1.7989801168441772, |
| "rewards/rejected": -3.848007917404175, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.25778591991625227, |
| "grad_norm": 10.069610595703125, |
| "learning_rate": 1.4849515833551427e-05, |
| "logits/chosen": -2.8917248249053955, |
| "logits/rejected": -3.050568103790283, |
| "logps/chosen": -314.1599426269531, |
| "logps/rejected": -358.89093017578125, |
| "loss": 0.4803, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.7643229961395264, |
| "rewards/margins": 1.844347596168518, |
| "rewards/rejected": -3.608670711517334, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.2590944778853703, |
| "grad_norm": 7.225062370300293, |
| "learning_rate": 1.4823344674169067e-05, |
| "logits/chosen": -3.099228620529175, |
| "logits/rejected": -3.1308112144470215, |
| "logps/chosen": -294.34185791015625, |
| "logps/rejected": -274.89483642578125, |
| "loss": 0.687, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.4007724523544312, |
| "rewards/margins": 0.9705973863601685, |
| "rewards/rejected": -2.3713696002960205, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2604030358544884, |
| "grad_norm": 4.582951068878174, |
| "learning_rate": 1.4797173514786707e-05, |
| "logits/chosen": -2.9205706119537354, |
| "logits/rejected": -2.8099303245544434, |
| "logps/chosen": -232.9236297607422, |
| "logps/rejected": -277.83404541015625, |
| "loss": 0.5797, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.636836051940918, |
| "rewards/margins": 0.9393714666366577, |
| "rewards/rejected": -2.5762076377868652, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.26171159382360637, |
| "grad_norm": 7.905261516571045, |
| "learning_rate": 1.4771002355404345e-05, |
| "logits/chosen": -3.0076744556427, |
| "logits/rejected": -3.063244581222534, |
| "logps/chosen": -247.02780151367188, |
| "logps/rejected": -313.2117919921875, |
| "loss": 0.4223, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.349962830543518, |
| "rewards/margins": 1.5588710308074951, |
| "rewards/rejected": -2.9088339805603027, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2630201517927244, |
| "grad_norm": 6.2216644287109375, |
| "learning_rate": 1.4744831196021986e-05, |
| "logits/chosen": -2.932487964630127, |
| "logits/rejected": -3.0266571044921875, |
| "logps/chosen": -322.1878356933594, |
| "logps/rejected": -319.5711975097656, |
| "loss": 0.5979, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.4996315240859985, |
| "rewards/margins": 1.2307758331298828, |
| "rewards/rejected": -2.730407238006592, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.2643287097618425, |
| "grad_norm": 9.049347877502441, |
| "learning_rate": 1.4718660036639626e-05, |
| "logits/chosen": -3.0737674236297607, |
| "logits/rejected": -3.073376178741455, |
| "logps/chosen": -263.8631896972656, |
| "logps/rejected": -273.49334716796875, |
| "loss": 0.6037, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.218382477760315, |
| "rewards/margins": 1.3037409782409668, |
| "rewards/rejected": -2.5221235752105713, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.26563726773096047, |
| "grad_norm": 8.119050025939941, |
| "learning_rate": 1.4692488877257262e-05, |
| "logits/chosen": -3.072826623916626, |
| "logits/rejected": -3.0864005088806152, |
| "logps/chosen": -239.6600341796875, |
| "logps/rejected": -260.7843933105469, |
| "loss": 0.4576, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.3692705631256104, |
| "rewards/margins": 1.317340612411499, |
| "rewards/rejected": -2.6866109371185303, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.2669458257000785, |
| "grad_norm": 6.507713317871094, |
| "learning_rate": 1.4666317717874902e-05, |
| "logits/chosen": -3.034135341644287, |
| "logits/rejected": -3.121675729751587, |
| "logps/chosen": -296.17181396484375, |
| "logps/rejected": -308.36956787109375, |
| "loss": 0.5734, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.3819348812103271, |
| "rewards/margins": 0.8500627279281616, |
| "rewards/rejected": -2.2319977283477783, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.26825438366919657, |
| "grad_norm": 7.960349082946777, |
| "learning_rate": 1.4640146558492542e-05, |
| "logits/chosen": -2.9665277004241943, |
| "logits/rejected": -3.0263547897338867, |
| "logps/chosen": -286.6892395019531, |
| "logps/rejected": -301.81182861328125, |
| "loss": 0.519, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.1299912929534912, |
| "rewards/margins": 1.5969384908676147, |
| "rewards/rejected": -2.7269299030303955, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.26956294163831457, |
| "grad_norm": 8.107889175415039, |
| "learning_rate": 1.461397539911018e-05, |
| "logits/chosen": -2.971383571624756, |
| "logits/rejected": -3.0405492782592773, |
| "logps/chosen": -300.4283752441406, |
| "logps/rejected": -304.0221252441406, |
| "loss": 0.485, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.752074122428894, |
| "rewards/margins": 1.4214041233062744, |
| "rewards/rejected": -2.173478364944458, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.2708714996074326, |
| "grad_norm": 4.233426094055176, |
| "learning_rate": 1.458780423972782e-05, |
| "logits/chosen": -2.9646596908569336, |
| "logits/rejected": -2.9252395629882812, |
| "logps/chosen": -302.5656433105469, |
| "logps/rejected": -310.8838195800781, |
| "loss": 0.4718, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.9153076410293579, |
| "rewards/margins": 1.480229139328003, |
| "rewards/rejected": -2.3955366611480713, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.2721800575765506, |
| "grad_norm": 7.866308689117432, |
| "learning_rate": 1.4561633080345461e-05, |
| "logits/chosen": -3.0362677574157715, |
| "logits/rejected": -3.028848886489868, |
| "logps/chosen": -285.7720947265625, |
| "logps/rejected": -300.82073974609375, |
| "loss": 0.5479, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.9976667165756226, |
| "rewards/margins": 1.399741530418396, |
| "rewards/rejected": -2.3974080085754395, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.27348861554566867, |
| "grad_norm": 6.74353551864624, |
| "learning_rate": 1.4535461920963101e-05, |
| "logits/chosen": -3.0779507160186768, |
| "logits/rejected": -3.0220227241516113, |
| "logps/chosen": -300.1296691894531, |
| "logps/rejected": -300.6394348144531, |
| "loss": 0.6002, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.1509724855422974, |
| "rewards/margins": 1.479297161102295, |
| "rewards/rejected": -2.6302695274353027, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.2747971735147867, |
| "grad_norm": 5.20580530166626, |
| "learning_rate": 1.450929076158074e-05, |
| "logits/chosen": -3.0654895305633545, |
| "logits/rejected": -3.1023030281066895, |
| "logps/chosen": -315.8076171875, |
| "logps/rejected": -290.9166564941406, |
| "loss": 0.4704, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1931313276290894, |
| "rewards/margins": 1.4464247226715088, |
| "rewards/rejected": -2.6395561695098877, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2761057314839047, |
| "grad_norm": 10.180657386779785, |
| "learning_rate": 1.448311960219838e-05, |
| "logits/chosen": -3.0087831020355225, |
| "logits/rejected": -3.116145372390747, |
| "logps/chosen": -279.8875732421875, |
| "logps/rejected": -296.86968994140625, |
| "loss": 0.6297, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.051395297050476, |
| "rewards/margins": 1.0588595867156982, |
| "rewards/rejected": -2.110255002975464, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.27741428945302277, |
| "grad_norm": 8.839851379394531, |
| "learning_rate": 1.445694844281602e-05, |
| "logits/chosen": -3.0243630409240723, |
| "logits/rejected": -3.0204248428344727, |
| "logps/chosen": -283.5610046386719, |
| "logps/rejected": -266.9760437011719, |
| "loss": 0.5281, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0122487545013428, |
| "rewards/margins": 1.085057020187378, |
| "rewards/rejected": -2.0973057746887207, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.2787228474221408, |
| "grad_norm": 11.794388771057129, |
| "learning_rate": 1.4430777283433656e-05, |
| "logits/chosen": -3.078611373901367, |
| "logits/rejected": -3.032027006149292, |
| "logps/chosen": -295.5868225097656, |
| "logps/rejected": -251.0193634033203, |
| "loss": 0.6133, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.292945384979248, |
| "rewards/margins": 0.9534968137741089, |
| "rewards/rejected": -2.2464423179626465, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.2800314053912588, |
| "grad_norm": 7.8189778327941895, |
| "learning_rate": 1.4404606124051296e-05, |
| "logits/chosen": -3.0526416301727295, |
| "logits/rejected": -2.993549346923828, |
| "logps/chosen": -308.3221740722656, |
| "logps/rejected": -313.1771240234375, |
| "loss": 0.4449, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.091350793838501, |
| "rewards/margins": 1.4200166463851929, |
| "rewards/rejected": -2.5113673210144043, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.28133996336037687, |
| "grad_norm": 5.642181396484375, |
| "learning_rate": 1.4378434964668936e-05, |
| "logits/chosen": -2.9637482166290283, |
| "logits/rejected": -2.9531664848327637, |
| "logps/chosen": -280.0079040527344, |
| "logps/rejected": -269.2755432128906, |
| "loss": 0.5285, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.3164184093475342, |
| "rewards/margins": 1.267393708229065, |
| "rewards/rejected": -2.5838122367858887, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.2826485213294949, |
| "grad_norm": 7.877563953399658, |
| "learning_rate": 1.4352263805286575e-05, |
| "logits/chosen": -2.9859046936035156, |
| "logits/rejected": -3.0330402851104736, |
| "logps/chosen": -278.54620361328125, |
| "logps/rejected": -315.25311279296875, |
| "loss": 0.5361, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.5659092664718628, |
| "rewards/margins": 1.165024995803833, |
| "rewards/rejected": -2.7309341430664062, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.2839570792986129, |
| "grad_norm": 8.220552444458008, |
| "learning_rate": 1.4326092645904215e-05, |
| "logits/chosen": -3.093210458755493, |
| "logits/rejected": -3.0550553798675537, |
| "logps/chosen": -265.4081726074219, |
| "logps/rejected": -305.39404296875, |
| "loss": 0.5215, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.9443448781967163, |
| "rewards/margins": 1.2935254573822021, |
| "rewards/rejected": -3.237870454788208, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.28526563726773096, |
| "grad_norm": 5.695536136627197, |
| "learning_rate": 1.4299921486521855e-05, |
| "logits/chosen": -2.920870065689087, |
| "logits/rejected": -3.0504040718078613, |
| "logps/chosen": -290.29327392578125, |
| "logps/rejected": -273.7704162597656, |
| "loss": 0.4375, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.6533771753311157, |
| "rewards/margins": 1.6679798364639282, |
| "rewards/rejected": -3.3213565349578857, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.286574195236849, |
| "grad_norm": 6.138227939605713, |
| "learning_rate": 1.4273750327139493e-05, |
| "logits/chosen": -3.021458148956299, |
| "logits/rejected": -3.085855484008789, |
| "logps/chosen": -271.1431884765625, |
| "logps/rejected": -241.7474365234375, |
| "loss": 0.4496, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.505995512008667, |
| "rewards/margins": 1.7411110401153564, |
| "rewards/rejected": -3.2471065521240234, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.287882753205967, |
| "grad_norm": 6.326923847198486, |
| "learning_rate": 1.4247579167757133e-05, |
| "logits/chosen": -2.997809886932373, |
| "logits/rejected": -3.044062376022339, |
| "logps/chosen": -252.74203491210938, |
| "logps/rejected": -284.060546875, |
| "loss": 0.4742, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.7201992273330688, |
| "rewards/margins": 1.2570292949676514, |
| "rewards/rejected": -2.9772286415100098, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.28919131117508506, |
| "grad_norm": 9.969533920288086, |
| "learning_rate": 1.4221408008374773e-05, |
| "logits/chosen": -2.8809401988983154, |
| "logits/rejected": -3.0844621658325195, |
| "logps/chosen": -296.0122375488281, |
| "logps/rejected": -286.38775634765625, |
| "loss": 0.606, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.8422445058822632, |
| "rewards/margins": 1.1577017307281494, |
| "rewards/rejected": -2.999946117401123, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.2904998691442031, |
| "grad_norm": 6.7198591232299805, |
| "learning_rate": 1.4195236848992412e-05, |
| "logits/chosen": -3.018711566925049, |
| "logits/rejected": -3.08083438873291, |
| "logps/chosen": -279.4359436035156, |
| "logps/rejected": -273.54571533203125, |
| "loss": 0.3774, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.371663212776184, |
| "rewards/margins": 2.2079691886901855, |
| "rewards/rejected": -3.579632520675659, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.2918084271133211, |
| "grad_norm": 12.09412956237793, |
| "learning_rate": 1.4169065689610052e-05, |
| "logits/chosen": -3.0689175128936768, |
| "logits/rejected": -3.059790849685669, |
| "logps/chosen": -310.78375244140625, |
| "logps/rejected": -268.98004150390625, |
| "loss": 0.5401, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.4807078838348389, |
| "rewards/margins": 1.5228500366210938, |
| "rewards/rejected": -3.0035579204559326, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.29311698508243916, |
| "grad_norm": 6.893748760223389, |
| "learning_rate": 1.4142894530227692e-05, |
| "logits/chosen": -3.032919406890869, |
| "logits/rejected": -3.0950205326080322, |
| "logps/chosen": -242.1670379638672, |
| "logps/rejected": -239.2816619873047, |
| "loss": 0.5792, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.5399396419525146, |
| "rewards/margins": 1.3476312160491943, |
| "rewards/rejected": -2.887570858001709, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.29442554305155716, |
| "grad_norm": 7.757107734680176, |
| "learning_rate": 1.4116723370845328e-05, |
| "logits/chosen": -2.9414639472961426, |
| "logits/rejected": -3.0114121437072754, |
| "logps/chosen": -257.2752380371094, |
| "logps/rejected": -283.72088623046875, |
| "loss": 0.5001, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2555142641067505, |
| "rewards/margins": 1.4964978694915771, |
| "rewards/rejected": -2.752012252807617, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.2957341010206752, |
| "grad_norm": 10.208292961120605, |
| "learning_rate": 1.4090552211462969e-05, |
| "logits/chosen": -2.973788261413574, |
| "logits/rejected": -3.0658910274505615, |
| "logps/chosen": -373.2972717285156, |
| "logps/rejected": -342.21929931640625, |
| "loss": 0.5265, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.2128429412841797, |
| "rewards/margins": 1.3734642267227173, |
| "rewards/rejected": -2.5863070487976074, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.29704265898979326, |
| "grad_norm": 9.619853973388672, |
| "learning_rate": 1.4064381052080609e-05, |
| "logits/chosen": -2.830082893371582, |
| "logits/rejected": -2.9827170372009277, |
| "logps/chosen": -301.67999267578125, |
| "logps/rejected": -302.10540771484375, |
| "loss": 0.6362, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3880640268325806, |
| "rewards/margins": 1.111306071281433, |
| "rewards/rejected": -2.4993700981140137, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.29835121695891126, |
| "grad_norm": 7.463226795196533, |
| "learning_rate": 1.4038209892698247e-05, |
| "logits/chosen": -3.006840229034424, |
| "logits/rejected": -3.0112249851226807, |
| "logps/chosen": -324.7442932128906, |
| "logps/rejected": -299.2283630371094, |
| "loss": 0.4186, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.5023479461669922, |
| "rewards/margins": 1.465083360671997, |
| "rewards/rejected": -2.9674313068389893, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.2996597749280293, |
| "grad_norm": 8.654081344604492, |
| "learning_rate": 1.4012038733315887e-05, |
| "logits/chosen": -2.949709415435791, |
| "logits/rejected": -2.9855611324310303, |
| "logps/chosen": -293.55523681640625, |
| "logps/rejected": -316.7439880371094, |
| "loss": 0.4152, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.4069695472717285, |
| "rewards/margins": 1.7172313928604126, |
| "rewards/rejected": -3.1242008209228516, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.30096833289714736, |
| "grad_norm": 5.447267532348633, |
| "learning_rate": 1.3985867573933527e-05, |
| "logits/chosen": -2.9884190559387207, |
| "logits/rejected": -3.067103862762451, |
| "logps/chosen": -300.8186950683594, |
| "logps/rejected": -308.9688415527344, |
| "loss": 0.3237, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.4933931827545166, |
| "rewards/margins": 2.1875128746032715, |
| "rewards/rejected": -3.680905818939209, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.30227689086626536, |
| "grad_norm": 7.315623760223389, |
| "learning_rate": 1.3959696414551165e-05, |
| "logits/chosen": -2.877784013748169, |
| "logits/rejected": -2.967470645904541, |
| "logps/chosen": -245.8937530517578, |
| "logps/rejected": -253.87759399414062, |
| "loss": 0.5557, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.9919893741607666, |
| "rewards/margins": 1.786003828048706, |
| "rewards/rejected": -3.7779934406280518, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.3035854488353834, |
| "grad_norm": 9.264678955078125, |
| "learning_rate": 1.3933525255168806e-05, |
| "logits/chosen": -2.937941074371338, |
| "logits/rejected": -3.082321882247925, |
| "logps/chosen": -267.06365966796875, |
| "logps/rejected": -274.55108642578125, |
| "loss": 0.5468, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.7004693746566772, |
| "rewards/margins": 2.156771183013916, |
| "rewards/rejected": -3.8572402000427246, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.30489400680450146, |
| "grad_norm": 5.902349472045898, |
| "learning_rate": 1.3907354095786446e-05, |
| "logits/chosen": -3.035977602005005, |
| "logits/rejected": -3.0683465003967285, |
| "logps/chosen": -292.7044982910156, |
| "logps/rejected": -259.0909118652344, |
| "loss": 0.4338, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.780005693435669, |
| "rewards/margins": 2.0152504444122314, |
| "rewards/rejected": -3.7952563762664795, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.30620256477361946, |
| "grad_norm": 4.958015441894531, |
| "learning_rate": 1.3881182936404082e-05, |
| "logits/chosen": -2.9945685863494873, |
| "logits/rejected": -3.0434579849243164, |
| "logps/chosen": -329.266357421875, |
| "logps/rejected": -357.74798583984375, |
| "loss": 0.4398, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.4882758855819702, |
| "rewards/margins": 1.6685161590576172, |
| "rewards/rejected": -3.156792163848877, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.3075111227427375, |
| "grad_norm": 7.654452323913574, |
| "learning_rate": 1.3855011777021722e-05, |
| "logits/chosen": -3.058605670928955, |
| "logits/rejected": -3.0306811332702637, |
| "logps/chosen": -263.643310546875, |
| "logps/rejected": -307.02520751953125, |
| "loss": 0.6095, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.639232873916626, |
| "rewards/margins": 1.4032961130142212, |
| "rewards/rejected": -3.0425288677215576, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.30881968071185556, |
| "grad_norm": 5.414381504058838, |
| "learning_rate": 1.3828840617639362e-05, |
| "logits/chosen": -3.0163514614105225, |
| "logits/rejected": -3.0375030040740967, |
| "logps/chosen": -300.1225891113281, |
| "logps/rejected": -287.8301696777344, |
| "loss": 0.5153, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.1381218433380127, |
| "rewards/margins": 1.5844132900238037, |
| "rewards/rejected": -2.7225348949432373, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.31012823868097356, |
| "grad_norm": 7.217351913452148, |
| "learning_rate": 1.3802669458257e-05, |
| "logits/chosen": -2.8652729988098145, |
| "logits/rejected": -2.965222120285034, |
| "logps/chosen": -316.91094970703125, |
| "logps/rejected": -331.66265869140625, |
| "loss": 0.5718, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.3781057596206665, |
| "rewards/margins": 1.089707612991333, |
| "rewards/rejected": -2.467813014984131, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.3114367966500916, |
| "grad_norm": 8.356551170349121, |
| "learning_rate": 1.377649829887464e-05, |
| "logits/chosen": -3.049598217010498, |
| "logits/rejected": -3.1053690910339355, |
| "logps/chosen": -315.70916748046875, |
| "logps/rejected": -300.895751953125, |
| "loss": 0.4796, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.1529004573822021, |
| "rewards/margins": 1.447994589805603, |
| "rewards/rejected": -2.6008951663970947, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.3127453546192096, |
| "grad_norm": 10.391530990600586, |
| "learning_rate": 1.3750327139492281e-05, |
| "logits/chosen": -3.0718863010406494, |
| "logits/rejected": -3.013375759124756, |
| "logps/chosen": -283.4698181152344, |
| "logps/rejected": -284.2854309082031, |
| "loss": 0.6442, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.585439920425415, |
| "rewards/margins": 1.2667933702468872, |
| "rewards/rejected": -2.8522331714630127, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.31405391258832765, |
| "grad_norm": 10.304366111755371, |
| "learning_rate": 1.372415598010992e-05, |
| "logits/chosen": -2.875305414199829, |
| "logits/rejected": -2.9016032218933105, |
| "logps/chosen": -338.66583251953125, |
| "logps/rejected": -296.04669189453125, |
| "loss": 0.5551, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.2871284484863281, |
| "rewards/margins": 1.2437200546264648, |
| "rewards/rejected": -2.530848264694214, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.31405391258832765, |
| "eval_logits/chosen": -3.0260488986968994, |
| "eval_logits/rejected": -3.0437211990356445, |
| "eval_logps/chosen": -299.2873840332031, |
| "eval_logps/rejected": -293.77947998046875, |
| "eval_loss": 0.5235101580619812, |
| "eval_rewards/accuracies": 0.7335000038146973, |
| "eval_rewards/chosen": -1.6054028272628784, |
| "eval_rewards/margins": 1.3790825605392456, |
| "eval_rewards/rejected": -2.984485387802124, |
| "eval_runtime": 763.0817, |
| "eval_samples_per_second": 2.621, |
| "eval_steps_per_second": 0.328, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3153624705574457, |
| "grad_norm": 7.599167346954346, |
| "learning_rate": 1.369798482072756e-05, |
| "logits/chosen": -2.9609599113464355, |
| "logits/rejected": -3.05143666267395, |
| "logps/chosen": -327.25323486328125, |
| "logps/rejected": -286.7802429199219, |
| "loss": 0.4555, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.5968466997146606, |
| "rewards/margins": 1.700269103050232, |
| "rewards/rejected": -3.2971160411834717, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.3166710285265637, |
| "grad_norm": 6.793883323669434, |
| "learning_rate": 1.36718136613452e-05, |
| "logits/chosen": -3.022416591644287, |
| "logits/rejected": -3.0568621158599854, |
| "logps/chosen": -291.0765075683594, |
| "logps/rejected": -259.074951171875, |
| "loss": 0.5201, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.7141826152801514, |
| "rewards/margins": 1.5414519309997559, |
| "rewards/rejected": -3.255634307861328, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.31797958649568175, |
| "grad_norm": 3.9788708686828613, |
| "learning_rate": 1.364564250196284e-05, |
| "logits/chosen": -2.8990914821624756, |
| "logits/rejected": -2.990156888961792, |
| "logps/chosen": -347.00177001953125, |
| "logps/rejected": -328.04791259765625, |
| "loss": 0.3949, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.5140724182128906, |
| "rewards/margins": 2.129075527191162, |
| "rewards/rejected": -3.6431479454040527, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.3192881444647998, |
| "grad_norm": 7.489226818084717, |
| "learning_rate": 1.3619471342580476e-05, |
| "logits/chosen": -3.049175977706909, |
| "logits/rejected": -3.0629820823669434, |
| "logps/chosen": -338.41302490234375, |
| "logps/rejected": -340.4408264160156, |
| "loss": 0.4952, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.8001915216445923, |
| "rewards/margins": 1.876043677330017, |
| "rewards/rejected": -3.6762351989746094, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.3205967024339178, |
| "grad_norm": 4.567310333251953, |
| "learning_rate": 1.3593300183198118e-05, |
| "logits/chosen": -2.9615187644958496, |
| "logits/rejected": -3.008460521697998, |
| "logps/chosen": -295.421142578125, |
| "logps/rejected": -277.5303039550781, |
| "loss": 0.4912, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8743877410888672, |
| "rewards/margins": 1.695482850074768, |
| "rewards/rejected": -3.5698704719543457, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.32190526040303585, |
| "grad_norm": 8.410323143005371, |
| "learning_rate": 1.3567129023815758e-05, |
| "logits/chosen": -3.09279465675354, |
| "logits/rejected": -3.0178253650665283, |
| "logps/chosen": -343.17034912109375, |
| "logps/rejected": -322.3753356933594, |
| "loss": 0.715, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.4411779642105103, |
| "rewards/margins": 1.4588241577148438, |
| "rewards/rejected": -2.9000020027160645, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.3232138183721539, |
| "grad_norm": 8.890939712524414, |
| "learning_rate": 1.3540957864433395e-05, |
| "logits/chosen": -3.0198092460632324, |
| "logits/rejected": -3.1039481163024902, |
| "logps/chosen": -303.96820068359375, |
| "logps/rejected": -279.45831298828125, |
| "loss": 0.6816, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.2015838623046875, |
| "rewards/margins": 1.0614570379257202, |
| "rewards/rejected": -2.2630410194396973, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.3245223763412719, |
| "grad_norm": 8.1312894821167, |
| "learning_rate": 1.3514786705051035e-05, |
| "logits/chosen": -3.032458782196045, |
| "logits/rejected": -3.077017068862915, |
| "logps/chosen": -297.26678466796875, |
| "logps/rejected": -258.3316650390625, |
| "loss": 0.5992, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.8163844347000122, |
| "rewards/margins": 1.0739367008209229, |
| "rewards/rejected": -1.8903210163116455, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.32583093431038995, |
| "grad_norm": 9.6901273727417, |
| "learning_rate": 1.3488615545668675e-05, |
| "logits/chosen": -3.017458200454712, |
| "logits/rejected": -3.1017868518829346, |
| "logps/chosen": -301.77484130859375, |
| "logps/rejected": -290.51373291015625, |
| "loss": 0.4924, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.7117307782173157, |
| "rewards/margins": 1.4712412357330322, |
| "rewards/rejected": -2.182971954345703, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.327139492279508, |
| "grad_norm": 10.449470520019531, |
| "learning_rate": 1.3462444386286313e-05, |
| "logits/chosen": -3.0146572589874268, |
| "logits/rejected": -3.053591012954712, |
| "logps/chosen": -311.7223815917969, |
| "logps/rejected": -352.6505432128906, |
| "loss": 0.5767, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.2211498022079468, |
| "rewards/margins": 1.2701174020767212, |
| "rewards/rejected": -2.491266965866089, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.328448050248626, |
| "grad_norm": 7.611923694610596, |
| "learning_rate": 1.3436273226903953e-05, |
| "logits/chosen": -3.033237934112549, |
| "logits/rejected": -3.0723347663879395, |
| "logps/chosen": -262.8450012207031, |
| "logps/rejected": -245.06201171875, |
| "loss": 0.4277, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.213969111442566, |
| "rewards/margins": 1.6178230047225952, |
| "rewards/rejected": -2.8317923545837402, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.32975660821774405, |
| "grad_norm": 10.214200973510742, |
| "learning_rate": 1.3410102067521593e-05, |
| "logits/chosen": -3.102858066558838, |
| "logits/rejected": -3.180795192718506, |
| "logps/chosen": -277.609130859375, |
| "logps/rejected": -312.61041259765625, |
| "loss": 0.5196, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.1733005046844482, |
| "rewards/margins": 1.6840053796768188, |
| "rewards/rejected": -2.8573060035705566, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.3310651661868621, |
| "grad_norm": 6.558178901672363, |
| "learning_rate": 1.3383930908139232e-05, |
| "logits/chosen": -3.1322267055511475, |
| "logits/rejected": -3.180755138397217, |
| "logps/chosen": -310.9773864746094, |
| "logps/rejected": -311.936767578125, |
| "loss": 0.5532, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.5742419958114624, |
| "rewards/margins": 1.6253976821899414, |
| "rewards/rejected": -3.1996397972106934, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.3323737241559801, |
| "grad_norm": 6.47090482711792, |
| "learning_rate": 1.3357759748756872e-05, |
| "logits/chosen": -3.085824966430664, |
| "logits/rejected": -3.1216368675231934, |
| "logps/chosen": -326.1288146972656, |
| "logps/rejected": -289.3221435546875, |
| "loss": 0.4169, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.6582863330841064, |
| "rewards/margins": 1.6689634323120117, |
| "rewards/rejected": -3.3272500038146973, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.33368228212509815, |
| "grad_norm": 5.296478748321533, |
| "learning_rate": 1.3331588589374512e-05, |
| "logits/chosen": -3.14270281791687, |
| "logits/rejected": -3.174440383911133, |
| "logps/chosen": -262.1026611328125, |
| "logps/rejected": -269.72503662109375, |
| "loss": 0.4051, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.4073164463043213, |
| "rewards/margins": 1.6645927429199219, |
| "rewards/rejected": -3.0719094276428223, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.33499084009421615, |
| "grad_norm": 5.415592670440674, |
| "learning_rate": 1.3305417429992148e-05, |
| "logits/chosen": -3.004148006439209, |
| "logits/rejected": -3.151362657546997, |
| "logps/chosen": -307.1809997558594, |
| "logps/rejected": -300.0960998535156, |
| "loss": 0.4607, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.156396746635437, |
| "rewards/margins": 1.7337665557861328, |
| "rewards/rejected": -2.8901631832122803, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.3362993980633342, |
| "grad_norm": 8.379344940185547, |
| "learning_rate": 1.3279246270609789e-05, |
| "logits/chosen": -3.0923662185668945, |
| "logits/rejected": -3.1500816345214844, |
| "logps/chosen": -295.65936279296875, |
| "logps/rejected": -311.3245849609375, |
| "loss": 0.3996, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.008171558380127, |
| "rewards/margins": 1.8367351293563843, |
| "rewards/rejected": -2.84490704536438, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.33760795603245225, |
| "grad_norm": 9.645184516906738, |
| "learning_rate": 1.3253075111227429e-05, |
| "logits/chosen": -2.793687343597412, |
| "logits/rejected": -3.0188405513763428, |
| "logps/chosen": -317.2360534667969, |
| "logps/rejected": -257.33026123046875, |
| "loss": 0.5077, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.5827356576919556, |
| "rewards/margins": 1.593719482421875, |
| "rewards/rejected": -3.176455020904541, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.33891651400157025, |
| "grad_norm": 6.084274768829346, |
| "learning_rate": 1.3226903951845067e-05, |
| "logits/chosen": -3.1023552417755127, |
| "logits/rejected": -3.086031436920166, |
| "logps/chosen": -280.6290588378906, |
| "logps/rejected": -294.7873229980469, |
| "loss": 0.4734, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.627698540687561, |
| "rewards/margins": 1.7080399990081787, |
| "rewards/rejected": -3.33573842048645, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.3402250719706883, |
| "grad_norm": 5.004266262054443, |
| "learning_rate": 1.3200732792462707e-05, |
| "logits/chosen": -3.0339324474334717, |
| "logits/rejected": -2.9874372482299805, |
| "logps/chosen": -282.74798583984375, |
| "logps/rejected": -281.6959228515625, |
| "loss": 0.6804, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.755840539932251, |
| "rewards/margins": 1.4509661197662354, |
| "rewards/rejected": -3.2068066596984863, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.34153362993980635, |
| "grad_norm": 4.275951385498047, |
| "learning_rate": 1.3174561633080347e-05, |
| "logits/chosen": -3.0189881324768066, |
| "logits/rejected": -3.025322437286377, |
| "logps/chosen": -319.19903564453125, |
| "logps/rejected": -289.3116455078125, |
| "loss": 0.4103, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.3581633567810059, |
| "rewards/margins": 1.6812947988510132, |
| "rewards/rejected": -3.0394580364227295, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.34284218790892435, |
| "grad_norm": 7.874205112457275, |
| "learning_rate": 1.3148390473697985e-05, |
| "logits/chosen": -3.078430652618408, |
| "logits/rejected": -3.1446545124053955, |
| "logps/chosen": -335.8284606933594, |
| "logps/rejected": -299.3724060058594, |
| "loss": 0.3688, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.0086791515350342, |
| "rewards/margins": 1.6358997821807861, |
| "rewards/rejected": -2.644578695297241, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.3441507458780424, |
| "grad_norm": 7.652405738830566, |
| "learning_rate": 1.3122219314315626e-05, |
| "logits/chosen": -3.027405261993408, |
| "logits/rejected": -2.970729112625122, |
| "logps/chosen": -301.2220764160156, |
| "logps/rejected": -305.47210693359375, |
| "loss": 0.5549, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.775042176246643, |
| "rewards/margins": 1.3307900428771973, |
| "rewards/rejected": -3.105832099914551, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.34545930384716045, |
| "grad_norm": 6.982247829437256, |
| "learning_rate": 1.3096048154933266e-05, |
| "logits/chosen": -2.9840004444122314, |
| "logits/rejected": -3.0685667991638184, |
| "logps/chosen": -269.3052673339844, |
| "logps/rejected": -255.6454315185547, |
| "loss": 0.5786, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.399322509765625, |
| "rewards/margins": 1.243403434753418, |
| "rewards/rejected": -2.642725944519043, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.34676786181627844, |
| "grad_norm": 6.584117412567139, |
| "learning_rate": 1.3069876995550902e-05, |
| "logits/chosen": -2.9669992923736572, |
| "logits/rejected": -3.0108094215393066, |
| "logps/chosen": -289.9788818359375, |
| "logps/rejected": -285.2430419921875, |
| "loss": 0.5317, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.6822948455810547, |
| "rewards/margins": 1.5722761154174805, |
| "rewards/rejected": -3.2545711994171143, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.3480764197853965, |
| "grad_norm": 7.847695827484131, |
| "learning_rate": 1.3043705836168542e-05, |
| "logits/chosen": -2.9359116554260254, |
| "logits/rejected": -2.916821241378784, |
| "logps/chosen": -279.84820556640625, |
| "logps/rejected": -288.5198669433594, |
| "loss": 0.5384, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1366277933120728, |
| "rewards/margins": 1.640825629234314, |
| "rewards/rejected": -2.7774531841278076, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.34938497775451455, |
| "grad_norm": 9.220524787902832, |
| "learning_rate": 1.3017534676786182e-05, |
| "logits/chosen": -3.050198793411255, |
| "logits/rejected": -3.1035842895507812, |
| "logps/chosen": -271.7259826660156, |
| "logps/rejected": -224.1414794921875, |
| "loss": 0.5167, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.0262686014175415, |
| "rewards/margins": 1.3584415912628174, |
| "rewards/rejected": -2.3847105503082275, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.35069353572363254, |
| "grad_norm": 8.469073295593262, |
| "learning_rate": 1.299136351740382e-05, |
| "logits/chosen": -3.018068313598633, |
| "logits/rejected": -3.0691208839416504, |
| "logps/chosen": -305.9311218261719, |
| "logps/rejected": -271.46624755859375, |
| "loss": 0.5274, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.2072837352752686, |
| "rewards/margins": 1.4922826290130615, |
| "rewards/rejected": -2.69956636428833, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3520020936927506, |
| "grad_norm": 8.611955642700195, |
| "learning_rate": 1.296519235802146e-05, |
| "logits/chosen": -2.9702281951904297, |
| "logits/rejected": -3.02331280708313, |
| "logps/chosen": -271.5640869140625, |
| "logps/rejected": -277.6800231933594, |
| "loss": 0.4417, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.3191261291503906, |
| "rewards/margins": 1.439081072807312, |
| "rewards/rejected": -2.758207082748413, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.35331065166186865, |
| "grad_norm": 9.181700706481934, |
| "learning_rate": 1.2939021198639101e-05, |
| "logits/chosen": -3.0127110481262207, |
| "logits/rejected": -3.1290624141693115, |
| "logps/chosen": -271.6927185058594, |
| "logps/rejected": -245.85733032226562, |
| "loss": 0.4842, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.475193738937378, |
| "rewards/margins": 1.6307731866836548, |
| "rewards/rejected": -3.1059670448303223, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.35461920963098664, |
| "grad_norm": 4.142209529876709, |
| "learning_rate": 1.291285003925674e-05, |
| "logits/chosen": -2.8960788249969482, |
| "logits/rejected": -2.8379464149475098, |
| "logps/chosen": -291.3749084472656, |
| "logps/rejected": -328.5154724121094, |
| "loss": 0.3166, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.1906328201293945, |
| "rewards/margins": 2.0907857418060303, |
| "rewards/rejected": -3.281418561935425, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.3559277676001047, |
| "grad_norm": 7.561165809631348, |
| "learning_rate": 1.288667887987438e-05, |
| "logits/chosen": -2.9457709789276123, |
| "logits/rejected": -3.032975673675537, |
| "logps/chosen": -313.8270568847656, |
| "logps/rejected": -306.0509338378906, |
| "loss": 0.4867, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.628435492515564, |
| "rewards/margins": 1.5076749324798584, |
| "rewards/rejected": -3.136110782623291, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.3572363255692227, |
| "grad_norm": 4.377691745758057, |
| "learning_rate": 1.286050772049202e-05, |
| "logits/chosen": -2.9811158180236816, |
| "logits/rejected": -3.0263185501098633, |
| "logps/chosen": -264.35443115234375, |
| "logps/rejected": -254.58731079101562, |
| "loss": 0.3929, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.4240038394927979, |
| "rewards/margins": 1.7809474468231201, |
| "rewards/rejected": -3.204951524734497, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.35854488353834074, |
| "grad_norm": 10.971863746643066, |
| "learning_rate": 1.283433656110966e-05, |
| "logits/chosen": -2.9616293907165527, |
| "logits/rejected": -3.0293776988983154, |
| "logps/chosen": -290.515625, |
| "logps/rejected": -293.56170654296875, |
| "loss": 0.7177, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -1.722092628479004, |
| "rewards/margins": 1.0726094245910645, |
| "rewards/rejected": -2.7947020530700684, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.3598534415074588, |
| "grad_norm": 6.668981075286865, |
| "learning_rate": 1.2808165401727298e-05, |
| "logits/chosen": -3.0443549156188965, |
| "logits/rejected": -3.052109479904175, |
| "logps/chosen": -285.5496826171875, |
| "logps/rejected": -281.496337890625, |
| "loss": 0.3757, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.2626408338546753, |
| "rewards/margins": 1.825859785079956, |
| "rewards/rejected": -3.0885009765625, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.3611619994765768, |
| "grad_norm": 8.7103910446167, |
| "learning_rate": 1.2781994242344938e-05, |
| "logits/chosen": -2.9803757667541504, |
| "logits/rejected": -3.045557737350464, |
| "logps/chosen": -335.71868896484375, |
| "logps/rejected": -303.4695739746094, |
| "loss": 0.4753, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.295375108718872, |
| "rewards/margins": 1.8782527446746826, |
| "rewards/rejected": -3.1736278533935547, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.36247055744569484, |
| "grad_norm": 6.965200424194336, |
| "learning_rate": 1.2755823082962578e-05, |
| "logits/chosen": -3.007631540298462, |
| "logits/rejected": -3.0434601306915283, |
| "logps/chosen": -306.3366394042969, |
| "logps/rejected": -323.5538330078125, |
| "loss": 0.5288, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3271119594573975, |
| "rewards/margins": 1.4354439973831177, |
| "rewards/rejected": -2.7625560760498047, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.3637791154148129, |
| "grad_norm": 4.985629081726074, |
| "learning_rate": 1.2729651923580215e-05, |
| "logits/chosen": -2.9433679580688477, |
| "logits/rejected": -3.051299810409546, |
| "logps/chosen": -275.38519287109375, |
| "logps/rejected": -241.19888305664062, |
| "loss": 0.5702, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.9085711240768433, |
| "rewards/margins": 1.2991278171539307, |
| "rewards/rejected": -2.2076992988586426, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.3650876733839309, |
| "grad_norm": 7.742268085479736, |
| "learning_rate": 1.2703480764197855e-05, |
| "logits/chosen": -2.9358208179473877, |
| "logits/rejected": -2.947000026702881, |
| "logps/chosen": -286.78033447265625, |
| "logps/rejected": -308.0422058105469, |
| "loss": 0.6321, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.871289074420929, |
| "rewards/margins": 1.0567841529846191, |
| "rewards/rejected": -1.9280732870101929, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.36639623135304894, |
| "grad_norm": 7.653586387634277, |
| "learning_rate": 1.2677309604815495e-05, |
| "logits/chosen": -2.9908015727996826, |
| "logits/rejected": -2.9822075366973877, |
| "logps/chosen": -286.308837890625, |
| "logps/rejected": -272.92156982421875, |
| "loss": 0.5557, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.609847366809845, |
| "rewards/margins": 1.0760449171066284, |
| "rewards/rejected": -1.6858923435211182, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.367704789322167, |
| "grad_norm": 7.872668266296387, |
| "learning_rate": 1.2651138445433133e-05, |
| "logits/chosen": -3.0261340141296387, |
| "logits/rejected": -3.0703518390655518, |
| "logps/chosen": -290.16070556640625, |
| "logps/rejected": -282.38336181640625, |
| "loss": 0.6173, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.7330259084701538, |
| "rewards/margins": 1.229697585105896, |
| "rewards/rejected": -1.962723731994629, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.369013347291285, |
| "grad_norm": 11.86557388305664, |
| "learning_rate": 1.2624967286050773e-05, |
| "logits/chosen": -3.050842761993408, |
| "logits/rejected": -3.036444902420044, |
| "logps/chosen": -289.84393310546875, |
| "logps/rejected": -261.6834411621094, |
| "loss": 0.6361, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.7127243280410767, |
| "rewards/margins": 0.9847332835197449, |
| "rewards/rejected": -1.6974576711654663, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.37032190526040304, |
| "grad_norm": 12.028075218200684, |
| "learning_rate": 1.2598796126668413e-05, |
| "logits/chosen": -2.887280225753784, |
| "logits/rejected": -2.9627726078033447, |
| "logps/chosen": -277.523681640625, |
| "logps/rejected": -255.8815460205078, |
| "loss": 0.472, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7555484771728516, |
| "rewards/margins": 1.6585193872451782, |
| "rewards/rejected": -2.4140677452087402, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.3716304632295211, |
| "grad_norm": 6.440135955810547, |
| "learning_rate": 1.2572624967286052e-05, |
| "logits/chosen": -2.875539779663086, |
| "logits/rejected": -3.007462978363037, |
| "logps/chosen": -356.36224365234375, |
| "logps/rejected": -322.9365539550781, |
| "loss": 0.3654, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.6236860156059265, |
| "rewards/margins": 2.0594842433929443, |
| "rewards/rejected": -2.6831705570220947, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.3729390211986391, |
| "grad_norm": 7.960480690002441, |
| "learning_rate": 1.2546453807903692e-05, |
| "logits/chosen": -2.993530035018921, |
| "logits/rejected": -3.0551657676696777, |
| "logps/chosen": -304.01422119140625, |
| "logps/rejected": -298.3731384277344, |
| "loss": 0.6268, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.3483511209487915, |
| "rewards/margins": 1.3386003971099854, |
| "rewards/rejected": -2.6869513988494873, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.37424757916775714, |
| "grad_norm": 6.786019802093506, |
| "learning_rate": 1.2520282648521332e-05, |
| "logits/chosen": -3.001875400543213, |
| "logits/rejected": -3.0228638648986816, |
| "logps/chosen": -248.81704711914062, |
| "logps/rejected": -263.12738037109375, |
| "loss": 0.581, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.7031877040863037, |
| "rewards/margins": 1.1887766122817993, |
| "rewards/rejected": -2.8919644355773926, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.3755561371368752, |
| "grad_norm": 8.263105392456055, |
| "learning_rate": 1.2494111489138968e-05, |
| "logits/chosen": -3.038954973220825, |
| "logits/rejected": -3.062197685241699, |
| "logps/chosen": -313.73321533203125, |
| "logps/rejected": -283.0350646972656, |
| "loss": 0.619, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.5002611875534058, |
| "rewards/margins": 1.2015249729156494, |
| "rewards/rejected": -2.7017860412597656, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.3768646951059932, |
| "grad_norm": 6.546252727508545, |
| "learning_rate": 1.2467940329756609e-05, |
| "logits/chosen": -2.9777824878692627, |
| "logits/rejected": -3.0632386207580566, |
| "logps/chosen": -308.88482666015625, |
| "logps/rejected": -304.6534729003906, |
| "loss": 0.4268, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.5263839960098267, |
| "rewards/margins": 1.5267943143844604, |
| "rewards/rejected": -3.053178310394287, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.37817325307511124, |
| "grad_norm": 10.60797119140625, |
| "learning_rate": 1.2441769170374249e-05, |
| "logits/chosen": -2.8980348110198975, |
| "logits/rejected": -2.9805784225463867, |
| "logps/chosen": -300.85955810546875, |
| "logps/rejected": -279.0732116699219, |
| "loss": 0.5448, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.6329902410507202, |
| "rewards/margins": 1.6282199621200562, |
| "rewards/rejected": -3.2612102031707764, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.37948181104422923, |
| "grad_norm": 6.1115217208862305, |
| "learning_rate": 1.2415598010991887e-05, |
| "logits/chosen": -2.7547950744628906, |
| "logits/rejected": -2.8454792499542236, |
| "logps/chosen": -292.1466064453125, |
| "logps/rejected": -284.0655517578125, |
| "loss": 0.5144, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.6082839965820312, |
| "rewards/margins": 1.9727370738983154, |
| "rewards/rejected": -3.5810210704803467, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3807903690133473, |
| "grad_norm": 10.30879020690918, |
| "learning_rate": 1.2389426851609527e-05, |
| "logits/chosen": -2.8726394176483154, |
| "logits/rejected": -2.970806360244751, |
| "logps/chosen": -343.87860107421875, |
| "logps/rejected": -320.80615234375, |
| "loss": 0.4237, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.5264251232147217, |
| "rewards/margins": 2.020369052886963, |
| "rewards/rejected": -3.5467944145202637, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.38209892698246534, |
| "grad_norm": 6.011262893676758, |
| "learning_rate": 1.2363255692227167e-05, |
| "logits/chosen": -2.860525131225586, |
| "logits/rejected": -2.7346343994140625, |
| "logps/chosen": -310.897216796875, |
| "logps/rejected": -351.1201171875, |
| "loss": 0.5297, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8876367807388306, |
| "rewards/margins": 1.8191211223602295, |
| "rewards/rejected": -3.7067580223083496, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.38340748495158333, |
| "grad_norm": 6.5547566413879395, |
| "learning_rate": 1.2337084532844805e-05, |
| "logits/chosen": -2.7850310802459717, |
| "logits/rejected": -2.8178579807281494, |
| "logps/chosen": -280.5059814453125, |
| "logps/rejected": -279.82525634765625, |
| "loss": 0.4661, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.7109603881835938, |
| "rewards/margins": 1.4891334772109985, |
| "rewards/rejected": -3.2000937461853027, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.3847160429207014, |
| "grad_norm": 5.547062873840332, |
| "learning_rate": 1.2310913373462446e-05, |
| "logits/chosen": -2.890692710876465, |
| "logits/rejected": -2.9257652759552, |
| "logps/chosen": -237.19577026367188, |
| "logps/rejected": -270.4554748535156, |
| "loss": 0.4804, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.332466721534729, |
| "rewards/margins": 1.3955414295196533, |
| "rewards/rejected": -2.7280080318450928, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.38602460088981944, |
| "grad_norm": 7.388108253479004, |
| "learning_rate": 1.2284742214080086e-05, |
| "logits/chosen": -2.9597058296203613, |
| "logits/rejected": -2.9998531341552734, |
| "logps/chosen": -262.3876037597656, |
| "logps/rejected": -249.7728729248047, |
| "loss": 0.5758, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.322337031364441, |
| "rewards/margins": 1.3156559467315674, |
| "rewards/rejected": -2.6379926204681396, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.38733315885893743, |
| "grad_norm": 6.45510196685791, |
| "learning_rate": 1.2258571054697724e-05, |
| "logits/chosen": -3.0368587970733643, |
| "logits/rejected": -3.017752170562744, |
| "logps/chosen": -272.9659729003906, |
| "logps/rejected": -285.7885437011719, |
| "loss": 0.5161, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.1351183652877808, |
| "rewards/margins": 1.5519227981567383, |
| "rewards/rejected": -2.6870410442352295, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.3886417168280555, |
| "grad_norm": 4.70168399810791, |
| "learning_rate": 1.2232399895315364e-05, |
| "logits/chosen": -2.9754042625427246, |
| "logits/rejected": -3.0704946517944336, |
| "logps/chosen": -308.5724792480469, |
| "logps/rejected": -273.3597412109375, |
| "loss": 0.4308, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.7960319519042969, |
| "rewards/margins": 1.450454592704773, |
| "rewards/rejected": -2.2464864253997803, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.38995027479717354, |
| "grad_norm": 7.209184169769287, |
| "learning_rate": 1.2206228735933004e-05, |
| "logits/chosen": -3.016166925430298, |
| "logits/rejected": -3.0774433612823486, |
| "logps/chosen": -293.2237243652344, |
| "logps/rejected": -322.7275390625, |
| "loss": 0.4859, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1815975904464722, |
| "rewards/margins": 1.4720970392227173, |
| "rewards/rejected": -2.6536946296691895, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.39125883276629153, |
| "grad_norm": 6.350897789001465, |
| "learning_rate": 1.218005757655064e-05, |
| "logits/chosen": -2.953706741333008, |
| "logits/rejected": -3.063767671585083, |
| "logps/chosen": -305.0714416503906, |
| "logps/rejected": -276.573486328125, |
| "loss": 0.4575, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.3502719402313232, |
| "rewards/margins": 1.5017601251602173, |
| "rewards/rejected": -2.852031946182251, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.3925673907354096, |
| "grad_norm": 8.479778289794922, |
| "learning_rate": 1.2153886417168281e-05, |
| "logits/chosen": -2.9702401161193848, |
| "logits/rejected": -2.8454244136810303, |
| "logps/chosen": -281.9237365722656, |
| "logps/rejected": -288.2297668457031, |
| "loss": 0.4503, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.5473536252975464, |
| "rewards/margins": 1.5539802312850952, |
| "rewards/rejected": -3.1013338565826416, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.39387594870452763, |
| "grad_norm": 6.467402935028076, |
| "learning_rate": 1.2127715257785921e-05, |
| "logits/chosen": -3.0142505168914795, |
| "logits/rejected": -3.0140280723571777, |
| "logps/chosen": -242.34933471679688, |
| "logps/rejected": -276.01068115234375, |
| "loss": 0.552, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.787334680557251, |
| "rewards/margins": 1.3136866092681885, |
| "rewards/rejected": -3.1010212898254395, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.39518450667364563, |
| "grad_norm": 4.064186096191406, |
| "learning_rate": 1.210154409840356e-05, |
| "logits/chosen": -3.0537848472595215, |
| "logits/rejected": -2.9990789890289307, |
| "logps/chosen": -273.3089904785156, |
| "logps/rejected": -318.97015380859375, |
| "loss": 0.6266, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3775596618652344, |
| "rewards/margins": 1.1523305177688599, |
| "rewards/rejected": -2.529890298843384, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3964930646427637, |
| "grad_norm": 10.651188850402832, |
| "learning_rate": 1.20753729390212e-05, |
| "logits/chosen": -3.0355381965637207, |
| "logits/rejected": -3.023265838623047, |
| "logps/chosen": -302.71124267578125, |
| "logps/rejected": -282.6175537109375, |
| "loss": 0.5721, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.462561011314392, |
| "rewards/margins": 1.2974321842193604, |
| "rewards/rejected": -2.759993076324463, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.39780162261188173, |
| "grad_norm": 6.771826267242432, |
| "learning_rate": 1.204920177963884e-05, |
| "logits/chosen": -3.036020040512085, |
| "logits/rejected": -3.0711162090301514, |
| "logps/chosen": -354.17230224609375, |
| "logps/rejected": -338.1542053222656, |
| "loss": 0.4863, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.067155361175537, |
| "rewards/margins": 1.3505350351333618, |
| "rewards/rejected": -2.4176905155181885, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.39911018058099973, |
| "grad_norm": 6.441925048828125, |
| "learning_rate": 1.2023030620256478e-05, |
| "logits/chosen": -3.0521793365478516, |
| "logits/rejected": -3.059692859649658, |
| "logps/chosen": -253.529296875, |
| "logps/rejected": -257.99017333984375, |
| "loss": 0.4195, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.8811559677124023, |
| "rewards/margins": 1.760744333267212, |
| "rewards/rejected": -2.6419005393981934, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.4004187385501178, |
| "grad_norm": 10.745322227478027, |
| "learning_rate": 1.1996859460874118e-05, |
| "logits/chosen": -3.0835869312286377, |
| "logits/rejected": -3.089754581451416, |
| "logps/chosen": -272.4412536621094, |
| "logps/rejected": -269.2596740722656, |
| "loss": 0.6076, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.4502677917480469, |
| "rewards/margins": 0.9978004693984985, |
| "rewards/rejected": -2.448068857192993, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.4017272965192358, |
| "grad_norm": 7.261354446411133, |
| "learning_rate": 1.1970688301491758e-05, |
| "logits/chosen": -2.9888062477111816, |
| "logits/rejected": -2.9264347553253174, |
| "logps/chosen": -276.4285888671875, |
| "logps/rejected": -300.2745056152344, |
| "loss": 0.5551, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.3649812936782837, |
| "rewards/margins": 1.3235492706298828, |
| "rewards/rejected": -2.688530445098877, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.40303585448835383, |
| "grad_norm": 10.943098068237305, |
| "learning_rate": 1.1944517142109398e-05, |
| "logits/chosen": -2.9183526039123535, |
| "logits/rejected": -2.9452197551727295, |
| "logps/chosen": -241.52294921875, |
| "logps/rejected": -289.0009765625, |
| "loss": 0.5724, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5533595085144043, |
| "rewards/margins": 1.3406226634979248, |
| "rewards/rejected": -2.893982172012329, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.4043444124574719, |
| "grad_norm": 4.496222972869873, |
| "learning_rate": 1.1918345982727035e-05, |
| "logits/chosen": -2.9785215854644775, |
| "logits/rejected": -2.9904141426086426, |
| "logps/chosen": -242.4897918701172, |
| "logps/rejected": -264.7366027832031, |
| "loss": 0.5285, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.193482518196106, |
| "rewards/margins": 1.3687217235565186, |
| "rewards/rejected": -2.562204122543335, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.4056529704265899, |
| "grad_norm": 6.670289516448975, |
| "learning_rate": 1.1892174823344675e-05, |
| "logits/chosen": -2.9499192237854004, |
| "logits/rejected": -2.9757332801818848, |
| "logps/chosen": -264.1338806152344, |
| "logps/rejected": -285.5947265625, |
| "loss": 0.397, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.3419907093048096, |
| "rewards/margins": 1.739337682723999, |
| "rewards/rejected": -3.0813281536102295, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.4069615283957079, |
| "grad_norm": 8.708571434020996, |
| "learning_rate": 1.1866003663962315e-05, |
| "logits/chosen": -2.891721248626709, |
| "logits/rejected": -2.9555718898773193, |
| "logps/chosen": -292.3914489746094, |
| "logps/rejected": -285.41632080078125, |
| "loss": 0.5604, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3459782600402832, |
| "rewards/margins": 1.5892525911331177, |
| "rewards/rejected": -2.9352307319641113, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.408270086364826, |
| "grad_norm": 7.487111568450928, |
| "learning_rate": 1.1839832504579953e-05, |
| "logits/chosen": -2.8305745124816895, |
| "logits/rejected": -2.858198642730713, |
| "logps/chosen": -298.46734619140625, |
| "logps/rejected": -303.1842346191406, |
| "loss": 0.5079, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.168373703956604, |
| "rewards/margins": 1.9308793544769287, |
| "rewards/rejected": -3.0992531776428223, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.409578644333944, |
| "grad_norm": 6.141557216644287, |
| "learning_rate": 1.1813661345197593e-05, |
| "logits/chosen": -2.9441263675689697, |
| "logits/rejected": -2.9874396324157715, |
| "logps/chosen": -328.82275390625, |
| "logps/rejected": -282.8536376953125, |
| "loss": 0.3869, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.0537731647491455, |
| "rewards/margins": 2.476039409637451, |
| "rewards/rejected": -3.5298125743865967, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.410887202303062, |
| "grad_norm": 7.910996913909912, |
| "learning_rate": 1.1787490185815233e-05, |
| "logits/chosen": -3.0016915798187256, |
| "logits/rejected": -2.9591012001037598, |
| "logps/chosen": -251.32180786132812, |
| "logps/rejected": -303.64031982421875, |
| "loss": 0.5086, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.980921983718872, |
| "rewards/margins": 1.729113221168518, |
| "rewards/rejected": -3.7100348472595215, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.4121957602721801, |
| "grad_norm": 6.531032562255859, |
| "learning_rate": 1.1761319026432872e-05, |
| "logits/chosen": -2.9189350605010986, |
| "logits/rejected": -2.996328353881836, |
| "logps/chosen": -271.4986572265625, |
| "logps/rejected": -244.9109344482422, |
| "loss": 0.4335, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.749506950378418, |
| "rewards/margins": 2.123915672302246, |
| "rewards/rejected": -3.8734230995178223, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.4135043182412981, |
| "grad_norm": 8.456581115722656, |
| "learning_rate": 1.1735147867050512e-05, |
| "logits/chosen": -3.066283941268921, |
| "logits/rejected": -3.0604310035705566, |
| "logps/chosen": -283.52801513671875, |
| "logps/rejected": -289.96990966796875, |
| "loss": 0.5287, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.6674515008926392, |
| "rewards/margins": 1.9076945781707764, |
| "rewards/rejected": -3.575146198272705, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.4148128762104161, |
| "grad_norm": 9.457367897033691, |
| "learning_rate": 1.1708976707668152e-05, |
| "logits/chosen": -2.846237897872925, |
| "logits/rejected": -2.9507431983947754, |
| "logps/chosen": -326.3301086425781, |
| "logps/rejected": -293.23577880859375, |
| "loss": 0.3963, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.5260112285614014, |
| "rewards/margins": 1.8521969318389893, |
| "rewards/rejected": -3.3782081604003906, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.4161214341795342, |
| "grad_norm": 5.837276458740234, |
| "learning_rate": 1.168280554828579e-05, |
| "logits/chosen": -2.9573163986206055, |
| "logits/rejected": -3.001347064971924, |
| "logps/chosen": -325.12139892578125, |
| "logps/rejected": -266.9141540527344, |
| "loss": 0.4964, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1669349670410156, |
| "rewards/margins": 1.709183931350708, |
| "rewards/rejected": -2.8761186599731445, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.4174299921486522, |
| "grad_norm": 7.193180561065674, |
| "learning_rate": 1.165663438890343e-05, |
| "logits/chosen": -2.921961784362793, |
| "logits/rejected": -2.9729673862457275, |
| "logps/chosen": -324.3099060058594, |
| "logps/rejected": -299.9588317871094, |
| "loss": 0.4785, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.1953010559082031, |
| "rewards/margins": 1.7333354949951172, |
| "rewards/rejected": -2.9286367893218994, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.4187385501177702, |
| "grad_norm": 9.289549827575684, |
| "learning_rate": 1.163046322952107e-05, |
| "logits/chosen": -3.059566020965576, |
| "logits/rejected": -3.0449271202087402, |
| "logps/chosen": -311.70147705078125, |
| "logps/rejected": -314.4573669433594, |
| "loss": 0.4369, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.9875648617744446, |
| "rewards/margins": 1.5768333673477173, |
| "rewards/rejected": -2.5643982887268066, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.4187385501177702, |
| "eval_logits/chosen": -3.0115966796875, |
| "eval_logits/rejected": -3.030435085296631, |
| "eval_logps/chosen": -295.0363464355469, |
| "eval_logps/rejected": -289.9421691894531, |
| "eval_loss": 0.5138216614723206, |
| "eval_rewards/accuracies": 0.7390000224113464, |
| "eval_rewards/chosen": -1.1802964210510254, |
| "eval_rewards/margins": 1.4204589128494263, |
| "eval_rewards/rejected": -2.600755453109741, |
| "eval_runtime": 763.512, |
| "eval_samples_per_second": 2.619, |
| "eval_steps_per_second": 0.327, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.4200471080868883, |
| "grad_norm": 5.8733625411987305, |
| "learning_rate": 1.1604292070138707e-05, |
| "logits/chosen": -2.9307186603546143, |
| "logits/rejected": -2.9098129272460938, |
| "logps/chosen": -273.51361083984375, |
| "logps/rejected": -329.70135498046875, |
| "loss": 0.4702, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.1704580783843994, |
| "rewards/margins": 1.3907597064971924, |
| "rewards/rejected": -2.561217784881592, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.4213556660560063, |
| "grad_norm": 10.016422271728516, |
| "learning_rate": 1.1578120910756347e-05, |
| "logits/chosen": -3.039794921875, |
| "logits/rejected": -3.1136136054992676, |
| "logps/chosen": -261.3140869140625, |
| "logps/rejected": -261.41033935546875, |
| "loss": 0.5391, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.0977901220321655, |
| "rewards/margins": 1.2529916763305664, |
| "rewards/rejected": -2.3507819175720215, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.4226642240251243, |
| "grad_norm": 7.613191604614258, |
| "learning_rate": 1.1551949751373987e-05, |
| "logits/chosen": -2.9766361713409424, |
| "logits/rejected": -3.0103912353515625, |
| "logps/chosen": -278.36700439453125, |
| "logps/rejected": -298.30010986328125, |
| "loss": 0.5107, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.2617706060409546, |
| "rewards/margins": 1.5523849725723267, |
| "rewards/rejected": -2.8141555786132812, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.4239727819942423, |
| "grad_norm": 6.346404552459717, |
| "learning_rate": 1.1525778591991625e-05, |
| "logits/chosen": -2.930680751800537, |
| "logits/rejected": -3.0358388423919678, |
| "logps/chosen": -280.64837646484375, |
| "logps/rejected": -283.0661315917969, |
| "loss": 0.3852, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.1420165300369263, |
| "rewards/margins": 1.7675631046295166, |
| "rewards/rejected": -2.9095797538757324, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.42528133996336037, |
| "grad_norm": 5.357637882232666, |
| "learning_rate": 1.1499607432609266e-05, |
| "logits/chosen": -2.919867515563965, |
| "logits/rejected": -2.999633312225342, |
| "logps/chosen": -260.5415344238281, |
| "logps/rejected": -286.1955261230469, |
| "loss": 0.5426, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.837245762348175, |
| "rewards/margins": 1.5137102603912354, |
| "rewards/rejected": -2.3509559631347656, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.4265898979324784, |
| "grad_norm": 7.890727519989014, |
| "learning_rate": 1.1473436273226906e-05, |
| "logits/chosen": -2.9257941246032715, |
| "logits/rejected": -2.988372802734375, |
| "logps/chosen": -318.0213623046875, |
| "logps/rejected": -305.0336608886719, |
| "loss": 0.493, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.134577989578247, |
| "rewards/margins": 1.7011257410049438, |
| "rewards/rejected": -2.8357038497924805, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.4278984559015964, |
| "grad_norm": 10.864569664001465, |
| "learning_rate": 1.1447265113844544e-05, |
| "logits/chosen": -2.940944194793701, |
| "logits/rejected": -2.9746077060699463, |
| "logps/chosen": -291.7694091796875, |
| "logps/rejected": -308.31719970703125, |
| "loss": 0.6127, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.0913641452789307, |
| "rewards/margins": 1.432383418083191, |
| "rewards/rejected": -2.523747682571411, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.42920701387071447, |
| "grad_norm": 9.867708206176758, |
| "learning_rate": 1.1421093954462184e-05, |
| "logits/chosen": -2.9872641563415527, |
| "logits/rejected": -3.0981907844543457, |
| "logps/chosen": -314.36566162109375, |
| "logps/rejected": -270.13153076171875, |
| "loss": 0.5011, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.106109380722046, |
| "rewards/margins": 1.637105941772461, |
| "rewards/rejected": -2.743215322494507, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.4305155718398325, |
| "grad_norm": 8.497987747192383, |
| "learning_rate": 1.1394922795079824e-05, |
| "logits/chosen": -3.008741617202759, |
| "logits/rejected": -3.0486505031585693, |
| "logps/chosen": -318.6070861816406, |
| "logps/rejected": -341.03662109375, |
| "loss": 0.6013, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.4147374629974365, |
| "rewards/margins": 1.432152271270752, |
| "rewards/rejected": -2.8468894958496094, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.4318241298089505, |
| "grad_norm": 7.426600456237793, |
| "learning_rate": 1.136875163569746e-05, |
| "logits/chosen": -2.854031562805176, |
| "logits/rejected": -2.9757132530212402, |
| "logps/chosen": -286.28009033203125, |
| "logps/rejected": -309.74456787109375, |
| "loss": 0.4337, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.4343945980072021, |
| "rewards/margins": 1.5377739667892456, |
| "rewards/rejected": -2.9721689224243164, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.43313268777806857, |
| "grad_norm": 10.064818382263184, |
| "learning_rate": 1.1342580476315101e-05, |
| "logits/chosen": -2.9875996112823486, |
| "logits/rejected": -3.022408962249756, |
| "logps/chosen": -319.95794677734375, |
| "logps/rejected": -289.5148620605469, |
| "loss": 0.5412, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.296550989151001, |
| "rewards/margins": 1.3865230083465576, |
| "rewards/rejected": -2.6830739974975586, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.4344412457471866, |
| "grad_norm": 8.107555389404297, |
| "learning_rate": 1.1316409316932741e-05, |
| "logits/chosen": -2.915742874145508, |
| "logits/rejected": -2.943075656890869, |
| "logps/chosen": -273.85931396484375, |
| "logps/rejected": -286.5356140136719, |
| "loss": 0.4829, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.364490270614624, |
| "rewards/margins": 1.4877455234527588, |
| "rewards/rejected": -2.852235794067383, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.4357498037163046, |
| "grad_norm": 6.44484281539917, |
| "learning_rate": 1.129023815755038e-05, |
| "logits/chosen": -2.88740611076355, |
| "logits/rejected": -3.0205512046813965, |
| "logps/chosen": -264.7518615722656, |
| "logps/rejected": -284.6231384277344, |
| "loss": 0.466, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.3103464841842651, |
| "rewards/margins": 1.4976861476898193, |
| "rewards/rejected": -2.808032751083374, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.43705836168542267, |
| "grad_norm": 4.719677925109863, |
| "learning_rate": 1.126406699816802e-05, |
| "logits/chosen": -3.025489330291748, |
| "logits/rejected": -3.0286028385162354, |
| "logps/chosen": -285.00445556640625, |
| "logps/rejected": -307.9769592285156, |
| "loss": 0.5365, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5843397378921509, |
| "rewards/margins": 1.2534945011138916, |
| "rewards/rejected": -2.837834358215332, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.4383669196545407, |
| "grad_norm": 4.686459541320801, |
| "learning_rate": 1.123789583878566e-05, |
| "logits/chosen": -2.8781819343566895, |
| "logits/rejected": -2.9757750034332275, |
| "logps/chosen": -272.54534912109375, |
| "logps/rejected": -261.4615478515625, |
| "loss": 0.3518, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2964471578598022, |
| "rewards/margins": 2.0138936042785645, |
| "rewards/rejected": -3.3103408813476562, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.4396754776236587, |
| "grad_norm": 9.84245777130127, |
| "learning_rate": 1.1211724679403298e-05, |
| "logits/chosen": -3.0288212299346924, |
| "logits/rejected": -3.009911060333252, |
| "logps/chosen": -314.16424560546875, |
| "logps/rejected": -322.6404724121094, |
| "loss": 0.6753, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.9346834421157837, |
| "rewards/margins": 1.1214497089385986, |
| "rewards/rejected": -3.05613374710083, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.44098403559277677, |
| "grad_norm": 10.62396240234375, |
| "learning_rate": 1.1185553520020938e-05, |
| "logits/chosen": -3.026965618133545, |
| "logits/rejected": -2.9794087409973145, |
| "logps/chosen": -311.1338195800781, |
| "logps/rejected": -302.8864440917969, |
| "loss": 0.5711, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.7278903722763062, |
| "rewards/margins": 1.2248623371124268, |
| "rewards/rejected": -2.9527530670166016, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.44229259356189476, |
| "grad_norm": 11.11765193939209, |
| "learning_rate": 1.1159382360638578e-05, |
| "logits/chosen": -2.982098340988159, |
| "logits/rejected": -3.0129332542419434, |
| "logps/chosen": -263.9950256347656, |
| "logps/rejected": -278.71240234375, |
| "loss": 0.4873, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5713714361190796, |
| "rewards/margins": 1.3477413654327393, |
| "rewards/rejected": -2.9191126823425293, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.4436011515310128, |
| "grad_norm": 6.4845194816589355, |
| "learning_rate": 1.1133211201256216e-05, |
| "logits/chosen": -2.9548566341400146, |
| "logits/rejected": -3.0447587966918945, |
| "logps/chosen": -256.5769958496094, |
| "logps/rejected": -324.57464599609375, |
| "loss": 0.4231, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.5655686855316162, |
| "rewards/margins": 1.5172860622406006, |
| "rewards/rejected": -3.082854747772217, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.44490970950013087, |
| "grad_norm": 8.451342582702637, |
| "learning_rate": 1.1107040041873856e-05, |
| "logits/chosen": -2.9164369106292725, |
| "logits/rejected": -2.9938321113586426, |
| "logps/chosen": -264.7562561035156, |
| "logps/rejected": -290.6343688964844, |
| "loss": 0.6177, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -2.135646343231201, |
| "rewards/margins": 1.4988205432891846, |
| "rewards/rejected": -3.6344668865203857, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.44621826746924886, |
| "grad_norm": 7.753484725952148, |
| "learning_rate": 1.1080868882491496e-05, |
| "logits/chosen": -2.8294832706451416, |
| "logits/rejected": -2.9608490467071533, |
| "logps/chosen": -277.0535888671875, |
| "logps/rejected": -280.58575439453125, |
| "loss": 0.5377, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -2.4995322227478027, |
| "rewards/margins": 1.5786781311035156, |
| "rewards/rejected": -4.078210353851318, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.4475268254383669, |
| "grad_norm": 8.849166870117188, |
| "learning_rate": 1.1054697723109137e-05, |
| "logits/chosen": -3.094038248062134, |
| "logits/rejected": -3.051264524459839, |
| "logps/chosen": -291.5003356933594, |
| "logps/rejected": -312.36102294921875, |
| "loss": 0.5622, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.875213384628296, |
| "rewards/margins": 1.4177398681640625, |
| "rewards/rejected": -4.292952537536621, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.44883538340748497, |
| "grad_norm": 7.06804084777832, |
| "learning_rate": 1.1028526563726773e-05, |
| "logits/chosen": -3.0508930683135986, |
| "logits/rejected": -3.085616111755371, |
| "logps/chosen": -298.27947998046875, |
| "logps/rejected": -285.8116760253906, |
| "loss": 0.4942, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -2.573955535888672, |
| "rewards/margins": 1.687349557876587, |
| "rewards/rejected": -4.261305332183838, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.45014394137660296, |
| "grad_norm": 9.525605201721191, |
| "learning_rate": 1.1002355404344413e-05, |
| "logits/chosen": -3.011475086212158, |
| "logits/rejected": -2.99470591545105, |
| "logps/chosen": -324.5032958984375, |
| "logps/rejected": -346.570068359375, |
| "loss": 0.4977, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -2.779179096221924, |
| "rewards/margins": 1.9469417333602905, |
| "rewards/rejected": -4.726120948791504, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.451452499345721, |
| "grad_norm": 7.508541107177734, |
| "learning_rate": 1.0976184244962053e-05, |
| "logits/chosen": -3.010701894760132, |
| "logits/rejected": -3.016294479370117, |
| "logps/chosen": -337.5274963378906, |
| "logps/rejected": -315.5035705566406, |
| "loss": 0.5408, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -2.488309621810913, |
| "rewards/margins": 1.4098851680755615, |
| "rewards/rejected": -3.8981945514678955, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.45276105731483907, |
| "grad_norm": 8.462430953979492, |
| "learning_rate": 1.0950013085579692e-05, |
| "logits/chosen": -2.9911746978759766, |
| "logits/rejected": -3.018477201461792, |
| "logps/chosen": -363.7921447753906, |
| "logps/rejected": -355.11370849609375, |
| "loss": 0.378, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -2.2156341075897217, |
| "rewards/margins": 2.0937671661376953, |
| "rewards/rejected": -4.309401512145996, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.45406961528395706, |
| "grad_norm": 10.691130638122559, |
| "learning_rate": 1.0923841926197332e-05, |
| "logits/chosen": -2.9475789070129395, |
| "logits/rejected": -3.0337929725646973, |
| "logps/chosen": -320.70965576171875, |
| "logps/rejected": -280.87640380859375, |
| "loss": 0.4101, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -2.593120574951172, |
| "rewards/margins": 2.064436197280884, |
| "rewards/rejected": -4.657556533813477, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.4553781732530751, |
| "grad_norm": 11.62152099609375, |
| "learning_rate": 1.0897670766814972e-05, |
| "logits/chosen": -2.9737391471862793, |
| "logits/rejected": -2.951845645904541, |
| "logps/chosen": -318.7091369628906, |
| "logps/rejected": -347.60369873046875, |
| "loss": 0.4556, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -3.3738818168640137, |
| "rewards/margins": 1.9359108209609985, |
| "rewards/rejected": -5.309792995452881, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.45668673122219317, |
| "grad_norm": 8.028305053710938, |
| "learning_rate": 1.087149960743261e-05, |
| "logits/chosen": -2.9262282848358154, |
| "logits/rejected": -3.0200741291046143, |
| "logps/chosen": -309.8990783691406, |
| "logps/rejected": -284.8616027832031, |
| "loss": 0.6053, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -3.4491634368896484, |
| "rewards/margins": 1.6944007873535156, |
| "rewards/rejected": -5.143564224243164, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.45799528919131116, |
| "grad_norm": 8.402384757995605, |
| "learning_rate": 1.084532844805025e-05, |
| "logits/chosen": -2.9701414108276367, |
| "logits/rejected": -3.0136189460754395, |
| "logps/chosen": -313.6144104003906, |
| "logps/rejected": -314.5592346191406, |
| "loss": 0.5168, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -3.197741746902466, |
| "rewards/margins": 1.6113052368164062, |
| "rewards/rejected": -4.809047222137451, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.4593038471604292, |
| "grad_norm": 6.7802300453186035, |
| "learning_rate": 1.081915728866789e-05, |
| "logits/chosen": -3.0216054916381836, |
| "logits/rejected": -2.97468638420105, |
| "logps/chosen": -314.1781311035156, |
| "logps/rejected": -302.1847229003906, |
| "loss": 0.4397, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -2.9468579292297363, |
| "rewards/margins": 1.776281714439392, |
| "rewards/rejected": -4.723139762878418, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.46061240512954726, |
| "grad_norm": 8.857020378112793, |
| "learning_rate": 1.0792986129285527e-05, |
| "logits/chosen": -2.968179225921631, |
| "logits/rejected": -3.0504894256591797, |
| "logps/chosen": -324.0634460449219, |
| "logps/rejected": -351.1040344238281, |
| "loss": 0.4999, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.852264881134033, |
| "rewards/margins": 1.665531873703003, |
| "rewards/rejected": -4.517796993255615, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.46192096309866526, |
| "grad_norm": 10.597983360290527, |
| "learning_rate": 1.0766814969903167e-05, |
| "logits/chosen": -2.9628982543945312, |
| "logits/rejected": -2.9831249713897705, |
| "logps/chosen": -315.58612060546875, |
| "logps/rejected": -304.35260009765625, |
| "loss": 0.5103, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -3.043048143386841, |
| "rewards/margins": 1.534234881401062, |
| "rewards/rejected": -4.577282905578613, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.4632295210677833, |
| "grad_norm": 6.825730323791504, |
| "learning_rate": 1.0740643810520807e-05, |
| "logits/chosen": -2.9639222621917725, |
| "logits/rejected": -2.9877357482910156, |
| "logps/chosen": -349.4540100097656, |
| "logps/rejected": -332.49505615234375, |
| "loss": 0.5114, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -2.425964832305908, |
| "rewards/margins": 1.4884039163589478, |
| "rewards/rejected": -3.9143683910369873, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.4645380790369013, |
| "grad_norm": 7.913449764251709, |
| "learning_rate": 1.0714472651138445e-05, |
| "logits/chosen": -3.0387954711914062, |
| "logits/rejected": -2.9809329509735107, |
| "logps/chosen": -348.6420593261719, |
| "logps/rejected": -361.21490478515625, |
| "loss": 0.4134, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -2.1633763313293457, |
| "rewards/margins": 1.880319595336914, |
| "rewards/rejected": -4.043695449829102, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.46584663700601936, |
| "grad_norm": 6.947761535644531, |
| "learning_rate": 1.0688301491756086e-05, |
| "logits/chosen": -3.0049796104431152, |
| "logits/rejected": -3.0774781703948975, |
| "logps/chosen": -317.28387451171875, |
| "logps/rejected": -278.49609375, |
| "loss": 0.4753, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.159966468811035, |
| "rewards/margins": 1.6736412048339844, |
| "rewards/rejected": -3.8336079120635986, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.4671551949751374, |
| "grad_norm": 7.269952774047852, |
| "learning_rate": 1.0662130332373726e-05, |
| "logits/chosen": -3.0013515949249268, |
| "logits/rejected": -3.048266649246216, |
| "logps/chosen": -316.55950927734375, |
| "logps/rejected": -275.30780029296875, |
| "loss": 0.4839, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8857390880584717, |
| "rewards/margins": 1.6397733688354492, |
| "rewards/rejected": -3.5255126953125, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.4684637529442554, |
| "grad_norm": 7.783169269561768, |
| "learning_rate": 1.0635959172991364e-05, |
| "logits/chosen": -2.9019923210144043, |
| "logits/rejected": -2.9270970821380615, |
| "logps/chosen": -232.925048828125, |
| "logps/rejected": -278.81976318359375, |
| "loss": 0.5087, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.096328020095825, |
| "rewards/margins": 1.481418251991272, |
| "rewards/rejected": -3.5777459144592285, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.46977231091337346, |
| "grad_norm": 5.545890808105469, |
| "learning_rate": 1.0609788013609004e-05, |
| "logits/chosen": -2.8715972900390625, |
| "logits/rejected": -3.0421102046966553, |
| "logps/chosen": -276.5980224609375, |
| "logps/rejected": -261.9891662597656, |
| "loss": 0.5702, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.9896501302719116, |
| "rewards/margins": 1.5085725784301758, |
| "rewards/rejected": -3.498222827911377, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.4710808688824915, |
| "grad_norm": 7.721518039703369, |
| "learning_rate": 1.0583616854226644e-05, |
| "logits/chosen": -2.9710397720336914, |
| "logits/rejected": -3.0190649032592773, |
| "logps/chosen": -342.3359069824219, |
| "logps/rejected": -313.8819274902344, |
| "loss": 0.5255, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.6568866968154907, |
| "rewards/margins": 1.3916410207748413, |
| "rewards/rejected": -3.048527479171753, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.4723894268516095, |
| "grad_norm": 6.621272563934326, |
| "learning_rate": 1.055744569484428e-05, |
| "logits/chosen": -2.8590407371520996, |
| "logits/rejected": -2.9601387977600098, |
| "logps/chosen": -274.2496643066406, |
| "logps/rejected": -268.46722412109375, |
| "loss": 0.4795, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -2.092193126678467, |
| "rewards/margins": 1.7326141595840454, |
| "rewards/rejected": -3.824807643890381, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.47369798482072756, |
| "grad_norm": 6.7753801345825195, |
| "learning_rate": 1.0531274535461921e-05, |
| "logits/chosen": -2.928391695022583, |
| "logits/rejected": -3.0080180168151855, |
| "logps/chosen": -361.22479248046875, |
| "logps/rejected": -321.481201171875, |
| "loss": 0.546, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.5347732305526733, |
| "rewards/margins": 1.3956406116485596, |
| "rewards/rejected": -2.9304137229919434, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.4750065427898456, |
| "grad_norm": 8.390039443969727, |
| "learning_rate": 1.0505103376079561e-05, |
| "logits/chosen": -2.939857244491577, |
| "logits/rejected": -3.047630786895752, |
| "logps/chosen": -316.05804443359375, |
| "logps/rejected": -300.89349365234375, |
| "loss": 0.3695, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.7353222370147705, |
| "rewards/margins": 1.6302111148834229, |
| "rewards/rejected": -3.3655333518981934, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.4763151007589636, |
| "grad_norm": 5.884995937347412, |
| "learning_rate": 1.04789322166972e-05, |
| "logits/chosen": -3.018889904022217, |
| "logits/rejected": -3.0105044841766357, |
| "logps/chosen": -300.02117919921875, |
| "logps/rejected": -284.6014709472656, |
| "loss": 0.4955, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.9147802591323853, |
| "rewards/margins": 1.5021367073059082, |
| "rewards/rejected": -3.416916608810425, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.47762365872808166, |
| "grad_norm": 7.659745216369629, |
| "learning_rate": 1.045276105731484e-05, |
| "logits/chosen": -2.9221673011779785, |
| "logits/rejected": -3.043600082397461, |
| "logps/chosen": -303.6266784667969, |
| "logps/rejected": -328.57269287109375, |
| "loss": 0.6357, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.8578948974609375, |
| "rewards/margins": 1.369075059890747, |
| "rewards/rejected": -3.2269699573516846, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.4789322166971997, |
| "grad_norm": 8.604077339172363, |
| "learning_rate": 1.042658989793248e-05, |
| "logits/chosen": -2.9077818393707275, |
| "logits/rejected": -2.9542300701141357, |
| "logps/chosen": -319.1806335449219, |
| "logps/rejected": -304.31854248046875, |
| "loss": 0.5592, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -2.1609792709350586, |
| "rewards/margins": 1.1750867366790771, |
| "rewards/rejected": -3.3360657691955566, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.4802407746663177, |
| "grad_norm": 7.944966793060303, |
| "learning_rate": 1.0400418738550118e-05, |
| "logits/chosen": -2.9522438049316406, |
| "logits/rejected": -3.004556179046631, |
| "logps/chosen": -299.261474609375, |
| "logps/rejected": -300.3248596191406, |
| "loss": 0.4544, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.2106218338012695, |
| "rewards/margins": 1.8094288110733032, |
| "rewards/rejected": -3.020050287246704, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.48154933263543576, |
| "grad_norm": 7.704443454742432, |
| "learning_rate": 1.0374247579167758e-05, |
| "logits/chosen": -2.9210996627807617, |
| "logits/rejected": -2.998807430267334, |
| "logps/chosen": -325.70635986328125, |
| "logps/rejected": -287.83367919921875, |
| "loss": 0.5732, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.7213985919952393, |
| "rewards/margins": 1.4890426397323608, |
| "rewards/rejected": -3.2104415893554688, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.4828578906045538, |
| "grad_norm": 8.773541450500488, |
| "learning_rate": 1.0348076419785398e-05, |
| "logits/chosen": -2.9155502319335938, |
| "logits/rejected": -2.884643793106079, |
| "logps/chosen": -307.8321228027344, |
| "logps/rejected": -301.53973388671875, |
| "loss": 0.5069, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5945721864700317, |
| "rewards/margins": 1.4130241870880127, |
| "rewards/rejected": -3.007596492767334, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.4841664485736718, |
| "grad_norm": 9.956704139709473, |
| "learning_rate": 1.0321905260403036e-05, |
| "logits/chosen": -2.9357645511627197, |
| "logits/rejected": -2.912456512451172, |
| "logps/chosen": -341.252197265625, |
| "logps/rejected": -370.76727294921875, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.7379004955291748, |
| "rewards/margins": 1.0990798473358154, |
| "rewards/rejected": -2.8369803428649902, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.48547500654278986, |
| "grad_norm": 5.837508201599121, |
| "learning_rate": 1.0295734101020676e-05, |
| "logits/chosen": -2.8405072689056396, |
| "logits/rejected": -2.8357603549957275, |
| "logps/chosen": -264.3638916015625, |
| "logps/rejected": -256.2645263671875, |
| "loss": 0.4158, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.5331943035125732, |
| "rewards/margins": 1.753406286239624, |
| "rewards/rejected": -3.2866005897521973, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.48678356451190785, |
| "grad_norm": 5.697160243988037, |
| "learning_rate": 1.0269562941638316e-05, |
| "logits/chosen": -2.8203680515289307, |
| "logits/rejected": -2.922636032104492, |
| "logps/chosen": -266.6560974121094, |
| "logps/rejected": -267.4014892578125, |
| "loss": 0.4632, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.468200445175171, |
| "rewards/margins": 1.5373655557632446, |
| "rewards/rejected": -3.005565881729126, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.4880921224810259, |
| "grad_norm": 6.479247570037842, |
| "learning_rate": 1.0243391782255957e-05, |
| "logits/chosen": -2.966212511062622, |
| "logits/rejected": -2.9255197048187256, |
| "logps/chosen": -267.66082763671875, |
| "logps/rejected": -291.5536193847656, |
| "loss": 0.4129, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.4823805093765259, |
| "rewards/margins": 1.5329614877700806, |
| "rewards/rejected": -3.0153422355651855, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.48940068045014395, |
| "grad_norm": 8.965081214904785, |
| "learning_rate": 1.0217220622873593e-05, |
| "logits/chosen": -3.036112070083618, |
| "logits/rejected": -3.028759241104126, |
| "logps/chosen": -281.88330078125, |
| "logps/rejected": -290.57879638671875, |
| "loss": 0.4945, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8533744812011719, |
| "rewards/margins": 1.3441312313079834, |
| "rewards/rejected": -3.1975057125091553, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.49070923841926195, |
| "grad_norm": 9.065515518188477, |
| "learning_rate": 1.0191049463491233e-05, |
| "logits/chosen": -2.9614920616149902, |
| "logits/rejected": -2.9071099758148193, |
| "logps/chosen": -296.06732177734375, |
| "logps/rejected": -283.09521484375, |
| "loss": 0.5388, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -2.123257875442505, |
| "rewards/margins": 1.5720679759979248, |
| "rewards/rejected": -3.6953258514404297, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.49201779638838, |
| "grad_norm": 7.550040245056152, |
| "learning_rate": 1.0164878304108873e-05, |
| "logits/chosen": -2.826888084411621, |
| "logits/rejected": -2.8919272422790527, |
| "logps/chosen": -321.03424072265625, |
| "logps/rejected": -347.545654296875, |
| "loss": 0.4234, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.8993819952011108, |
| "rewards/margins": 1.9633245468139648, |
| "rewards/rejected": -3.8627066612243652, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.49332635435749805, |
| "grad_norm": 9.235989570617676, |
| "learning_rate": 1.0138707144726512e-05, |
| "logits/chosen": -2.784430980682373, |
| "logits/rejected": -2.9601588249206543, |
| "logps/chosen": -355.8182067871094, |
| "logps/rejected": -314.7313232421875, |
| "loss": 0.4828, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.221585273742676, |
| "rewards/margins": 1.3879907131195068, |
| "rewards/rejected": -3.609576463699341, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.49463491232661605, |
| "grad_norm": 8.161865234375, |
| "learning_rate": 1.0112535985344152e-05, |
| "logits/chosen": -2.9995341300964355, |
| "logits/rejected": -3.0830676555633545, |
| "logps/chosen": -284.3017883300781, |
| "logps/rejected": -287.4847106933594, |
| "loss": 0.6595, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.8426592350006104, |
| "rewards/margins": 1.0771812200546265, |
| "rewards/rejected": -2.9198403358459473, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.4959434702957341, |
| "grad_norm": 9.801424026489258, |
| "learning_rate": 1.0086364825961792e-05, |
| "logits/chosen": -2.9873173236846924, |
| "logits/rejected": -2.9882421493530273, |
| "logps/chosen": -266.23260498046875, |
| "logps/rejected": -304.121337890625, |
| "loss": 0.506, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.6780809164047241, |
| "rewards/margins": 1.3383406400680542, |
| "rewards/rejected": -3.0164215564727783, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.49725202826485215, |
| "grad_norm": 8.835881233215332, |
| "learning_rate": 1.006019366657943e-05, |
| "logits/chosen": -2.8944599628448486, |
| "logits/rejected": -2.927996873855591, |
| "logps/chosen": -292.03179931640625, |
| "logps/rejected": -257.7762756347656, |
| "loss": 0.4871, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8646316528320312, |
| "rewards/margins": 1.4865410327911377, |
| "rewards/rejected": -3.351172685623169, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.49856058623397015, |
| "grad_norm": 6.250274181365967, |
| "learning_rate": 1.003402250719707e-05, |
| "logits/chosen": -3.011723518371582, |
| "logits/rejected": -2.9369235038757324, |
| "logps/chosen": -291.50860595703125, |
| "logps/rejected": -304.639404296875, |
| "loss": 0.6066, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.6494977474212646, |
| "rewards/margins": 1.0280935764312744, |
| "rewards/rejected": -2.6775918006896973, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.4998691442030882, |
| "grad_norm": 8.099715232849121, |
| "learning_rate": 1.000785134781471e-05, |
| "logits/chosen": -2.9518039226531982, |
| "logits/rejected": -2.928983688354492, |
| "logps/chosen": -266.6195983886719, |
| "logps/rejected": -278.40496826171875, |
| "loss": 0.5232, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.6815688610076904, |
| "rewards/margins": 1.2674450874328613, |
| "rewards/rejected": -2.9490139484405518, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.5011777021722063, |
| "grad_norm": 7.579314231872559, |
| "learning_rate": 9.981680188432349e-06, |
| "logits/chosen": -2.948557138442993, |
| "logits/rejected": -2.9623727798461914, |
| "logps/chosen": -265.8631896972656, |
| "logps/rejected": -255.78439331054688, |
| "loss": 0.5151, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4069931507110596, |
| "rewards/margins": 1.1944947242736816, |
| "rewards/rejected": -2.601487636566162, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.5024862601413242, |
| "grad_norm": 6.461823463439941, |
| "learning_rate": 9.955509029049987e-06, |
| "logits/chosen": -2.9259510040283203, |
| "logits/rejected": -2.986363649368286, |
| "logps/chosen": -258.81256103515625, |
| "logps/rejected": -254.9714813232422, |
| "loss": 0.5218, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.7223705053329468, |
| "rewards/margins": 1.1844781637191772, |
| "rewards/rejected": -2.906848669052124, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.5037948181104422, |
| "grad_norm": 8.694483757019043, |
| "learning_rate": 9.929337869667627e-06, |
| "logits/chosen": -2.9782071113586426, |
| "logits/rejected": -3.065417766571045, |
| "logps/chosen": -320.1847229003906, |
| "logps/rejected": -307.56927490234375, |
| "loss": 0.5354, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.4156098365783691, |
| "rewards/margins": 1.2600579261779785, |
| "rewards/rejected": -2.6756675243377686, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.5051033760795604, |
| "grad_norm": 8.963370323181152, |
| "learning_rate": 9.903166710285267e-06, |
| "logits/chosen": -2.980201244354248, |
| "logits/rejected": -3.0007660388946533, |
| "logps/chosen": -314.35528564453125, |
| "logps/rejected": -313.6076354980469, |
| "loss": 0.4637, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.692043662071228, |
| "rewards/margins": 1.345879077911377, |
| "rewards/rejected": -3.0379226207733154, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.5064119340486783, |
| "grad_norm": 4.796964645385742, |
| "learning_rate": 9.876995550902906e-06, |
| "logits/chosen": -3.02583646774292, |
| "logits/rejected": -2.948720932006836, |
| "logps/chosen": -288.92779541015625, |
| "logps/rejected": -283.83978271484375, |
| "loss": 0.742, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -2.043752431869507, |
| "rewards/margins": 0.7483810186386108, |
| "rewards/rejected": -2.7921335697174072, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.5077204920177963, |
| "grad_norm": 8.783916473388672, |
| "learning_rate": 9.850824391520546e-06, |
| "logits/chosen": -2.9932680130004883, |
| "logits/rejected": -2.9787936210632324, |
| "logps/chosen": -312.96728515625, |
| "logps/rejected": -295.27716064453125, |
| "loss": 0.6287, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.8972053527832031, |
| "rewards/margins": 1.0939700603485107, |
| "rewards/rejected": -2.991175651550293, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.5090290499869145, |
| "grad_norm": 9.5989351272583, |
| "learning_rate": 9.824653232138186e-06, |
| "logits/chosen": -2.9638044834136963, |
| "logits/rejected": -3.0555965900421143, |
| "logps/chosen": -278.03289794921875, |
| "logps/rejected": -268.63946533203125, |
| "loss": 0.5472, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.7958492040634155, |
| "rewards/margins": 1.3007774353027344, |
| "rewards/rejected": -3.0966267585754395, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.5103376079560324, |
| "grad_norm": 8.681360244750977, |
| "learning_rate": 9.798482072755824e-06, |
| "logits/chosen": -2.989262342453003, |
| "logits/rejected": -2.974743127822876, |
| "logps/chosen": -254.3424072265625, |
| "logps/rejected": -264.46270751953125, |
| "loss": 0.4737, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.655994176864624, |
| "rewards/margins": 1.5236682891845703, |
| "rewards/rejected": -3.1796624660491943, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.5116461659251504, |
| "grad_norm": 5.015665531158447, |
| "learning_rate": 9.772310913373462e-06, |
| "logits/chosen": -2.7786598205566406, |
| "logits/rejected": -2.894463062286377, |
| "logps/chosen": -297.5106201171875, |
| "logps/rejected": -282.90631103515625, |
| "loss": 0.4604, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.898776650428772, |
| "rewards/margins": 1.5486475229263306, |
| "rewards/rejected": -3.4474239349365234, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.5129547238942685, |
| "grad_norm": 6.68208122253418, |
| "learning_rate": 9.746139753991103e-06, |
| "logits/chosen": -2.8744349479675293, |
| "logits/rejected": -2.9163289070129395, |
| "logps/chosen": -219.7449951171875, |
| "logps/rejected": -280.440673828125, |
| "loss": 0.4416, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8795044422149658, |
| "rewards/margins": 1.2291165590286255, |
| "rewards/rejected": -3.1086208820343018, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.5142632818633865, |
| "grad_norm": 6.96512508392334, |
| "learning_rate": 9.719968594608743e-06, |
| "logits/chosen": -3.036520004272461, |
| "logits/rejected": -3.0686442852020264, |
| "logps/chosen": -270.5203857421875, |
| "logps/rejected": -249.11648559570312, |
| "loss": 0.4188, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.7024023532867432, |
| "rewards/margins": 1.3636255264282227, |
| "rewards/rejected": -3.066027879714966, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.5155718398325045, |
| "grad_norm": 7.726841926574707, |
| "learning_rate": 9.693797435226381e-06, |
| "logits/chosen": -2.8714888095855713, |
| "logits/rejected": -2.9929287433624268, |
| "logps/chosen": -347.3719482421875, |
| "logps/rejected": -307.1537170410156, |
| "loss": 0.5226, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.9428255558013916, |
| "rewards/margins": 1.3433595895767212, |
| "rewards/rejected": -3.2861855030059814, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.5168803978016226, |
| "grad_norm": 9.329137802124023, |
| "learning_rate": 9.667626275844021e-06, |
| "logits/chosen": -2.8178086280822754, |
| "logits/rejected": -2.9708805084228516, |
| "logps/chosen": -261.303955078125, |
| "logps/rejected": -262.2368469238281, |
| "loss": 0.5773, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.8946533203125, |
| "rewards/margins": 1.4651789665222168, |
| "rewards/rejected": -3.3598320484161377, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.5181889557707406, |
| "grad_norm": 8.616991996765137, |
| "learning_rate": 9.64145511646166e-06, |
| "logits/chosen": -2.7979893684387207, |
| "logits/rejected": -2.8203110694885254, |
| "logps/chosen": -295.17608642578125, |
| "logps/rejected": -279.5721740722656, |
| "loss": 0.5596, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -2.0426454544067383, |
| "rewards/margins": 1.3784496784210205, |
| "rewards/rejected": -3.4210948944091797, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.5194975137398586, |
| "grad_norm": 12.504921913146973, |
| "learning_rate": 9.6152839570793e-06, |
| "logits/chosen": -2.8116393089294434, |
| "logits/rejected": -2.9229259490966797, |
| "logps/chosen": -360.49200439453125, |
| "logps/rejected": -346.6287841796875, |
| "loss": 0.6584, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.7081302404403687, |
| "rewards/margins": 1.1589677333831787, |
| "rewards/rejected": -2.867098093032837, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.5208060717089767, |
| "grad_norm": 9.76529312133789, |
| "learning_rate": 9.58911279769694e-06, |
| "logits/chosen": -2.9271957874298096, |
| "logits/rejected": -2.9286372661590576, |
| "logps/chosen": -304.6205749511719, |
| "logps/rejected": -270.85345458984375, |
| "loss": 0.5885, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.750661849975586, |
| "rewards/margins": 1.2978880405426025, |
| "rewards/rejected": -3.0485496520996094, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.5221146296780947, |
| "grad_norm": 6.337282180786133, |
| "learning_rate": 9.562941638314578e-06, |
| "logits/chosen": -2.8846378326416016, |
| "logits/rejected": -3.0143656730651855, |
| "logps/chosen": -344.0283203125, |
| "logps/rejected": -331.4059753417969, |
| "loss": 0.5132, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4416577816009521, |
| "rewards/margins": 1.4556671380996704, |
| "rewards/rejected": -2.897324800491333, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.5234231876472127, |
| "grad_norm": 6.361517429351807, |
| "learning_rate": 9.536770478932218e-06, |
| "logits/chosen": -2.839524507522583, |
| "logits/rejected": -2.855935573577881, |
| "logps/chosen": -287.5265808105469, |
| "logps/rejected": -308.75701904296875, |
| "loss": 0.5585, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.3233356475830078, |
| "rewards/margins": 1.5039829015731812, |
| "rewards/rejected": -2.8273186683654785, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5234231876472127, |
| "eval_logits/chosen": -2.9509613513946533, |
| "eval_logits/rejected": -2.9671716690063477, |
| "eval_logps/chosen": -295.8841552734375, |
| "eval_logps/rejected": -290.96044921875, |
| "eval_loss": 0.5118626952171326, |
| "eval_rewards/accuracies": 0.7434999942779541, |
| "eval_rewards/chosen": -1.2650753259658813, |
| "eval_rewards/margins": 1.4375065565109253, |
| "eval_rewards/rejected": -2.7025818824768066, |
| "eval_runtime": 764.2545, |
| "eval_samples_per_second": 2.617, |
| "eval_steps_per_second": 0.327, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5247317456163308, |
| "grad_norm": 8.742295265197754, |
| "learning_rate": 9.510599319549856e-06, |
| "logits/chosen": -2.8847405910491943, |
| "logits/rejected": -2.829425811767578, |
| "logps/chosen": -284.7441711425781, |
| "logps/rejected": -327.94415283203125, |
| "loss": 0.5754, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.0865317583084106, |
| "rewards/margins": 1.353413701057434, |
| "rewards/rejected": -2.439945697784424, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.5260403035854488, |
| "grad_norm": 9.733824729919434, |
| "learning_rate": 9.484428160167496e-06, |
| "logits/chosen": -2.9705443382263184, |
| "logits/rejected": -2.9440743923187256, |
| "logps/chosen": -305.75921630859375, |
| "logps/rejected": -309.7073669433594, |
| "loss": 0.5781, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.1363924741744995, |
| "rewards/margins": 1.0744402408599854, |
| "rewards/rejected": -2.2108325958251953, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.5273488615545668, |
| "grad_norm": 5.931386470794678, |
| "learning_rate": 9.458257000785136e-06, |
| "logits/chosen": -2.9103846549987793, |
| "logits/rejected": -2.9428811073303223, |
| "logps/chosen": -325.4049987792969, |
| "logps/rejected": -326.3647155761719, |
| "loss": 0.4821, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.080718994140625, |
| "rewards/margins": 1.3079969882965088, |
| "rewards/rejected": -2.388716220855713, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.528657419523685, |
| "grad_norm": 8.640279769897461, |
| "learning_rate": 9.432085841402775e-06, |
| "logits/chosen": -2.983135938644409, |
| "logits/rejected": -3.0264086723327637, |
| "logps/chosen": -285.4974060058594, |
| "logps/rejected": -277.7830505371094, |
| "loss": 0.5414, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.9102684259414673, |
| "rewards/margins": 1.2370755672454834, |
| "rewards/rejected": -2.147343873977661, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.5299659774928029, |
| "grad_norm": 6.43532133102417, |
| "learning_rate": 9.405914682020413e-06, |
| "logits/chosen": -2.9441254138946533, |
| "logits/rejected": -2.9812066555023193, |
| "logps/chosen": -336.6988525390625, |
| "logps/rejected": -311.3836364746094, |
| "loss": 0.627, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.9982717633247375, |
| "rewards/margins": 1.2580807209014893, |
| "rewards/rejected": -2.256352663040161, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.5312745354619209, |
| "grad_norm": 7.047455787658691, |
| "learning_rate": 9.379743522638053e-06, |
| "logits/chosen": -2.9485602378845215, |
| "logits/rejected": -2.8787522315979004, |
| "logps/chosen": -304.49786376953125, |
| "logps/rejected": -285.79425048828125, |
| "loss": 0.6052, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.8985692858695984, |
| "rewards/margins": 1.0381746292114258, |
| "rewards/rejected": -1.936743974685669, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.532583093431039, |
| "grad_norm": 8.520474433898926, |
| "learning_rate": 9.353572363255693e-06, |
| "logits/chosen": -2.8559398651123047, |
| "logits/rejected": -2.9105234146118164, |
| "logps/chosen": -301.0296936035156, |
| "logps/rejected": -269.9303894042969, |
| "loss": 0.5853, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.1256568431854248, |
| "rewards/margins": 1.10567307472229, |
| "rewards/rejected": -2.231329917907715, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.533891651400157, |
| "grad_norm": 7.910058498382568, |
| "learning_rate": 9.327401203873332e-06, |
| "logits/chosen": -2.9811348915100098, |
| "logits/rejected": -3.01061749458313, |
| "logps/chosen": -310.69586181640625, |
| "logps/rejected": -335.1217956542969, |
| "loss": 0.6182, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.7057936191558838, |
| "rewards/margins": 0.8416263461112976, |
| "rewards/rejected": -1.547419786453247, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.535200209369275, |
| "grad_norm": 6.437031269073486, |
| "learning_rate": 9.301230044490972e-06, |
| "logits/chosen": -2.955460786819458, |
| "logits/rejected": -3.0287487506866455, |
| "logps/chosen": -302.23388671875, |
| "logps/rejected": -284.850830078125, |
| "loss": 0.4264, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.5403744578361511, |
| "rewards/margins": 1.4383584260940552, |
| "rewards/rejected": -1.9787328243255615, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.5365087673383931, |
| "grad_norm": 6.636228084564209, |
| "learning_rate": 9.27505888510861e-06, |
| "logits/chosen": -2.859696388244629, |
| "logits/rejected": -2.9273557662963867, |
| "logps/chosen": -282.9736022949219, |
| "logps/rejected": -305.96087646484375, |
| "loss": 0.4716, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.6416420340538025, |
| "rewards/margins": 1.3998219966888428, |
| "rewards/rejected": -2.041464328765869, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.5378173253075111, |
| "grad_norm": 7.346982955932617, |
| "learning_rate": 9.24888772572625e-06, |
| "logits/chosen": -2.8341944217681885, |
| "logits/rejected": -2.946746349334717, |
| "logps/chosen": -308.4425354003906, |
| "logps/rejected": -269.16448974609375, |
| "loss": 0.5097, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.9091157913208008, |
| "rewards/margins": 1.4609925746917725, |
| "rewards/rejected": -2.3701086044311523, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.5391258832766291, |
| "grad_norm": 5.3710432052612305, |
| "learning_rate": 9.22271656634389e-06, |
| "logits/chosen": -2.867131471633911, |
| "logits/rejected": -2.7901980876922607, |
| "logps/chosen": -294.47900390625, |
| "logps/rejected": -304.6952819824219, |
| "loss": 0.4905, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1196249723434448, |
| "rewards/margins": 1.551390290260315, |
| "rewards/rejected": -2.6710152626037598, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.5404344412457471, |
| "grad_norm": 8.228592872619629, |
| "learning_rate": 9.196545406961529e-06, |
| "logits/chosen": -2.9531893730163574, |
| "logits/rejected": -2.912006378173828, |
| "logps/chosen": -335.7592468261719, |
| "logps/rejected": -351.94818115234375, |
| "loss": 0.3817, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.1705877780914307, |
| "rewards/margins": 2.073190689086914, |
| "rewards/rejected": -3.2437782287597656, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.5417429992148652, |
| "grad_norm": 6.4846954345703125, |
| "learning_rate": 9.170374247579169e-06, |
| "logits/chosen": -3.026458263397217, |
| "logits/rejected": -3.085371971130371, |
| "logps/chosen": -276.64141845703125, |
| "logps/rejected": -233.27804565429688, |
| "loss": 0.4266, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0601603984832764, |
| "rewards/margins": 1.4462592601776123, |
| "rewards/rejected": -2.5064198970794678, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.5430515571839832, |
| "grad_norm": 9.074657440185547, |
| "learning_rate": 9.144203088196809e-06, |
| "logits/chosen": -2.9421777725219727, |
| "logits/rejected": -2.927706480026245, |
| "logps/chosen": -303.47467041015625, |
| "logps/rejected": -296.0120544433594, |
| "loss": 0.363, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.3702189922332764, |
| "rewards/margins": 2.219372272491455, |
| "rewards/rejected": -3.5895907878875732, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.5443601151531012, |
| "grad_norm": 11.485058784484863, |
| "learning_rate": 9.118031928814447e-06, |
| "logits/chosen": -3.0031659603118896, |
| "logits/rejected": -3.0807993412017822, |
| "logps/chosen": -277.64984130859375, |
| "logps/rejected": -287.6283264160156, |
| "loss": 0.5127, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.5950052738189697, |
| "rewards/margins": 1.7552427053451538, |
| "rewards/rejected": -3.350247859954834, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.5456686731222193, |
| "grad_norm": 7.12365198135376, |
| "learning_rate": 9.091860769432087e-06, |
| "logits/chosen": -2.9222354888916016, |
| "logits/rejected": -3.0730066299438477, |
| "logps/chosen": -268.171142578125, |
| "logps/rejected": -258.3194885253906, |
| "loss": 0.4631, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.5083897113800049, |
| "rewards/margins": 1.9156631231307983, |
| "rewards/rejected": -3.4240524768829346, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.5469772310913373, |
| "grad_norm": 7.024919033050537, |
| "learning_rate": 9.065689610049726e-06, |
| "logits/chosen": -2.9282279014587402, |
| "logits/rejected": -2.9903883934020996, |
| "logps/chosen": -245.131103515625, |
| "logps/rejected": -287.2030334472656, |
| "loss": 0.5131, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8197906017303467, |
| "rewards/margins": 1.6753877401351929, |
| "rewards/rejected": -3.49517822265625, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.5482857890604553, |
| "grad_norm": 5.535000324249268, |
| "learning_rate": 9.039518450667366e-06, |
| "logits/chosen": -2.9842965602874756, |
| "logits/rejected": -2.9172840118408203, |
| "logps/chosen": -354.969970703125, |
| "logps/rejected": -327.3636779785156, |
| "loss": 0.4529, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.4617191553115845, |
| "rewards/margins": 1.7047052383422852, |
| "rewards/rejected": -3.166424512863159, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.5495943470295734, |
| "grad_norm": 7.004070281982422, |
| "learning_rate": 9.013347291285006e-06, |
| "logits/chosen": -2.9624948501586914, |
| "logits/rejected": -2.85925030708313, |
| "logps/chosen": -310.21282958984375, |
| "logps/rejected": -320.2611389160156, |
| "loss": 0.5602, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -2.095109462738037, |
| "rewards/margins": 1.4026702642440796, |
| "rewards/rejected": -3.4977798461914062, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.5509029049986914, |
| "grad_norm": 13.739896774291992, |
| "learning_rate": 8.987176131902644e-06, |
| "logits/chosen": -2.917407751083374, |
| "logits/rejected": -2.9701895713806152, |
| "logps/chosen": -318.69793701171875, |
| "logps/rejected": -318.5054626464844, |
| "loss": 0.5382, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.4192984104156494, |
| "rewards/margins": 1.9310028553009033, |
| "rewards/rejected": -3.3503010272979736, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.5522114629678094, |
| "grad_norm": 8.459038734436035, |
| "learning_rate": 8.961004972520282e-06, |
| "logits/chosen": -2.9396519660949707, |
| "logits/rejected": -3.022418975830078, |
| "logps/chosen": -284.6721496582031, |
| "logps/rejected": -299.3384704589844, |
| "loss": 0.5301, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.4773406982421875, |
| "rewards/margins": 1.503132939338684, |
| "rewards/rejected": -2.980473756790161, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.5535200209369275, |
| "grad_norm": 6.923474311828613, |
| "learning_rate": 8.934833813137923e-06, |
| "logits/chosen": -2.9355552196502686, |
| "logits/rejected": -2.9257876873016357, |
| "logps/chosen": -308.79180908203125, |
| "logps/rejected": -338.4895324707031, |
| "loss": 0.5048, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.2020846605300903, |
| "rewards/margins": 1.280178427696228, |
| "rewards/rejected": -2.4822630882263184, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.5548285789060455, |
| "grad_norm": 10.126736640930176, |
| "learning_rate": 8.908662653755563e-06, |
| "logits/chosen": -2.876392126083374, |
| "logits/rejected": -3.0241641998291016, |
| "logps/chosen": -279.66033935546875, |
| "logps/rejected": -269.89288330078125, |
| "loss": 0.5188, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.1738862991333008, |
| "rewards/margins": 1.4325240850448608, |
| "rewards/rejected": -2.606410264968872, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.5561371368751635, |
| "grad_norm": 5.744837760925293, |
| "learning_rate": 8.882491494373201e-06, |
| "logits/chosen": -2.7355411052703857, |
| "logits/rejected": -2.8887104988098145, |
| "logps/chosen": -256.62213134765625, |
| "logps/rejected": -286.48992919921875, |
| "loss": 0.4672, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.1047096252441406, |
| "rewards/margins": 1.5396578311920166, |
| "rewards/rejected": -2.6443674564361572, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.5574456948442816, |
| "grad_norm": 6.99560022354126, |
| "learning_rate": 8.856320334990841e-06, |
| "logits/chosen": -3.060870409011841, |
| "logits/rejected": -3.0697546005249023, |
| "logps/chosen": -310.7414245605469, |
| "logps/rejected": -238.05508422851562, |
| "loss": 0.454, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.8422433137893677, |
| "rewards/margins": 1.337949514389038, |
| "rewards/rejected": -2.180192708969116, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.5587542528133996, |
| "grad_norm": 7.061825275421143, |
| "learning_rate": 8.83014917560848e-06, |
| "logits/chosen": -2.8841323852539062, |
| "logits/rejected": -2.943366527557373, |
| "logps/chosen": -227.40780639648438, |
| "logps/rejected": -212.0872039794922, |
| "loss": 0.5566, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.1873077154159546, |
| "rewards/margins": 1.2954754829406738, |
| "rewards/rejected": -2.482783317565918, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.5600628107825176, |
| "grad_norm": 3.9094793796539307, |
| "learning_rate": 8.80397801622612e-06, |
| "logits/chosen": -2.954484224319458, |
| "logits/rejected": -3.0178635120391846, |
| "logps/chosen": -259.7063903808594, |
| "logps/rejected": -256.8832702636719, |
| "loss": 0.3804, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.817324161529541, |
| "rewards/margins": 1.6765632629394531, |
| "rewards/rejected": -2.493887424468994, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.5613713687516357, |
| "grad_norm": 7.536386966705322, |
| "learning_rate": 8.77780685684376e-06, |
| "logits/chosen": -2.761610507965088, |
| "logits/rejected": -2.877763271331787, |
| "logps/chosen": -257.71246337890625, |
| "logps/rejected": -227.89743041992188, |
| "loss": 0.4166, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.9659379124641418, |
| "rewards/margins": 1.762495994567871, |
| "rewards/rejected": -2.728433847427368, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.5626799267207537, |
| "grad_norm": 7.488414764404297, |
| "learning_rate": 8.751635697461398e-06, |
| "logits/chosen": -2.770033836364746, |
| "logits/rejected": -3.0158371925354004, |
| "logps/chosen": -282.5020446777344, |
| "logps/rejected": -273.234619140625, |
| "loss": 0.5068, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9604713320732117, |
| "rewards/margins": 1.5011874437332153, |
| "rewards/rejected": -2.4616587162017822, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.5639884846898717, |
| "grad_norm": 7.021855354309082, |
| "learning_rate": 8.725464538079038e-06, |
| "logits/chosen": -2.9612388610839844, |
| "logits/rejected": -2.963198184967041, |
| "logps/chosen": -299.5247802734375, |
| "logps/rejected": -272.65948486328125, |
| "loss": 0.5239, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0747485160827637, |
| "rewards/margins": 1.7154130935668945, |
| "rewards/rejected": -2.790161609649658, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.5652970426589898, |
| "grad_norm": 5.9548797607421875, |
| "learning_rate": 8.699293378696676e-06, |
| "logits/chosen": -3.007736921310425, |
| "logits/rejected": -3.0112733840942383, |
| "logps/chosen": -292.77142333984375, |
| "logps/rejected": -297.82061767578125, |
| "loss": 0.4134, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.9671595692634583, |
| "rewards/margins": 2.1301705837249756, |
| "rewards/rejected": -3.0973305702209473, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.5666056006281078, |
| "grad_norm": 7.590676307678223, |
| "learning_rate": 8.673122219314316e-06, |
| "logits/chosen": -2.9755873680114746, |
| "logits/rejected": -3.037572145462036, |
| "logps/chosen": -314.57171630859375, |
| "logps/rejected": -303.40411376953125, |
| "loss": 0.6617, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.9973914623260498, |
| "rewards/margins": 1.1834059953689575, |
| "rewards/rejected": -3.180797815322876, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.5679141585972258, |
| "grad_norm": 8.90278148651123, |
| "learning_rate": 8.646951059931956e-06, |
| "logits/chosen": -3.0032241344451904, |
| "logits/rejected": -3.049996852874756, |
| "logps/chosen": -278.5682067871094, |
| "logps/rejected": -271.05926513671875, |
| "loss": 0.5459, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.4959290027618408, |
| "rewards/margins": 1.4511077404022217, |
| "rewards/rejected": -2.9470365047454834, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.5692227165663439, |
| "grad_norm": 7.02294397354126, |
| "learning_rate": 8.620779900549595e-06, |
| "logits/chosen": -2.896298885345459, |
| "logits/rejected": -2.9629626274108887, |
| "logps/chosen": -305.6729431152344, |
| "logps/rejected": -343.16595458984375, |
| "loss": 0.5035, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.7510077953338623, |
| "rewards/margins": 1.6487032175064087, |
| "rewards/rejected": -3.3997111320495605, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.5705312745354619, |
| "grad_norm": 7.937122821807861, |
| "learning_rate": 8.594608741167235e-06, |
| "logits/chosen": -2.9339687824249268, |
| "logits/rejected": -3.004333972930908, |
| "logps/chosen": -328.10791015625, |
| "logps/rejected": -312.27032470703125, |
| "loss": 0.3867, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.7036577463150024, |
| "rewards/margins": 1.9725004434585571, |
| "rewards/rejected": -3.6761581897735596, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.5718398325045799, |
| "grad_norm": 7.029597759246826, |
| "learning_rate": 8.568437581784875e-06, |
| "logits/chosen": -2.831653118133545, |
| "logits/rejected": -2.8481671810150146, |
| "logps/chosen": -266.60882568359375, |
| "logps/rejected": -274.7930603027344, |
| "loss": 0.5076, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.6596145629882812, |
| "rewards/margins": 1.5107403993606567, |
| "rewards/rejected": -3.1703550815582275, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.573148390473698, |
| "grad_norm": 6.836891174316406, |
| "learning_rate": 8.542266422402513e-06, |
| "logits/chosen": -2.917339563369751, |
| "logits/rejected": -3.0334200859069824, |
| "logps/chosen": -238.88229370117188, |
| "logps/rejected": -249.5050048828125, |
| "loss": 0.5092, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.2485872507095337, |
| "rewards/margins": 1.4144742488861084, |
| "rewards/rejected": -2.6630613803863525, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.574456948442816, |
| "grad_norm": 7.6597981452941895, |
| "learning_rate": 8.516095263020152e-06, |
| "logits/chosen": -2.942396402359009, |
| "logits/rejected": -3.004131555557251, |
| "logps/chosen": -263.2633361816406, |
| "logps/rejected": -259.5253601074219, |
| "loss": 0.5657, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.4041615724563599, |
| "rewards/margins": 1.3960957527160645, |
| "rewards/rejected": -2.8002572059631348, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.575765506411934, |
| "grad_norm": 10.346620559692383, |
| "learning_rate": 8.489924103637792e-06, |
| "logits/chosen": -2.958832025527954, |
| "logits/rejected": -2.9972152709960938, |
| "logps/chosen": -311.97686767578125, |
| "logps/rejected": -293.7490234375, |
| "loss": 0.4452, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.709890365600586, |
| "rewards/margins": 1.7548198699951172, |
| "rewards/rejected": -3.4647107124328613, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.5770740643810521, |
| "grad_norm": 6.80871057510376, |
| "learning_rate": 8.463752944255432e-06, |
| "logits/chosen": -2.995701313018799, |
| "logits/rejected": -2.9955978393554688, |
| "logps/chosen": -238.9085235595703, |
| "logps/rejected": -260.15234375, |
| "loss": 0.5445, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.4946893453598022, |
| "rewards/margins": 1.4255342483520508, |
| "rewards/rejected": -2.9202239513397217, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.5783826223501701, |
| "grad_norm": 6.468989849090576, |
| "learning_rate": 8.43758178487307e-06, |
| "logits/chosen": -3.0018184185028076, |
| "logits/rejected": -3.012239694595337, |
| "logps/chosen": -313.91973876953125, |
| "logps/rejected": -300.5749206542969, |
| "loss": 0.4718, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.8711507320404053, |
| "rewards/margins": 1.6339337825775146, |
| "rewards/rejected": -3.505084276199341, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.5796911803192881, |
| "grad_norm": 7.442930221557617, |
| "learning_rate": 8.41141062549071e-06, |
| "logits/chosen": -2.836812734603882, |
| "logits/rejected": -2.8636796474456787, |
| "logps/chosen": -264.2229309082031, |
| "logps/rejected": -278.0809631347656, |
| "loss": 0.5215, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.6197808980941772, |
| "rewards/margins": 1.3861788511276245, |
| "rewards/rejected": -3.005959987640381, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.5809997382884062, |
| "grad_norm": 9.090330123901367, |
| "learning_rate": 8.385239466108349e-06, |
| "logits/chosen": -2.896070957183838, |
| "logits/rejected": -2.8910112380981445, |
| "logps/chosen": -294.02801513671875, |
| "logps/rejected": -274.5065002441406, |
| "loss": 0.5265, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.9419463872909546, |
| "rewards/margins": 1.3138511180877686, |
| "rewards/rejected": -3.2557971477508545, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.5823082962575242, |
| "grad_norm": 8.379128456115723, |
| "learning_rate": 8.359068306725989e-06, |
| "logits/chosen": -2.844043016433716, |
| "logits/rejected": -2.9461586475372314, |
| "logps/chosen": -375.4545593261719, |
| "logps/rejected": -330.26068115234375, |
| "loss": 0.5607, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.5715759992599487, |
| "rewards/margins": 1.545506477355957, |
| "rewards/rejected": -3.1170825958251953, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.5836168542266422, |
| "grad_norm": 5.378319263458252, |
| "learning_rate": 8.332897147343629e-06, |
| "logits/chosen": -2.9515392780303955, |
| "logits/rejected": -3.003692150115967, |
| "logps/chosen": -303.4684753417969, |
| "logps/rejected": -290.7479248046875, |
| "loss": 0.456, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.7767654657363892, |
| "rewards/margins": 1.4634473323822021, |
| "rewards/rejected": -3.2402126789093018, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.5849254121957602, |
| "grad_norm": 10.692007064819336, |
| "learning_rate": 8.306725987961267e-06, |
| "logits/chosen": -2.96927809715271, |
| "logits/rejected": -2.9832751750946045, |
| "logps/chosen": -301.08135986328125, |
| "logps/rejected": -289.53436279296875, |
| "loss": 0.5926, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.165919065475464, |
| "rewards/margins": 1.5472595691680908, |
| "rewards/rejected": -3.7131786346435547, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.5862339701648783, |
| "grad_norm": 6.6370744705200195, |
| "learning_rate": 8.280554828578907e-06, |
| "logits/chosen": -2.9266231060028076, |
| "logits/rejected": -2.936668872833252, |
| "logps/chosen": -312.54803466796875, |
| "logps/rejected": -291.4410400390625, |
| "loss": 0.5544, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.9401369094848633, |
| "rewards/margins": 1.5303126573562622, |
| "rewards/rejected": -3.470449924468994, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.5875425281339963, |
| "grad_norm": 4.578351020812988, |
| "learning_rate": 8.254383669196546e-06, |
| "logits/chosen": -2.8608431816101074, |
| "logits/rejected": -2.8830883502960205, |
| "logps/chosen": -331.26593017578125, |
| "logps/rejected": -305.5675048828125, |
| "loss": 0.3923, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.8561537265777588, |
| "rewards/margins": 1.607722520828247, |
| "rewards/rejected": -3.463876724243164, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.5888510861031143, |
| "grad_norm": 6.522406578063965, |
| "learning_rate": 8.228212509814186e-06, |
| "logits/chosen": -2.880575656890869, |
| "logits/rejected": -2.9558682441711426, |
| "logps/chosen": -341.096923828125, |
| "logps/rejected": -299.71624755859375, |
| "loss": 0.567, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.9508142471313477, |
| "rewards/margins": 1.496654748916626, |
| "rewards/rejected": -3.4474685192108154, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.5901596440722324, |
| "grad_norm": 6.557366371154785, |
| "learning_rate": 8.202041350431826e-06, |
| "logits/chosen": -2.893763542175293, |
| "logits/rejected": -3.021374225616455, |
| "logps/chosen": -321.5784606933594, |
| "logps/rejected": -259.36566162109375, |
| "loss": 0.4864, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.7807306051254272, |
| "rewards/margins": 1.5349364280700684, |
| "rewards/rejected": -3.315666913986206, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.5914682020413504, |
| "grad_norm": 10.000592231750488, |
| "learning_rate": 8.175870191049464e-06, |
| "logits/chosen": -2.7134666442871094, |
| "logits/rejected": -2.776644229888916, |
| "logps/chosen": -329.2943420410156, |
| "logps/rejected": -301.40338134765625, |
| "loss": 0.525, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.6413131952285767, |
| "rewards/margins": 1.7001183032989502, |
| "rewards/rejected": -3.3414313793182373, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.5927767600104684, |
| "grad_norm": 7.016106128692627, |
| "learning_rate": 8.149699031667102e-06, |
| "logits/chosen": -2.9477972984313965, |
| "logits/rejected": -2.94490909576416, |
| "logps/chosen": -298.5036926269531, |
| "logps/rejected": -280.6138610839844, |
| "loss": 0.4627, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.7625675201416016, |
| "rewards/margins": 1.7733052968978882, |
| "rewards/rejected": -3.5358726978302, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.5940853179795865, |
| "grad_norm": 7.888564586639404, |
| "learning_rate": 8.123527872284743e-06, |
| "logits/chosen": -2.8331191539764404, |
| "logits/rejected": -2.9611730575561523, |
| "logps/chosen": -284.2607727050781, |
| "logps/rejected": -262.9834289550781, |
| "loss": 0.4227, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.5213582515716553, |
| "rewards/margins": 1.6578248739242554, |
| "rewards/rejected": -3.179183006286621, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.5953938759487045, |
| "grad_norm": 6.094775199890137, |
| "learning_rate": 8.097356712902383e-06, |
| "logits/chosen": -2.918649196624756, |
| "logits/rejected": -2.9431166648864746, |
| "logps/chosen": -273.8592834472656, |
| "logps/rejected": -277.9981689453125, |
| "loss": 0.577, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.7764129638671875, |
| "rewards/margins": 1.1202478408813477, |
| "rewards/rejected": -2.896660566329956, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.5967024339178225, |
| "grad_norm": 7.925708293914795, |
| "learning_rate": 8.071185553520021e-06, |
| "logits/chosen": -2.9345788955688477, |
| "logits/rejected": -3.008537769317627, |
| "logps/chosen": -307.4665832519531, |
| "logps/rejected": -274.0155029296875, |
| "loss": 0.5001, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.6642906665802002, |
| "rewards/margins": 1.5600162744522095, |
| "rewards/rejected": -3.2243072986602783, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.5980109918869406, |
| "grad_norm": 9.510141372680664, |
| "learning_rate": 8.045014394137661e-06, |
| "logits/chosen": -2.7968087196350098, |
| "logits/rejected": -2.7900938987731934, |
| "logps/chosen": -273.7165832519531, |
| "logps/rejected": -263.81915283203125, |
| "loss": 0.4475, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.9605255126953125, |
| "rewards/margins": 1.7081210613250732, |
| "rewards/rejected": -3.6686465740203857, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.5993195498560586, |
| "grad_norm": 7.476105213165283, |
| "learning_rate": 8.0188432347553e-06, |
| "logits/chosen": -2.966008424758911, |
| "logits/rejected": -3.010863780975342, |
| "logps/chosen": -301.1141052246094, |
| "logps/rejected": -314.04248046875, |
| "loss": 0.4834, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.9317734241485596, |
| "rewards/margins": 1.6113536357879639, |
| "rewards/rejected": -3.5431270599365234, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.6006281078251766, |
| "grad_norm": 5.335666656494141, |
| "learning_rate": 7.99267207537294e-06, |
| "logits/chosen": -2.929396152496338, |
| "logits/rejected": -2.97169828414917, |
| "logps/chosen": -287.72955322265625, |
| "logps/rejected": -318.4091796875, |
| "loss": 0.5622, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -2.0816383361816406, |
| "rewards/margins": 1.6077334880828857, |
| "rewards/rejected": -3.6893723011016846, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.6019366657942947, |
| "grad_norm": 8.660011291503906, |
| "learning_rate": 7.96650091599058e-06, |
| "logits/chosen": -2.955652952194214, |
| "logits/rejected": -2.9135398864746094, |
| "logps/chosen": -282.47552490234375, |
| "logps/rejected": -285.1706237792969, |
| "loss": 0.5139, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -2.121570587158203, |
| "rewards/margins": 1.3121795654296875, |
| "rewards/rejected": -3.433750629425049, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6032452237634127, |
| "grad_norm": 7.009401321411133, |
| "learning_rate": 7.940329756608218e-06, |
| "logits/chosen": -2.8541007041931152, |
| "logits/rejected": -2.9473915100097656, |
| "logps/chosen": -335.2101745605469, |
| "logps/rejected": -323.11773681640625, |
| "loss": 0.4064, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.4906498193740845, |
| "rewards/margins": 1.9026981592178345, |
| "rewards/rejected": -3.393347978591919, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.6045537817325307, |
| "grad_norm": 5.929949760437012, |
| "learning_rate": 7.914158597225858e-06, |
| "logits/chosen": -2.790144205093384, |
| "logits/rejected": -2.7649049758911133, |
| "logps/chosen": -233.0009002685547, |
| "logps/rejected": -255.8800506591797, |
| "loss": 0.502, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.5350371599197388, |
| "rewards/margins": 1.4691892862319946, |
| "rewards/rejected": -3.0042262077331543, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.6058623397016488, |
| "grad_norm": 5.333659648895264, |
| "learning_rate": 7.887987437843498e-06, |
| "logits/chosen": -2.83431339263916, |
| "logits/rejected": -2.869320869445801, |
| "logps/chosen": -291.4210510253906, |
| "logps/rejected": -289.72772216796875, |
| "loss": 0.4205, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.24900221824646, |
| "rewards/margins": 1.8472312688827515, |
| "rewards/rejected": -3.096233606338501, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.6071708976707668, |
| "grad_norm": 7.005737781524658, |
| "learning_rate": 7.861816278461136e-06, |
| "logits/chosen": -2.858786106109619, |
| "logits/rejected": -2.824657917022705, |
| "logps/chosen": -307.7120056152344, |
| "logps/rejected": -309.0006103515625, |
| "loss": 0.5332, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.736547827720642, |
| "rewards/margins": 1.4633195400238037, |
| "rewards/rejected": -3.1998674869537354, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.6084794556398848, |
| "grad_norm": 7.5584330558776855, |
| "learning_rate": 7.835645119078776e-06, |
| "logits/chosen": -2.8849313259124756, |
| "logits/rejected": -2.8943004608154297, |
| "logps/chosen": -256.06292724609375, |
| "logps/rejected": -318.1280212402344, |
| "loss": 0.4454, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.5195752382278442, |
| "rewards/margins": 1.7365375757217407, |
| "rewards/rejected": -3.256112575531006, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.6097880136090029, |
| "grad_norm": 9.819621086120605, |
| "learning_rate": 7.809473959696415e-06, |
| "logits/chosen": -2.734415292739868, |
| "logits/rejected": -2.8705554008483887, |
| "logps/chosen": -326.63543701171875, |
| "logps/rejected": -277.5613708496094, |
| "loss": 0.6395, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.3960602283477783, |
| "rewards/margins": 1.3911415338516235, |
| "rewards/rejected": -2.7872016429901123, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.6110965715781209, |
| "grad_norm": 10.025915145874023, |
| "learning_rate": 7.783302800314055e-06, |
| "logits/chosen": -2.8270926475524902, |
| "logits/rejected": -2.8613481521606445, |
| "logps/chosen": -306.28033447265625, |
| "logps/rejected": -287.93597412109375, |
| "loss": 0.5738, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.4758658409118652, |
| "rewards/margins": 1.4804503917694092, |
| "rewards/rejected": -2.9563159942626953, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.6124051295472389, |
| "grad_norm": 6.9185261726379395, |
| "learning_rate": 7.757131640931695e-06, |
| "logits/chosen": -2.9443321228027344, |
| "logits/rejected": -2.948289155960083, |
| "logps/chosen": -375.3352355957031, |
| "logps/rejected": -333.23834228515625, |
| "loss": 0.4571, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.5819398164749146, |
| "rewards/margins": 1.5913922786712646, |
| "rewards/rejected": -3.1733319759368896, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.613713687516357, |
| "grad_norm": 6.314789772033691, |
| "learning_rate": 7.730960481549333e-06, |
| "logits/chosen": -2.9200820922851562, |
| "logits/rejected": -2.8788719177246094, |
| "logps/chosen": -298.35296630859375, |
| "logps/rejected": -275.4773864746094, |
| "loss": 0.7795, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -1.692670464515686, |
| "rewards/margins": 0.9236106872558594, |
| "rewards/rejected": -2.616281270980835, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.615022245485475, |
| "grad_norm": 6.88268518447876, |
| "learning_rate": 7.704789322166972e-06, |
| "logits/chosen": -2.885225296020508, |
| "logits/rejected": -2.945056676864624, |
| "logps/chosen": -291.22589111328125, |
| "logps/rejected": -264.28485107421875, |
| "loss": 0.621, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.813675880432129, |
| "rewards/margins": 1.1586147546768188, |
| "rewards/rejected": -2.9722909927368164, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.616330803454593, |
| "grad_norm": 7.270668983459473, |
| "learning_rate": 7.678618162784612e-06, |
| "logits/chosen": -2.767096996307373, |
| "logits/rejected": -2.9178149700164795, |
| "logps/chosen": -273.98443603515625, |
| "logps/rejected": -270.39849853515625, |
| "loss": 0.4296, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.385209083557129, |
| "rewards/margins": 1.4976346492767334, |
| "rewards/rejected": -2.882843494415283, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.6176393614237111, |
| "grad_norm": 6.517707824707031, |
| "learning_rate": 7.652447003402252e-06, |
| "logits/chosen": -2.9354186058044434, |
| "logits/rejected": -2.8951194286346436, |
| "logps/chosen": -293.964599609375, |
| "logps/rejected": -319.67596435546875, |
| "loss": 0.5387, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.7208137512207031, |
| "rewards/margins": 1.1261407136917114, |
| "rewards/rejected": -2.846954822540283, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.6189479193928291, |
| "grad_norm": 3.2250170707702637, |
| "learning_rate": 7.62627584401989e-06, |
| "logits/chosen": -2.8758339881896973, |
| "logits/rejected": -2.9541773796081543, |
| "logps/chosen": -283.46185302734375, |
| "logps/rejected": -290.2981872558594, |
| "loss": 0.4126, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.4215718507766724, |
| "rewards/margins": 1.676491379737854, |
| "rewards/rejected": -3.0980632305145264, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.6202564773619471, |
| "grad_norm": 7.372755527496338, |
| "learning_rate": 7.60010468463753e-06, |
| "logits/chosen": -2.9308109283447266, |
| "logits/rejected": -2.946016788482666, |
| "logps/chosen": -314.4420471191406, |
| "logps/rejected": -285.1692810058594, |
| "loss": 0.339, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.5270148515701294, |
| "rewards/margins": 1.9710546731948853, |
| "rewards/rejected": -3.4980697631835938, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.6215650353310652, |
| "grad_norm": 7.630323886871338, |
| "learning_rate": 7.5739335252551695e-06, |
| "logits/chosen": -2.897289752960205, |
| "logits/rejected": -2.9930167198181152, |
| "logps/chosen": -347.9987487792969, |
| "logps/rejected": -320.02008056640625, |
| "loss": 0.5088, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.688640832901001, |
| "rewards/margins": 1.6700690984725952, |
| "rewards/rejected": -3.3587098121643066, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.6228735933001832, |
| "grad_norm": 8.262691497802734, |
| "learning_rate": 7.547762365872809e-06, |
| "logits/chosen": -2.941046953201294, |
| "logits/rejected": -2.9043381214141846, |
| "logps/chosen": -265.45703125, |
| "logps/rejected": -254.2328338623047, |
| "loss": 0.5536, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.5332114696502686, |
| "rewards/margins": 1.3792575597763062, |
| "rewards/rejected": -2.912468910217285, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.6241821512693012, |
| "grad_norm": 8.945664405822754, |
| "learning_rate": 7.521591206490449e-06, |
| "logits/chosen": -2.7841739654541016, |
| "logits/rejected": -2.9014525413513184, |
| "logps/chosen": -250.78091430664062, |
| "logps/rejected": -283.4075927734375, |
| "loss": 0.5543, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.4417940378189087, |
| "rewards/margins": 1.4987192153930664, |
| "rewards/rejected": -2.9405131340026855, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.6254907092384192, |
| "grad_norm": 5.672665596008301, |
| "learning_rate": 7.495420047108087e-06, |
| "logits/chosen": -2.9190821647644043, |
| "logits/rejected": -2.9655425548553467, |
| "logps/chosen": -308.78057861328125, |
| "logps/rejected": -286.87030029296875, |
| "loss": 0.5464, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.820185661315918, |
| "rewards/margins": 1.1439754962921143, |
| "rewards/rejected": -2.9641611576080322, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.6267992672075373, |
| "grad_norm": 6.946528911590576, |
| "learning_rate": 7.469248887725726e-06, |
| "logits/chosen": -2.8645572662353516, |
| "logits/rejected": -2.8304049968719482, |
| "logps/chosen": -278.38128662109375, |
| "logps/rejected": -312.0920104980469, |
| "loss": 0.4588, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.3060362339019775, |
| "rewards/margins": 1.906618356704712, |
| "rewards/rejected": -3.2126548290252686, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.6281078251766553, |
| "grad_norm": 6.621123313903809, |
| "learning_rate": 7.4430777283433664e-06, |
| "logits/chosen": -2.8144335746765137, |
| "logits/rejected": -2.8863790035247803, |
| "logps/chosen": -327.2550354003906, |
| "logps/rejected": -266.57574462890625, |
| "loss": 0.5097, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.4507298469543457, |
| "rewards/margins": 1.4654420614242554, |
| "rewards/rejected": -2.9161720275878906, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6281078251766553, |
| "eval_logits/chosen": -2.921830177307129, |
| "eval_logits/rejected": -2.93990159034729, |
| "eval_logps/chosen": -297.5841064453125, |
| "eval_logps/rejected": -293.12017822265625, |
| "eval_loss": 0.5002692937850952, |
| "eval_rewards/accuracies": 0.75, |
| "eval_rewards/chosen": -1.435073971748352, |
| "eval_rewards/margins": 1.4834831953048706, |
| "eval_rewards/rejected": -2.9185571670532227, |
| "eval_runtime": 764.0029, |
| "eval_samples_per_second": 2.618, |
| "eval_steps_per_second": 0.327, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6294163831457733, |
| "grad_norm": 7.230893135070801, |
| "learning_rate": 7.416906568961006e-06, |
| "logits/chosen": -2.885646104812622, |
| "logits/rejected": -2.8738009929656982, |
| "logps/chosen": -277.877685546875, |
| "logps/rejected": -286.29608154296875, |
| "loss": 0.6041, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.5320603847503662, |
| "rewards/margins": 1.3425400257110596, |
| "rewards/rejected": -2.8746001720428467, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.6307249411148914, |
| "grad_norm": 6.221070289611816, |
| "learning_rate": 7.390735409578646e-06, |
| "logits/chosen": -2.8773863315582275, |
| "logits/rejected": -2.948465347290039, |
| "logps/chosen": -354.95001220703125, |
| "logps/rejected": -330.3343505859375, |
| "loss": 0.7765, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.9595705270767212, |
| "rewards/margins": 0.9573482275009155, |
| "rewards/rejected": -2.9169187545776367, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.6320334990840094, |
| "grad_norm": 4.76958703994751, |
| "learning_rate": 7.364564250196284e-06, |
| "logits/chosen": -2.9388175010681152, |
| "logits/rejected": -2.9442152976989746, |
| "logps/chosen": -263.76995849609375, |
| "logps/rejected": -264.36749267578125, |
| "loss": 0.469, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.533185362815857, |
| "rewards/margins": 1.467279076576233, |
| "rewards/rejected": -3.000464677810669, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.6333420570531274, |
| "grad_norm": 8.054102897644043, |
| "learning_rate": 7.338393090813923e-06, |
| "logits/chosen": -2.8760969638824463, |
| "logits/rejected": -2.887807607650757, |
| "logps/chosen": -289.54315185546875, |
| "logps/rejected": -323.89935302734375, |
| "loss": 0.4297, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.7106348276138306, |
| "rewards/margins": 1.607092261314392, |
| "rewards/rejected": -3.3177268505096436, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.6346506150222455, |
| "grad_norm": 6.590506076812744, |
| "learning_rate": 7.312221931431563e-06, |
| "logits/chosen": -2.808767318725586, |
| "logits/rejected": -2.9431216716766357, |
| "logps/chosen": -298.54888916015625, |
| "logps/rejected": -359.0498962402344, |
| "loss": 0.379, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.2185065746307373, |
| "rewards/margins": 1.578076720237732, |
| "rewards/rejected": -2.796583414077759, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.6359591729913635, |
| "grad_norm": 7.335874557495117, |
| "learning_rate": 7.286050772049203e-06, |
| "logits/chosen": -2.8771159648895264, |
| "logits/rejected": -2.7269673347473145, |
| "logps/chosen": -253.0265655517578, |
| "logps/rejected": -315.6542663574219, |
| "loss": 0.459, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.6146316528320312, |
| "rewards/margins": 1.7264766693115234, |
| "rewards/rejected": -3.3411078453063965, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.6372677309604815, |
| "grad_norm": 7.070531368255615, |
| "learning_rate": 7.259879612666841e-06, |
| "logits/chosen": -2.8449649810791016, |
| "logits/rejected": -2.9761483669281006, |
| "logps/chosen": -274.96630859375, |
| "logps/rejected": -268.19732666015625, |
| "loss": 0.5524, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.5227558612823486, |
| "rewards/margins": 1.515989899635315, |
| "rewards/rejected": -3.0387461185455322, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.6385762889295996, |
| "grad_norm": 5.620815277099609, |
| "learning_rate": 7.233708453284481e-06, |
| "logits/chosen": -2.837573528289795, |
| "logits/rejected": -2.907860040664673, |
| "logps/chosen": -312.64727783203125, |
| "logps/rejected": -317.07342529296875, |
| "loss": 0.5055, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5986802577972412, |
| "rewards/margins": 1.4222062826156616, |
| "rewards/rejected": -3.020886182785034, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.6398848468987176, |
| "grad_norm": 8.898514747619629, |
| "learning_rate": 7.20753729390212e-06, |
| "logits/chosen": -2.9366869926452637, |
| "logits/rejected": -3.005056381225586, |
| "logps/chosen": -362.00030517578125, |
| "logps/rejected": -289.4993896484375, |
| "loss": 0.4732, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.4922668933868408, |
| "rewards/margins": 1.557114601135254, |
| "rewards/rejected": -3.049381732940674, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.6411934048678356, |
| "grad_norm": 5.531732082366943, |
| "learning_rate": 7.1813661345197595e-06, |
| "logits/chosen": -2.8961660861968994, |
| "logits/rejected": -2.9190566539764404, |
| "logps/chosen": -337.93487548828125, |
| "logps/rejected": -288.6690979003906, |
| "loss": 0.4368, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.5329985618591309, |
| "rewards/margins": 1.7288877964019775, |
| "rewards/rejected": -3.2618861198425293, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.6425019628369537, |
| "grad_norm": 6.2724103927612305, |
| "learning_rate": 7.1551949751373995e-06, |
| "logits/chosen": -2.958855152130127, |
| "logits/rejected": -3.0128567218780518, |
| "logps/chosen": -313.1339111328125, |
| "logps/rejected": -276.3824768066406, |
| "loss": 0.5257, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.249989628791809, |
| "rewards/margins": 1.6770312786102295, |
| "rewards/rejected": -2.927020788192749, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.6438105208060717, |
| "grad_norm": 7.424379825592041, |
| "learning_rate": 7.129023815755039e-06, |
| "logits/chosen": -2.9298288822174072, |
| "logits/rejected": -2.917689085006714, |
| "logps/chosen": -288.6741638183594, |
| "logps/rejected": -274.7695007324219, |
| "loss": 0.49, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.7392343282699585, |
| "rewards/margins": 1.7145700454711914, |
| "rewards/rejected": -3.4538040161132812, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.6451190787751897, |
| "grad_norm": 5.782430171966553, |
| "learning_rate": 7.102852656372677e-06, |
| "logits/chosen": -2.796736478805542, |
| "logits/rejected": -2.917767286300659, |
| "logps/chosen": -316.8404541015625, |
| "logps/rejected": -304.5469970703125, |
| "loss": 0.3817, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.597495675086975, |
| "rewards/margins": 2.14239501953125, |
| "rewards/rejected": -3.7398905754089355, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.6464276367443078, |
| "grad_norm": 7.320804595947266, |
| "learning_rate": 7.076681496990317e-06, |
| "logits/chosen": -2.8980050086975098, |
| "logits/rejected": -2.9889309406280518, |
| "logps/chosen": -309.5592346191406, |
| "logps/rejected": -293.1075134277344, |
| "loss": 0.4587, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.9160000085830688, |
| "rewards/margins": 1.8903604745864868, |
| "rewards/rejected": -3.8063607215881348, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.6477361947134258, |
| "grad_norm": 7.583347797393799, |
| "learning_rate": 7.050510337607956e-06, |
| "logits/chosen": -2.924474000930786, |
| "logits/rejected": -2.9702935218811035, |
| "logps/chosen": -315.4239807128906, |
| "logps/rejected": -279.01507568359375, |
| "loss": 0.4572, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.9099395275115967, |
| "rewards/margins": 1.9472777843475342, |
| "rewards/rejected": -3.857217311859131, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.6490447526825438, |
| "grad_norm": 3.8960468769073486, |
| "learning_rate": 7.0243391782255965e-06, |
| "logits/chosen": -2.9497616291046143, |
| "logits/rejected": -3.0569794178009033, |
| "logps/chosen": -294.25347900390625, |
| "logps/rejected": -303.3873596191406, |
| "loss": 0.5478, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.9578750133514404, |
| "rewards/margins": 1.8239530324935913, |
| "rewards/rejected": -3.7818284034729004, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.6503533106516619, |
| "grad_norm": 7.425008296966553, |
| "learning_rate": 6.998168018843236e-06, |
| "logits/chosen": -2.807569980621338, |
| "logits/rejected": -2.941020965576172, |
| "logps/chosen": -360.5503845214844, |
| "logps/rejected": -316.94677734375, |
| "loss": 0.3726, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.6083217859268188, |
| "rewards/margins": 2.2246527671813965, |
| "rewards/rejected": -3.8329741954803467, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.6516618686207799, |
| "grad_norm": 8.695091247558594, |
| "learning_rate": 6.971996859460874e-06, |
| "logits/chosen": -2.9309356212615967, |
| "logits/rejected": -2.965848922729492, |
| "logps/chosen": -295.5977783203125, |
| "logps/rejected": -287.81689453125, |
| "loss": 0.4912, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.896004319190979, |
| "rewards/margins": 2.422163724899292, |
| "rewards/rejected": -4.318167686462402, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.6529704265898979, |
| "grad_norm": 9.302384376525879, |
| "learning_rate": 6.945825700078514e-06, |
| "logits/chosen": -2.886300563812256, |
| "logits/rejected": -2.930643320083618, |
| "logps/chosen": -296.8878173828125, |
| "logps/rejected": -288.6578063964844, |
| "loss": 0.487, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.846461534500122, |
| "rewards/margins": 1.7389333248138428, |
| "rewards/rejected": -3.585395097732544, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.654278984559016, |
| "grad_norm": 9.560452461242676, |
| "learning_rate": 6.919654540696153e-06, |
| "logits/chosen": -2.8469934463500977, |
| "logits/rejected": -2.935584306716919, |
| "logps/chosen": -260.8996887207031, |
| "logps/rejected": -299.7215270996094, |
| "loss": 0.5645, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.634810447692871, |
| "rewards/margins": 1.784527063369751, |
| "rewards/rejected": -3.419337511062622, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.655587542528134, |
| "grad_norm": 7.051764011383057, |
| "learning_rate": 6.8934833813137926e-06, |
| "logits/chosen": -2.8771421909332275, |
| "logits/rejected": -2.951645612716675, |
| "logps/chosen": -305.71038818359375, |
| "logps/rejected": -284.0111389160156, |
| "loss": 0.4771, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.5477997064590454, |
| "rewards/margins": 1.621638298034668, |
| "rewards/rejected": -3.169437885284424, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.656896100497252, |
| "grad_norm": 10.278838157653809, |
| "learning_rate": 6.867312221931433e-06, |
| "logits/chosen": -2.9116673469543457, |
| "logits/rejected": -2.973714828491211, |
| "logps/chosen": -270.4404296875, |
| "logps/rejected": -254.27865600585938, |
| "loss": 0.5929, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.5017471313476562, |
| "rewards/margins": 1.1838232278823853, |
| "rewards/rejected": -2.685570240020752, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.6582046584663701, |
| "grad_norm": 8.013856887817383, |
| "learning_rate": 6.841141062549072e-06, |
| "logits/chosen": -2.8651554584503174, |
| "logits/rejected": -2.9246227741241455, |
| "logps/chosen": -227.64547729492188, |
| "logps/rejected": -250.6405487060547, |
| "loss": 0.6189, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -1.8577998876571655, |
| "rewards/margins": 0.9316193461418152, |
| "rewards/rejected": -2.789419174194336, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.6595132164354881, |
| "grad_norm": 6.389255523681641, |
| "learning_rate": 6.81496990316671e-06, |
| "logits/chosen": -2.7882442474365234, |
| "logits/rejected": -2.774643898010254, |
| "logps/chosen": -261.3345642089844, |
| "logps/rejected": -283.9215087890625, |
| "loss": 0.49, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.8135063648223877, |
| "rewards/margins": 1.7753257751464844, |
| "rewards/rejected": -3.588832139968872, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.6608217744046061, |
| "grad_norm": 8.19912338256836, |
| "learning_rate": 6.78879874378435e-06, |
| "logits/chosen": -2.8803114891052246, |
| "logits/rejected": -2.9215333461761475, |
| "logps/chosen": -280.89788818359375, |
| "logps/rejected": -320.07012939453125, |
| "loss": 0.5114, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8473488092422485, |
| "rewards/margins": 1.4395393133163452, |
| "rewards/rejected": -3.2868876457214355, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.6621303323737242, |
| "grad_norm": 12.39254379272461, |
| "learning_rate": 6.7626275844019895e-06, |
| "logits/chosen": -2.866048812866211, |
| "logits/rejected": -2.9401180744171143, |
| "logps/chosen": -286.77490234375, |
| "logps/rejected": -260.3507995605469, |
| "loss": 0.4503, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.7392785549163818, |
| "rewards/margins": 1.815791130065918, |
| "rewards/rejected": -3.5550696849823, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.6634388903428422, |
| "grad_norm": 7.732004165649414, |
| "learning_rate": 6.736456425019629e-06, |
| "logits/chosen": -2.8534507751464844, |
| "logits/rejected": -2.924248218536377, |
| "logps/chosen": -319.3000793457031, |
| "logps/rejected": -301.00396728515625, |
| "loss": 0.5023, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.768906593322754, |
| "rewards/margins": 1.7894645929336548, |
| "rewards/rejected": -3.5583713054656982, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.6647474483119602, |
| "grad_norm": 7.866215705871582, |
| "learning_rate": 6.710285265637269e-06, |
| "logits/chosen": -3.003950595855713, |
| "logits/rejected": -2.9520037174224854, |
| "logps/chosen": -321.5911560058594, |
| "logps/rejected": -313.03955078125, |
| "loss": 0.5501, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.6573905944824219, |
| "rewards/margins": 1.614404320716858, |
| "rewards/rejected": -3.2717947959899902, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.6660560062810783, |
| "grad_norm": 8.200257301330566, |
| "learning_rate": 6.684114106254907e-06, |
| "logits/chosen": -2.8988094329833984, |
| "logits/rejected": -2.9182348251342773, |
| "logps/chosen": -269.07464599609375, |
| "logps/rejected": -266.5125427246094, |
| "loss": 0.4299, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.3188421726226807, |
| "rewards/margins": 2.008021354675293, |
| "rewards/rejected": -3.3268637657165527, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.6673645642501963, |
| "grad_norm": 8.644819259643555, |
| "learning_rate": 6.657942946872546e-06, |
| "logits/chosen": -2.9794552326202393, |
| "logits/rejected": -2.959059238433838, |
| "logps/chosen": -286.81549072265625, |
| "logps/rejected": -325.7430114746094, |
| "loss": 0.4827, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.3967323303222656, |
| "rewards/margins": 1.7587554454803467, |
| "rewards/rejected": -3.155487537384033, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.6686731222193143, |
| "grad_norm": 6.768131256103516, |
| "learning_rate": 6.6317717874901865e-06, |
| "logits/chosen": -2.858863592147827, |
| "logits/rejected": -2.9572906494140625, |
| "logps/chosen": -252.885498046875, |
| "logps/rejected": -306.57196044921875, |
| "loss": 0.4259, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.3627374172210693, |
| "rewards/margins": 2.1756911277770996, |
| "rewards/rejected": -3.538428783416748, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.6699816801884323, |
| "grad_norm": 5.3841552734375, |
| "learning_rate": 6.605600628107826e-06, |
| "logits/chosen": -2.863058090209961, |
| "logits/rejected": -2.9404101371765137, |
| "logps/chosen": -292.02935791015625, |
| "logps/rejected": -301.8074645996094, |
| "loss": 0.4912, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.5280544757843018, |
| "rewards/margins": 1.760963797569275, |
| "rewards/rejected": -3.289018154144287, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.6712902381575504, |
| "grad_norm": 4.600503921508789, |
| "learning_rate": 6.579429468725466e-06, |
| "logits/chosen": -2.857856273651123, |
| "logits/rejected": -2.9453234672546387, |
| "logps/chosen": -288.824951171875, |
| "logps/rejected": -269.5613708496094, |
| "loss": 0.519, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.3591705560684204, |
| "rewards/margins": 1.5891754627227783, |
| "rewards/rejected": -2.948345899581909, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.6725987961266684, |
| "grad_norm": 7.518556118011475, |
| "learning_rate": 6.553258309343105e-06, |
| "logits/chosen": -2.8256914615631104, |
| "logits/rejected": -2.893322467803955, |
| "logps/chosen": -361.60540771484375, |
| "logps/rejected": -349.1207275390625, |
| "loss": 0.4789, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.478563904762268, |
| "rewards/margins": 1.865069031715393, |
| "rewards/rejected": -3.3436331748962402, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.6739073540957864, |
| "grad_norm": 4.997129440307617, |
| "learning_rate": 6.527087149960743e-06, |
| "logits/chosen": -2.830671787261963, |
| "logits/rejected": -2.8592820167541504, |
| "logps/chosen": -265.6669921875, |
| "logps/rejected": -297.15960693359375, |
| "loss": 0.3297, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.161731481552124, |
| "rewards/margins": 2.2399954795837402, |
| "rewards/rejected": -3.4017269611358643, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.6752159120649045, |
| "grad_norm": 6.114622592926025, |
| "learning_rate": 6.500915990578383e-06, |
| "logits/chosen": -2.8342204093933105, |
| "logits/rejected": -2.8946008682250977, |
| "logps/chosen": -295.8099670410156, |
| "logps/rejected": -326.4892578125, |
| "loss": 0.4826, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.4532636404037476, |
| "rewards/margins": 1.522308111190796, |
| "rewards/rejected": -2.975571870803833, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.6765244700340225, |
| "grad_norm": 8.778839111328125, |
| "learning_rate": 6.474744831196023e-06, |
| "logits/chosen": -2.8909201622009277, |
| "logits/rejected": -2.9403858184814453, |
| "logps/chosen": -303.0553283691406, |
| "logps/rejected": -298.9825439453125, |
| "loss": 0.5317, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.4531861543655396, |
| "rewards/margins": 1.1554243564605713, |
| "rewards/rejected": -2.6086106300354004, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.6778330280031405, |
| "grad_norm": 7.011162757873535, |
| "learning_rate": 6.448573671813662e-06, |
| "logits/chosen": -2.8256630897521973, |
| "logits/rejected": -2.8981237411499023, |
| "logps/chosen": -290.13531494140625, |
| "logps/rejected": -302.042724609375, |
| "loss": 0.4532, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.2221115827560425, |
| "rewards/margins": 1.5408296585083008, |
| "rewards/rejected": -2.762941360473633, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.6791415859722586, |
| "grad_norm": 7.359402179718018, |
| "learning_rate": 6.422402512431302e-06, |
| "logits/chosen": -2.989375591278076, |
| "logits/rejected": -2.956820487976074, |
| "logps/chosen": -244.23779296875, |
| "logps/rejected": -300.09613037109375, |
| "loss": 0.5183, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2675294876098633, |
| "rewards/margins": 1.5874407291412354, |
| "rewards/rejected": -2.8549702167510986, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.6804501439413766, |
| "grad_norm": 9.219598770141602, |
| "learning_rate": 6.39623135304894e-06, |
| "logits/chosen": -2.8202285766601562, |
| "logits/rejected": -2.9479031562805176, |
| "logps/chosen": -292.47711181640625, |
| "logps/rejected": -301.48541259765625, |
| "loss": 0.4481, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.2682809829711914, |
| "rewards/margins": 1.6929352283477783, |
| "rewards/rejected": -2.9612162113189697, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.6817587019104946, |
| "grad_norm": 10.345588684082031, |
| "learning_rate": 6.3700601936665795e-06, |
| "logits/chosen": -2.9142959117889404, |
| "logits/rejected": -3.0136189460754395, |
| "logps/chosen": -275.33282470703125, |
| "logps/rejected": -271.1238708496094, |
| "loss": 0.6053, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.4676587581634521, |
| "rewards/margins": 1.2222703695297241, |
| "rewards/rejected": -2.689929485321045, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.6830672598796127, |
| "grad_norm": 8.903675079345703, |
| "learning_rate": 6.3438890342842196e-06, |
| "logits/chosen": -2.9312522411346436, |
| "logits/rejected": -2.9643070697784424, |
| "logps/chosen": -268.38250732421875, |
| "logps/rejected": -276.27752685546875, |
| "loss": 0.5743, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.5821937322616577, |
| "rewards/margins": 1.11379075050354, |
| "rewards/rejected": -2.695984363555908, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.6843758178487307, |
| "grad_norm": 4.592360973358154, |
| "learning_rate": 6.317717874901859e-06, |
| "logits/chosen": -2.953341007232666, |
| "logits/rejected": -2.873232364654541, |
| "logps/chosen": -290.78289794921875, |
| "logps/rejected": -318.7877502441406, |
| "loss": 0.3272, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.0863611698150635, |
| "rewards/margins": 1.956146240234375, |
| "rewards/rejected": -3.0425074100494385, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.6856843758178487, |
| "grad_norm": 8.31113338470459, |
| "learning_rate": 6.291546715519498e-06, |
| "logits/chosen": -2.826333999633789, |
| "logits/rejected": -2.865309953689575, |
| "logps/chosen": -328.55352783203125, |
| "logps/rejected": -317.73321533203125, |
| "loss": 0.4531, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.1655009984970093, |
| "rewards/margins": 1.6823265552520752, |
| "rewards/rejected": -2.847827434539795, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.6869929337869668, |
| "grad_norm": 5.8730292320251465, |
| "learning_rate": 6.265375556137138e-06, |
| "logits/chosen": -2.9004733562469482, |
| "logits/rejected": -2.986215114593506, |
| "logps/chosen": -304.14996337890625, |
| "logps/rejected": -325.6918029785156, |
| "loss": 0.4951, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.4205595254898071, |
| "rewards/margins": 1.4959120750427246, |
| "rewards/rejected": -2.9164717197418213, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.6883014917560848, |
| "grad_norm": 6.657158851623535, |
| "learning_rate": 6.2392043967547764e-06, |
| "logits/chosen": -2.896409034729004, |
| "logits/rejected": -2.907116413116455, |
| "logps/chosen": -279.0505676269531, |
| "logps/rejected": -285.6734619140625, |
| "loss": 0.4821, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.2515028715133667, |
| "rewards/margins": 1.3621675968170166, |
| "rewards/rejected": -2.613670587539673, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.6896100497252028, |
| "grad_norm": 6.316776275634766, |
| "learning_rate": 6.213033237372416e-06, |
| "logits/chosen": -2.944918155670166, |
| "logits/rejected": -3.0123748779296875, |
| "logps/chosen": -355.7431945800781, |
| "logps/rejected": -290.6505126953125, |
| "loss": 0.4345, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2796906232833862, |
| "rewards/margins": 1.6674339771270752, |
| "rewards/rejected": -2.947124719619751, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.6909186076943209, |
| "grad_norm": 6.7956037521362305, |
| "learning_rate": 6.186862077990056e-06, |
| "logits/chosen": -2.8235573768615723, |
| "logits/rejected": -2.815488815307617, |
| "logps/chosen": -261.06329345703125, |
| "logps/rejected": -269.0946350097656, |
| "loss": 0.3785, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.296617031097412, |
| "rewards/margins": 1.810760736465454, |
| "rewards/rejected": -3.1073780059814453, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.6922271656634389, |
| "grad_norm": 9.322579383850098, |
| "learning_rate": 6.160690918607695e-06, |
| "logits/chosen": -2.9193060398101807, |
| "logits/rejected": -3.0121703147888184, |
| "logps/chosen": -290.3641052246094, |
| "logps/rejected": -252.94198608398438, |
| "loss": 0.376, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.0163379907608032, |
| "rewards/margins": 1.8828426599502563, |
| "rewards/rejected": -2.8991806507110596, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.6935357236325569, |
| "grad_norm": 8.177329063415527, |
| "learning_rate": 6.134519759225335e-06, |
| "logits/chosen": -2.927708387374878, |
| "logits/rejected": -3.0037269592285156, |
| "logps/chosen": -308.3323059082031, |
| "logps/rejected": -318.09002685546875, |
| "loss": 0.5581, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.1352941989898682, |
| "rewards/margins": 1.3868353366851807, |
| "rewards/rejected": -2.522129535675049, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.694844281601675, |
| "grad_norm": 10.346382141113281, |
| "learning_rate": 6.108348599842973e-06, |
| "logits/chosen": -2.8416390419006348, |
| "logits/rejected": -2.8420791625976562, |
| "logps/chosen": -267.317626953125, |
| "logps/rejected": -282.65570068359375, |
| "loss": 0.6123, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.2157745361328125, |
| "rewards/margins": 1.3261229991912842, |
| "rewards/rejected": -2.5418972969055176, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.696152839570793, |
| "grad_norm": 7.9833455085754395, |
| "learning_rate": 6.082177440460613e-06, |
| "logits/chosen": -3.010012626647949, |
| "logits/rejected": -3.0121254920959473, |
| "logps/chosen": -314.1331787109375, |
| "logps/rejected": -295.4376525878906, |
| "loss": 0.5446, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2808277606964111, |
| "rewards/margins": 1.409279227256775, |
| "rewards/rejected": -2.6901068687438965, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.697461397539911, |
| "grad_norm": 6.993275165557861, |
| "learning_rate": 6.056006281078253e-06, |
| "logits/chosen": -3.0581610202789307, |
| "logits/rejected": -3.0452892780303955, |
| "logps/chosen": -276.1371154785156, |
| "logps/rejected": -248.65353393554688, |
| "loss": 0.4886, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.2057183980941772, |
| "rewards/margins": 1.5118528604507446, |
| "rewards/rejected": -2.717571258544922, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.6987699555090291, |
| "grad_norm": 6.968574047088623, |
| "learning_rate": 6.029835121695892e-06, |
| "logits/chosen": -2.9653797149658203, |
| "logits/rejected": -3.0148282051086426, |
| "logps/chosen": -324.75762939453125, |
| "logps/rejected": -289.0790100097656, |
| "loss": 0.5447, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.534473180770874, |
| "rewards/margins": 1.1837376356124878, |
| "rewards/rejected": -2.7182106971740723, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.7000785134781471, |
| "grad_norm": 7.007041931152344, |
| "learning_rate": 6.00366396231353e-06, |
| "logits/chosen": -2.9610750675201416, |
| "logits/rejected": -2.9877383708953857, |
| "logps/chosen": -269.05517578125, |
| "logps/rejected": -309.22509765625, |
| "loss": 0.4013, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.3385134935379028, |
| "rewards/margins": 1.7016382217407227, |
| "rewards/rejected": -3.040152072906494, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.7013870714472651, |
| "grad_norm": 8.326202392578125, |
| "learning_rate": 5.977492802931171e-06, |
| "logits/chosen": -2.8807883262634277, |
| "logits/rejected": -2.998851776123047, |
| "logps/chosen": -299.6264953613281, |
| "logps/rejected": -335.3392639160156, |
| "loss": 0.6032, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4939355850219727, |
| "rewards/margins": 1.3633782863616943, |
| "rewards/rejected": -2.857314109802246, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.7026956294163832, |
| "grad_norm": 7.929780006408691, |
| "learning_rate": 5.9513216435488095e-06, |
| "logits/chosen": -2.8801910877227783, |
| "logits/rejected": -2.940908432006836, |
| "logps/chosen": -303.807861328125, |
| "logps/rejected": -265.91217041015625, |
| "loss": 0.5652, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.0966110229492188, |
| "rewards/margins": 1.154617428779602, |
| "rewards/rejected": -2.2512283325195312, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.7040041873855012, |
| "grad_norm": 6.519760608673096, |
| "learning_rate": 5.925150484166449e-06, |
| "logits/chosen": -2.935680389404297, |
| "logits/rejected": -3.0190281867980957, |
| "logps/chosen": -292.3984069824219, |
| "logps/rejected": -277.78302001953125, |
| "loss": 0.6154, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.2248005867004395, |
| "rewards/margins": 1.3764398097991943, |
| "rewards/rejected": -2.601240634918213, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.7053127453546192, |
| "grad_norm": 5.480634689331055, |
| "learning_rate": 5.898979324784089e-06, |
| "logits/chosen": -2.9160866737365723, |
| "logits/rejected": -3.0109434127807617, |
| "logps/chosen": -268.1304626464844, |
| "logps/rejected": -307.4697570800781, |
| "loss": 0.4778, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.837879478931427, |
| "rewards/margins": 1.6470983028411865, |
| "rewards/rejected": -2.484978199005127, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.7066213033237373, |
| "grad_norm": 6.198331832885742, |
| "learning_rate": 5.872808165401728e-06, |
| "logits/chosen": -2.925713300704956, |
| "logits/rejected": -3.0466697216033936, |
| "logps/chosen": -287.07537841796875, |
| "logps/rejected": -267.8861083984375, |
| "loss": 0.4928, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.7808945178985596, |
| "rewards/margins": 1.2690925598144531, |
| "rewards/rejected": -2.049987316131592, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.7079298612928553, |
| "grad_norm": 6.397192001342773, |
| "learning_rate": 5.846637006019366e-06, |
| "logits/chosen": -2.9498023986816406, |
| "logits/rejected": -3.021524667739868, |
| "logps/chosen": -356.5502014160156, |
| "logps/rejected": -305.47344970703125, |
| "loss": 0.4493, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.1231266260147095, |
| "rewards/margins": 1.5973341464996338, |
| "rewards/rejected": -2.7204606533050537, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.7092384192619733, |
| "grad_norm": 4.5419697761535645, |
| "learning_rate": 5.8204658466370065e-06, |
| "logits/chosen": -2.9890902042388916, |
| "logits/rejected": -2.981123208999634, |
| "logps/chosen": -280.586669921875, |
| "logps/rejected": -268.80108642578125, |
| "loss": 0.5424, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0319064855575562, |
| "rewards/margins": 1.4627954959869385, |
| "rewards/rejected": -2.494702100753784, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.7105469772310914, |
| "grad_norm": 5.94234561920166, |
| "learning_rate": 5.794294687254646e-06, |
| "logits/chosen": -3.0644314289093018, |
| "logits/rejected": -3.11076021194458, |
| "logps/chosen": -307.77947998046875, |
| "logps/rejected": -304.94781494140625, |
| "loss": 0.4971, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.1088581085205078, |
| "rewards/margins": 1.72271728515625, |
| "rewards/rejected": -2.831575632095337, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.7118555352002094, |
| "grad_norm": 4.344791412353516, |
| "learning_rate": 5.768123527872285e-06, |
| "logits/chosen": -3.0225837230682373, |
| "logits/rejected": -3.0797817707061768, |
| "logps/chosen": -314.328857421875, |
| "logps/rejected": -300.76959228515625, |
| "loss": 0.5329, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.093664526939392, |
| "rewards/margins": 1.374602198600769, |
| "rewards/rejected": -2.4682669639587402, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.7131640931693274, |
| "grad_norm": 6.447783470153809, |
| "learning_rate": 5.741952368489925e-06, |
| "logits/chosen": -2.824953079223633, |
| "logits/rejected": -2.915835380554199, |
| "logps/chosen": -251.0038604736328, |
| "logps/rejected": -229.1015167236328, |
| "loss": 0.4933, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0042675733566284, |
| "rewards/margins": 1.4493547677993774, |
| "rewards/rejected": -2.453622579574585, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.7144726511384454, |
| "grad_norm": 4.415769100189209, |
| "learning_rate": 5.715781209107563e-06, |
| "logits/chosen": -2.971993923187256, |
| "logits/rejected": -3.010190963745117, |
| "logps/chosen": -345.302734375, |
| "logps/rejected": -356.4703063964844, |
| "loss": 0.3961, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.6722342371940613, |
| "rewards/margins": 1.6876239776611328, |
| "rewards/rejected": -2.359858274459839, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.7157812091075635, |
| "grad_norm": 7.8874640464782715, |
| "learning_rate": 5.689610049725203e-06, |
| "logits/chosen": -3.04805850982666, |
| "logits/rejected": -3.023395538330078, |
| "logps/chosen": -252.2591094970703, |
| "logps/rejected": -256.0127868652344, |
| "loss": 0.4838, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.035502552986145, |
| "rewards/margins": 1.598411202430725, |
| "rewards/rejected": -2.63391375541687, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.7170897670766815, |
| "grad_norm": 7.325488567352295, |
| "learning_rate": 5.663438890342843e-06, |
| "logits/chosen": -3.0302627086639404, |
| "logits/rejected": -2.9467923641204834, |
| "logps/chosen": -280.1380615234375, |
| "logps/rejected": -290.256591796875, |
| "loss": 0.5807, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.152597188949585, |
| "rewards/margins": 1.088136911392212, |
| "rewards/rejected": -2.240734338760376, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.7183983250457995, |
| "grad_norm": 7.371074676513672, |
| "learning_rate": 5.637267730960482e-06, |
| "logits/chosen": -2.880004644393921, |
| "logits/rejected": -2.8563218116760254, |
| "logps/chosen": -313.92279052734375, |
| "logps/rejected": -299.59124755859375, |
| "loss": 0.5275, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.3305268287658691, |
| "rewards/margins": 1.441772222518921, |
| "rewards/rejected": -2.772299289703369, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.7197068830149176, |
| "grad_norm": 4.628333568572998, |
| "learning_rate": 5.611096571578122e-06, |
| "logits/chosen": -2.926056385040283, |
| "logits/rejected": -2.9997127056121826, |
| "logps/chosen": -326.965087890625, |
| "logps/rejected": -287.66619873046875, |
| "loss": 0.4962, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.244328260421753, |
| "rewards/margins": 1.6143608093261719, |
| "rewards/rejected": -2.8586888313293457, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.7210154409840356, |
| "grad_norm": 8.735886573791504, |
| "learning_rate": 5.584925412195761e-06, |
| "logits/chosen": -2.9470670223236084, |
| "logits/rejected": -2.914656162261963, |
| "logps/chosen": -291.71343994140625, |
| "logps/rejected": -282.67962646484375, |
| "loss": 0.4442, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.071761131286621, |
| "rewards/margins": 1.5430763959884644, |
| "rewards/rejected": -2.614838123321533, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.7223239989531536, |
| "grad_norm": 6.217535495758057, |
| "learning_rate": 5.5587542528133995e-06, |
| "logits/chosen": -3.065964698791504, |
| "logits/rejected": -2.982069730758667, |
| "logps/chosen": -328.1929626464844, |
| "logps/rejected": -315.532470703125, |
| "loss": 0.4297, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.2176027297973633, |
| "rewards/margins": 1.6291157007217407, |
| "rewards/rejected": -2.8467183113098145, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.7236325569222717, |
| "grad_norm": 10.293171882629395, |
| "learning_rate": 5.5325830934310396e-06, |
| "logits/chosen": -2.8855443000793457, |
| "logits/rejected": -2.993187665939331, |
| "logps/chosen": -273.87005615234375, |
| "logps/rejected": -276.5475158691406, |
| "loss": 0.6475, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.329725980758667, |
| "rewards/margins": 1.0645885467529297, |
| "rewards/rejected": -2.3943145275115967, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.7249411148913897, |
| "grad_norm": 6.53238582611084, |
| "learning_rate": 5.506411934048679e-06, |
| "logits/chosen": -2.9521586894989014, |
| "logits/rejected": -2.991973638534546, |
| "logps/chosen": -240.7144775390625, |
| "logps/rejected": -256.20306396484375, |
| "loss": 0.433, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.3881947994232178, |
| "rewards/margins": 1.3882848024368286, |
| "rewards/rejected": -2.776479721069336, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.7262496728605077, |
| "grad_norm": 5.987064361572266, |
| "learning_rate": 5.480240774666318e-06, |
| "logits/chosen": -3.003497838973999, |
| "logits/rejected": -3.095515727996826, |
| "logps/chosen": -291.5846252441406, |
| "logps/rejected": -314.75897216796875, |
| "loss": 0.4325, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.2352880239486694, |
| "rewards/margins": 1.6097803115844727, |
| "rewards/rejected": -2.8450684547424316, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.7275582308296258, |
| "grad_norm": 8.166574478149414, |
| "learning_rate": 5.454069615283958e-06, |
| "logits/chosen": -2.9763851165771484, |
| "logits/rejected": -2.9842090606689453, |
| "logps/chosen": -245.7153778076172, |
| "logps/rejected": -240.0733184814453, |
| "loss": 0.5427, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.0601531267166138, |
| "rewards/margins": 1.1864640712738037, |
| "rewards/rejected": -2.246617078781128, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.7288667887987438, |
| "grad_norm": 5.318563938140869, |
| "learning_rate": 5.4278984559015964e-06, |
| "logits/chosen": -2.8072509765625, |
| "logits/rejected": -2.9027323722839355, |
| "logps/chosen": -281.28741455078125, |
| "logps/rejected": -281.07037353515625, |
| "loss": 0.4658, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.4859296083450317, |
| "rewards/margins": 1.3587102890014648, |
| "rewards/rejected": -2.844639778137207, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.7301753467678618, |
| "grad_norm": 7.786154270172119, |
| "learning_rate": 5.401727296519236e-06, |
| "logits/chosen": -3.0480589866638184, |
| "logits/rejected": -3.023777723312378, |
| "logps/chosen": -291.3687744140625, |
| "logps/rejected": -273.6734619140625, |
| "loss": 0.4742, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.2371892929077148, |
| "rewards/margins": 1.3749767541885376, |
| "rewards/rejected": -2.612166166305542, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.7314839047369799, |
| "grad_norm": 6.338807582855225, |
| "learning_rate": 5.375556137136876e-06, |
| "logits/chosen": -2.8649275302886963, |
| "logits/rejected": -2.9403674602508545, |
| "logps/chosen": -297.8705749511719, |
| "logps/rejected": -286.6891174316406, |
| "loss": 0.5284, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5126415491104126, |
| "rewards/margins": 1.4750219583511353, |
| "rewards/rejected": -2.987663507461548, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.7327924627060979, |
| "grad_norm": 7.393446445465088, |
| "learning_rate": 5.349384977754515e-06, |
| "logits/chosen": -2.7669756412506104, |
| "logits/rejected": -2.9230990409851074, |
| "logps/chosen": -273.23651123046875, |
| "logps/rejected": -301.51702880859375, |
| "loss": 0.5417, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.8848727941513062, |
| "rewards/margins": 1.4472146034240723, |
| "rewards/rejected": -3.3320870399475098, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7327924627060979, |
| "eval_logits/chosen": -3.0151562690734863, |
| "eval_logits/rejected": -3.032125473022461, |
| "eval_logps/chosen": -299.7012939453125, |
| "eval_logps/rejected": -294.1008605957031, |
| "eval_loss": 0.4922027885913849, |
| "eval_rewards/accuracies": 0.7544999718666077, |
| "eval_rewards/chosen": -1.6467875242233276, |
| "eval_rewards/margins": 1.3698387145996094, |
| "eval_rewards/rejected": -3.0166258811950684, |
| "eval_runtime": 764.128, |
| "eval_samples_per_second": 2.617, |
| "eval_steps_per_second": 0.327, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7341010206752159, |
| "grad_norm": 4.838715076446533, |
| "learning_rate": 5.323213818372154e-06, |
| "logits/chosen": -2.983696460723877, |
| "logits/rejected": -2.989692211151123, |
| "logps/chosen": -326.8425598144531, |
| "logps/rejected": -332.06634521484375, |
| "loss": 0.6553, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.7837555408477783, |
| "rewards/margins": 1.0040950775146484, |
| "rewards/rejected": -2.7878506183624268, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.735409578644334, |
| "grad_norm": 7.242071151733398, |
| "learning_rate": 5.297042658989794e-06, |
| "logits/chosen": -2.930501937866211, |
| "logits/rejected": -3.0442214012145996, |
| "logps/chosen": -284.0743408203125, |
| "logps/rejected": -317.67840576171875, |
| "loss": 0.626, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.8534746170043945, |
| "rewards/margins": 0.8570615649223328, |
| "rewards/rejected": -2.710536241531372, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.736718136613452, |
| "grad_norm": 6.814687728881836, |
| "learning_rate": 5.270871499607433e-06, |
| "logits/chosen": -2.9587209224700928, |
| "logits/rejected": -3.0509235858917236, |
| "logps/chosen": -265.3656921386719, |
| "logps/rejected": -268.7741394042969, |
| "loss": 0.4371, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.7126185894012451, |
| "rewards/margins": 1.4011996984481812, |
| "rewards/rejected": -3.113818645477295, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.73802669458257, |
| "grad_norm": 6.233587265014648, |
| "learning_rate": 5.244700340225073e-06, |
| "logits/chosen": -3.000408172607422, |
| "logits/rejected": -3.0090386867523193, |
| "logps/chosen": -268.31610107421875, |
| "logps/rejected": -252.54257202148438, |
| "loss": 0.5113, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8398408889770508, |
| "rewards/margins": 1.0091921091079712, |
| "rewards/rejected": -2.8490328788757324, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.7393352525516881, |
| "grad_norm": 5.571577072143555, |
| "learning_rate": 5.218529180842712e-06, |
| "logits/chosen": -2.983218193054199, |
| "logits/rejected": -2.993669271469116, |
| "logps/chosen": -253.2226104736328, |
| "logps/rejected": -260.74432373046875, |
| "loss": 0.4537, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.7749837636947632, |
| "rewards/margins": 1.258331537246704, |
| "rewards/rejected": -3.033315420150757, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.7406438105208061, |
| "grad_norm": 8.487015724182129, |
| "learning_rate": 5.192358021460351e-06, |
| "logits/chosen": -3.1049036979675293, |
| "logits/rejected": -3.024142026901245, |
| "logps/chosen": -331.94085693359375, |
| "logps/rejected": -290.12615966796875, |
| "loss": 0.4844, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.843961477279663, |
| "rewards/margins": 1.2949938774108887, |
| "rewards/rejected": -3.1389553546905518, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.7419523684899241, |
| "grad_norm": 9.09654426574707, |
| "learning_rate": 5.166186862077991e-06, |
| "logits/chosen": -3.0611274242401123, |
| "logits/rejected": -3.0860462188720703, |
| "logps/chosen": -314.9799499511719, |
| "logps/rejected": -305.2943115234375, |
| "loss": 0.549, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.8103431463241577, |
| "rewards/margins": 1.2532975673675537, |
| "rewards/rejected": -3.063640832901001, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.7432609264590422, |
| "grad_norm": 7.024341583251953, |
| "learning_rate": 5.1400157026956295e-06, |
| "logits/chosen": -2.8816845417022705, |
| "logits/rejected": -2.858503818511963, |
| "logps/chosen": -238.06704711914062, |
| "logps/rejected": -253.7016143798828, |
| "loss": 0.5255, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.8397810459136963, |
| "rewards/margins": 1.2725389003753662, |
| "rewards/rejected": -3.1123194694519043, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.7445694844281602, |
| "grad_norm": 7.907282829284668, |
| "learning_rate": 5.113844543313269e-06, |
| "logits/chosen": -3.0064635276794434, |
| "logits/rejected": -3.0659427642822266, |
| "logps/chosen": -268.85504150390625, |
| "logps/rejected": -243.37680053710938, |
| "loss": 0.5628, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -2.0020573139190674, |
| "rewards/margins": 1.343377947807312, |
| "rewards/rejected": -3.345435380935669, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.7458780423972782, |
| "grad_norm": 7.9956464767456055, |
| "learning_rate": 5.087673383930909e-06, |
| "logits/chosen": -2.9852802753448486, |
| "logits/rejected": -3.0461061000823975, |
| "logps/chosen": -336.5865478515625, |
| "logps/rejected": -308.699951171875, |
| "loss": 0.5046, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -2.004922389984131, |
| "rewards/margins": 1.480072021484375, |
| "rewards/rejected": -3.484994411468506, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.7471866003663963, |
| "grad_norm": 5.574730396270752, |
| "learning_rate": 5.061502224548548e-06, |
| "logits/chosen": -2.90897798538208, |
| "logits/rejected": -3.0144150257110596, |
| "logps/chosen": -294.5137634277344, |
| "logps/rejected": -311.13787841796875, |
| "loss": 0.4537, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.66525137424469, |
| "rewards/margins": 1.5599861145019531, |
| "rewards/rejected": -3.2252373695373535, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.7484951583355143, |
| "grad_norm": 8.235709190368652, |
| "learning_rate": 5.035331065166187e-06, |
| "logits/chosen": -2.959197521209717, |
| "logits/rejected": -2.9602441787719727, |
| "logps/chosen": -266.502685546875, |
| "logps/rejected": -283.14190673828125, |
| "loss": 0.483, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8751081228256226, |
| "rewards/margins": 1.5132043361663818, |
| "rewards/rejected": -3.388312578201294, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.7498037163046323, |
| "grad_norm": 10.213626861572266, |
| "learning_rate": 5.009159905783827e-06, |
| "logits/chosen": -2.9322094917297363, |
| "logits/rejected": -2.9696342945098877, |
| "logps/chosen": -317.585205078125, |
| "logps/rejected": -321.09234619140625, |
| "loss": 0.3887, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.844107985496521, |
| "rewards/margins": 1.7453218698501587, |
| "rewards/rejected": -3.5894293785095215, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.7511122742737504, |
| "grad_norm": 4.826941013336182, |
| "learning_rate": 4.982988746401466e-06, |
| "logits/chosen": -3.0065250396728516, |
| "logits/rejected": -3.007230281829834, |
| "logps/chosen": -262.45745849609375, |
| "logps/rejected": -316.62176513671875, |
| "loss": 0.5233, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.0057075023651123, |
| "rewards/margins": 1.4744579792022705, |
| "rewards/rejected": -3.4801650047302246, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.7524208322428684, |
| "grad_norm": 7.547994613647461, |
| "learning_rate": 4.956817587019106e-06, |
| "logits/chosen": -2.992795467376709, |
| "logits/rejected": -3.04087495803833, |
| "logps/chosen": -288.2919616699219, |
| "logps/rejected": -297.0101623535156, |
| "loss": 0.5241, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.7158348560333252, |
| "rewards/margins": 1.439750075340271, |
| "rewards/rejected": -3.1555845737457275, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.7537293902119864, |
| "grad_norm": 8.768786430358887, |
| "learning_rate": 4.930646427636745e-06, |
| "logits/chosen": -3.0172226428985596, |
| "logits/rejected": -3.03350567817688, |
| "logps/chosen": -284.9944152832031, |
| "logps/rejected": -287.624267578125, |
| "loss": 0.678, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.690021276473999, |
| "rewards/margins": 1.1501357555389404, |
| "rewards/rejected": -2.8401570320129395, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.7550379481811044, |
| "grad_norm": 7.017695426940918, |
| "learning_rate": 4.904475268254384e-06, |
| "logits/chosen": -3.009779214859009, |
| "logits/rejected": -2.9742047786712646, |
| "logps/chosen": -298.6566467285156, |
| "logps/rejected": -313.1660461425781, |
| "loss": 0.4167, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.845075011253357, |
| "rewards/margins": 1.5888986587524414, |
| "rewards/rejected": -3.433973789215088, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.7563465061502225, |
| "grad_norm": 7.003612518310547, |
| "learning_rate": 4.878304108872023e-06, |
| "logits/chosen": -3.0446248054504395, |
| "logits/rejected": -3.0289571285247803, |
| "logps/chosen": -262.9287109375, |
| "logps/rejected": -265.87652587890625, |
| "loss": 0.4688, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.8515796661376953, |
| "rewards/margins": 1.2062747478485107, |
| "rewards/rejected": -3.057854413986206, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.7576550641193405, |
| "grad_norm": 9.433390617370605, |
| "learning_rate": 4.852132949489663e-06, |
| "logits/chosen": -3.0275864601135254, |
| "logits/rejected": -3.0345730781555176, |
| "logps/chosen": -275.7232971191406, |
| "logps/rejected": -304.7025451660156, |
| "loss": 0.5553, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.858757734298706, |
| "rewards/margins": 1.2982639074325562, |
| "rewards/rejected": -3.1570212841033936, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.7589636220884585, |
| "grad_norm": 8.930874824523926, |
| "learning_rate": 4.825961790107302e-06, |
| "logits/chosen": -3.054530620574951, |
| "logits/rejected": -3.0698142051696777, |
| "logps/chosen": -266.8838195800781, |
| "logps/rejected": -265.035400390625, |
| "loss": 0.6378, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.7914512157440186, |
| "rewards/margins": 0.9923449754714966, |
| "rewards/rejected": -2.7837963104248047, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.7602721800575766, |
| "grad_norm": 6.339506149291992, |
| "learning_rate": 4.799790630724941e-06, |
| "logits/chosen": -2.9580090045928955, |
| "logits/rejected": -3.0641238689422607, |
| "logps/chosen": -343.8387756347656, |
| "logps/rejected": -306.41131591796875, |
| "loss": 0.5801, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.693976640701294, |
| "rewards/margins": 1.276618480682373, |
| "rewards/rejected": -2.970594882965088, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.7615807380266946, |
| "grad_norm": 7.329442977905273, |
| "learning_rate": 4.773619471342581e-06, |
| "logits/chosen": -2.9426028728485107, |
| "logits/rejected": -3.050346851348877, |
| "logps/chosen": -329.4857177734375, |
| "logps/rejected": -312.78326416015625, |
| "loss": 0.4338, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.4311538934707642, |
| "rewards/margins": 1.5746768712997437, |
| "rewards/rejected": -3.005831003189087, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.7628892959958126, |
| "grad_norm": 9.978039741516113, |
| "learning_rate": 4.74744831196022e-06, |
| "logits/chosen": -2.9623208045959473, |
| "logits/rejected": -3.00553035736084, |
| "logps/chosen": -272.9161682128906, |
| "logps/rejected": -275.42938232421875, |
| "loss": 0.5062, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.415977954864502, |
| "rewards/margins": 1.6318323612213135, |
| "rewards/rejected": -3.0478103160858154, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.7641978539649307, |
| "grad_norm": 9.443857192993164, |
| "learning_rate": 4.7212771525778596e-06, |
| "logits/chosen": -3.0159335136413574, |
| "logits/rejected": -3.0996041297912598, |
| "logps/chosen": -368.2574768066406, |
| "logps/rejected": -347.77099609375, |
| "loss": 0.5326, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.184187650680542, |
| "rewards/margins": 1.3835378885269165, |
| "rewards/rejected": -2.56772518157959, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.7655064119340487, |
| "grad_norm": 6.5323004722595215, |
| "learning_rate": 4.695105993195499e-06, |
| "logits/chosen": -2.9812138080596924, |
| "logits/rejected": -3.046619415283203, |
| "logps/chosen": -291.3216247558594, |
| "logps/rejected": -335.71905517578125, |
| "loss": 0.4556, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1183115243911743, |
| "rewards/margins": 1.6687357425689697, |
| "rewards/rejected": -2.7870471477508545, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.7668149699031667, |
| "grad_norm": 7.712170124053955, |
| "learning_rate": 4.668934833813139e-06, |
| "logits/chosen": -2.9688682556152344, |
| "logits/rejected": -2.981501817703247, |
| "logps/chosen": -286.4724426269531, |
| "logps/rejected": -257.21624755859375, |
| "loss": 0.6616, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.215268850326538, |
| "rewards/margins": 1.1600792407989502, |
| "rewards/rejected": -2.3753480911254883, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.7681235278722848, |
| "grad_norm": 6.923589706420898, |
| "learning_rate": 4.642763674430777e-06, |
| "logits/chosen": -2.9614179134368896, |
| "logits/rejected": -2.9387574195861816, |
| "logps/chosen": -311.4593811035156, |
| "logps/rejected": -321.67041015625, |
| "loss": 0.5725, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.0969353914260864, |
| "rewards/margins": 1.274997591972351, |
| "rewards/rejected": -2.3719329833984375, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.7694320858414028, |
| "grad_norm": 5.904993534088135, |
| "learning_rate": 4.616592515048417e-06, |
| "logits/chosen": -3.0357108116149902, |
| "logits/rejected": -2.909348487854004, |
| "logps/chosen": -281.0157775878906, |
| "logps/rejected": -285.02655029296875, |
| "loss": 0.5521, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.1831865310668945, |
| "rewards/margins": 1.486000895500183, |
| "rewards/rejected": -2.669187307357788, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.7707406438105208, |
| "grad_norm": 6.901908874511719, |
| "learning_rate": 4.5904213556660565e-06, |
| "logits/chosen": -2.8211007118225098, |
| "logits/rejected": -2.88071870803833, |
| "logps/chosen": -271.14752197265625, |
| "logps/rejected": -254.5480194091797, |
| "loss": 0.5307, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.3990057706832886, |
| "rewards/margins": 1.23850417137146, |
| "rewards/rejected": -2.637510061264038, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.7720492017796389, |
| "grad_norm": 6.1756978034973145, |
| "learning_rate": 4.564250196283696e-06, |
| "logits/chosen": -2.891523838043213, |
| "logits/rejected": -2.9697718620300293, |
| "logps/chosen": -291.3207702636719, |
| "logps/rejected": -290.206787109375, |
| "loss": 0.5143, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.3040390014648438, |
| "rewards/margins": 1.3442087173461914, |
| "rewards/rejected": -2.6482479572296143, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.7733577597487569, |
| "grad_norm": 8.50953483581543, |
| "learning_rate": 4.538079036901335e-06, |
| "logits/chosen": -2.8105571269989014, |
| "logits/rejected": -2.8145947456359863, |
| "logps/chosen": -294.50286865234375, |
| "logps/rejected": -265.7152404785156, |
| "loss": 0.5944, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.2462295293807983, |
| "rewards/margins": 1.3464641571044922, |
| "rewards/rejected": -2.59269380569458, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.7746663177178749, |
| "grad_norm": 8.569794654846191, |
| "learning_rate": 4.511907877518974e-06, |
| "logits/chosen": -2.9901108741760254, |
| "logits/rejected": -2.969892978668213, |
| "logps/chosen": -309.3415832519531, |
| "logps/rejected": -323.17120361328125, |
| "loss": 0.5608, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.099224328994751, |
| "rewards/margins": 1.1640383005142212, |
| "rewards/rejected": -2.2632622718811035, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.775974875686993, |
| "grad_norm": 6.8540143966674805, |
| "learning_rate": 4.485736718136614e-06, |
| "logits/chosen": -2.9594240188598633, |
| "logits/rejected": -3.0211386680603027, |
| "logps/chosen": -293.110107421875, |
| "logps/rejected": -330.95574951171875, |
| "loss": 0.5497, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.0265569686889648, |
| "rewards/margins": 1.503934621810913, |
| "rewards/rejected": -2.530491352081299, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.777283433656111, |
| "grad_norm": 6.731704235076904, |
| "learning_rate": 4.459565558754253e-06, |
| "logits/chosen": -2.9934017658233643, |
| "logits/rejected": -3.0968306064605713, |
| "logps/chosen": -261.086181640625, |
| "logps/rejected": -260.58489990234375, |
| "loss": 0.5805, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.1074072122573853, |
| "rewards/margins": 1.2867536544799805, |
| "rewards/rejected": -2.3941612243652344, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.778591991625229, |
| "grad_norm": 6.898943901062012, |
| "learning_rate": 4.433394399371893e-06, |
| "logits/chosen": -2.861293315887451, |
| "logits/rejected": -2.998875141143799, |
| "logps/chosen": -283.81689453125, |
| "logps/rejected": -248.768798828125, |
| "loss": 0.4341, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.1969306468963623, |
| "rewards/margins": 1.3554353713989258, |
| "rewards/rejected": -2.552365779876709, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.7799005495943471, |
| "grad_norm": 7.095617294311523, |
| "learning_rate": 4.407223239989532e-06, |
| "logits/chosen": -2.9728245735168457, |
| "logits/rejected": -3.051290273666382, |
| "logps/chosen": -310.69158935546875, |
| "logps/rejected": -277.09735107421875, |
| "loss": 0.5192, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.1353857517242432, |
| "rewards/margins": 1.3678357601165771, |
| "rewards/rejected": -2.5032215118408203, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.7812091075634651, |
| "grad_norm": 8.380833625793457, |
| "learning_rate": 4.381052080607171e-06, |
| "logits/chosen": -2.9203391075134277, |
| "logits/rejected": -2.90177321434021, |
| "logps/chosen": -270.91522216796875, |
| "logps/rejected": -295.3896179199219, |
| "loss": 0.5453, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3055708408355713, |
| "rewards/margins": 1.305996298789978, |
| "rewards/rejected": -2.6115670204162598, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.7825176655325831, |
| "grad_norm": 5.626356601715088, |
| "learning_rate": 4.35488092122481e-06, |
| "logits/chosen": -3.0040221214294434, |
| "logits/rejected": -2.9230525493621826, |
| "logps/chosen": -313.6124267578125, |
| "logps/rejected": -320.3941955566406, |
| "loss": 0.5426, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.2137765884399414, |
| "rewards/margins": 1.1921061277389526, |
| "rewards/rejected": -2.4058830738067627, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.7838262235017012, |
| "grad_norm": 7.655642986297607, |
| "learning_rate": 4.32870976184245e-06, |
| "logits/chosen": -2.935147762298584, |
| "logits/rejected": -2.9671669006347656, |
| "logps/chosen": -319.5419006347656, |
| "logps/rejected": -267.1727600097656, |
| "loss": 0.4952, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.858169436454773, |
| "rewards/margins": 1.29342520236969, |
| "rewards/rejected": -2.151594400405884, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.7851347814708192, |
| "grad_norm": 3.3605234622955322, |
| "learning_rate": 4.30253860246009e-06, |
| "logits/chosen": -2.91528058052063, |
| "logits/rejected": -2.91469407081604, |
| "logps/chosen": -303.7840270996094, |
| "logps/rejected": -274.5299377441406, |
| "loss": 0.4449, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.7587825655937195, |
| "rewards/margins": 1.5326234102249146, |
| "rewards/rejected": -2.2914059162139893, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.7864433394399372, |
| "grad_norm": 7.770859718322754, |
| "learning_rate": 4.276367443077729e-06, |
| "logits/chosen": -2.953711748123169, |
| "logits/rejected": -2.9724724292755127, |
| "logps/chosen": -318.14874267578125, |
| "logps/rejected": -262.6700744628906, |
| "loss": 0.6077, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.4992079734802246, |
| "rewards/margins": 0.9534958004951477, |
| "rewards/rejected": -2.4527037143707275, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.7877518974090553, |
| "grad_norm": 6.161098003387451, |
| "learning_rate": 4.250196283695368e-06, |
| "logits/chosen": -2.969233751296997, |
| "logits/rejected": -3.020934581756592, |
| "logps/chosen": -288.4843444824219, |
| "logps/rejected": -319.7886657714844, |
| "loss": 0.5435, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.9259554743766785, |
| "rewards/margins": 1.0652966499328613, |
| "rewards/rejected": -1.9912521839141846, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.7890604553781733, |
| "grad_norm": 7.810305118560791, |
| "learning_rate": 4.224025124313007e-06, |
| "logits/chosen": -2.9788691997528076, |
| "logits/rejected": -3.0794777870178223, |
| "logps/chosen": -277.36798095703125, |
| "logps/rejected": -287.5350646972656, |
| "loss": 0.5337, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.8270560503005981, |
| "rewards/margins": 1.2853838205337524, |
| "rewards/rejected": -2.1124396324157715, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.7903690133472913, |
| "grad_norm": 5.958588600158691, |
| "learning_rate": 4.1978539649306465e-06, |
| "logits/chosen": -2.976804494857788, |
| "logits/rejected": -3.009307384490967, |
| "logps/chosen": -244.9545440673828, |
| "logps/rejected": -222.89016723632812, |
| "loss": 0.5216, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.0923906564712524, |
| "rewards/margins": 1.161705732345581, |
| "rewards/rejected": -2.254096508026123, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.7916775713164094, |
| "grad_norm": 5.89898157119751, |
| "learning_rate": 4.171682805548286e-06, |
| "logits/chosen": -2.9162070751190186, |
| "logits/rejected": -2.9571709632873535, |
| "logps/chosen": -259.6465759277344, |
| "logps/rejected": -298.78155517578125, |
| "loss": 0.4204, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.9322217702865601, |
| "rewards/margins": 1.6251089572906494, |
| "rewards/rejected": -2.55733060836792, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.7929861292855274, |
| "grad_norm": 6.151041030883789, |
| "learning_rate": 4.145511646165926e-06, |
| "logits/chosen": -2.9598276615142822, |
| "logits/rejected": -3.0385665893554688, |
| "logps/chosen": -338.153076171875, |
| "logps/rejected": -307.7692565917969, |
| "loss": 0.5783, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3869907855987549, |
| "rewards/margins": 1.015866994857788, |
| "rewards/rejected": -2.402858018875122, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.7942946872546454, |
| "grad_norm": 5.3807291984558105, |
| "learning_rate": 4.119340486783565e-06, |
| "logits/chosen": -3.0343217849731445, |
| "logits/rejected": -3.071495771408081, |
| "logps/chosen": -235.5870361328125, |
| "logps/rejected": -239.8998565673828, |
| "loss": 0.5386, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.5046651363372803, |
| "rewards/margins": 1.3207638263702393, |
| "rewards/rejected": -2.8254292011260986, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.7956032452237635, |
| "grad_norm": 7.987537860870361, |
| "learning_rate": 4.093169327401204e-06, |
| "logits/chosen": -2.9960923194885254, |
| "logits/rejected": -3.0310943126678467, |
| "logps/chosen": -300.4662780761719, |
| "logps/rejected": -280.687744140625, |
| "loss": 0.555, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.4423376321792603, |
| "rewards/margins": 1.2718671560287476, |
| "rewards/rejected": -2.714204788208008, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.7969118031928815, |
| "grad_norm": 7.680141448974609, |
| "learning_rate": 4.066998168018843e-06, |
| "logits/chosen": -2.984675645828247, |
| "logits/rejected": -2.976343870162964, |
| "logps/chosen": -283.5673828125, |
| "logps/rejected": -267.16009521484375, |
| "loss": 0.5549, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.6328814029693604, |
| "rewards/margins": 1.3648440837860107, |
| "rewards/rejected": -2.997725486755371, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.7982203611619995, |
| "grad_norm": 6.773283004760742, |
| "learning_rate": 4.0408270086364835e-06, |
| "logits/chosen": -2.978335380554199, |
| "logits/rejected": -3.0025360584259033, |
| "logps/chosen": -329.9278564453125, |
| "logps/rejected": -262.8837890625, |
| "loss": 0.5166, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.4951245784759521, |
| "rewards/margins": 1.3376452922821045, |
| "rewards/rejected": -2.8327701091766357, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.7995289191311175, |
| "grad_norm": 8.977509498596191, |
| "learning_rate": 4.014655849254122e-06, |
| "logits/chosen": -3.0407679080963135, |
| "logits/rejected": -3.0819222927093506, |
| "logps/chosen": -307.4114990234375, |
| "logps/rejected": -280.94561767578125, |
| "loss": 0.5877, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.498591423034668, |
| "rewards/margins": 1.1345003843307495, |
| "rewards/rejected": -2.633091926574707, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.8008374771002356, |
| "grad_norm": 8.170519828796387, |
| "learning_rate": 3.988484689871762e-06, |
| "logits/chosen": -3.032698631286621, |
| "logits/rejected": -3.0359036922454834, |
| "logps/chosen": -236.0676727294922, |
| "logps/rejected": -247.2089080810547, |
| "loss": 0.6471, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.6221116781234741, |
| "rewards/margins": 1.1387770175933838, |
| "rewards/rejected": -2.7608890533447266, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.8021460350693536, |
| "grad_norm": 9.307779312133789, |
| "learning_rate": 3.962313530489401e-06, |
| "logits/chosen": -2.9368393421173096, |
| "logits/rejected": -2.983513355255127, |
| "logps/chosen": -348.62835693359375, |
| "logps/rejected": -318.1614685058594, |
| "loss": 0.4591, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.301815390586853, |
| "rewards/margins": 1.657279372215271, |
| "rewards/rejected": -2.959094762802124, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.8034545930384716, |
| "grad_norm": 7.106703758239746, |
| "learning_rate": 3.93614237110704e-06, |
| "logits/chosen": -2.9417312145233154, |
| "logits/rejected": -2.972074031829834, |
| "logps/chosen": -292.5469970703125, |
| "logps/rejected": -245.0569305419922, |
| "loss": 0.5217, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.4775664806365967, |
| "rewards/margins": 1.3939807415008545, |
| "rewards/rejected": -2.871546983718872, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.8047631510075897, |
| "grad_norm": 10.409375190734863, |
| "learning_rate": 3.90997121172468e-06, |
| "logits/chosen": -2.9813740253448486, |
| "logits/rejected": -2.9496383666992188, |
| "logps/chosen": -306.1288146972656, |
| "logps/rejected": -290.25262451171875, |
| "loss": 0.5501, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.700331449508667, |
| "rewards/margins": 1.1744781732559204, |
| "rewards/rejected": -2.874809980392456, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.8060717089767077, |
| "grad_norm": 5.405389308929443, |
| "learning_rate": 3.883800052342319e-06, |
| "logits/chosen": -2.9348511695861816, |
| "logits/rejected": -3.074619770050049, |
| "logps/chosen": -305.00555419921875, |
| "logps/rejected": -289.0831604003906, |
| "loss": 0.4904, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.4995722770690918, |
| "rewards/margins": 1.304827332496643, |
| "rewards/rejected": -2.8043994903564453, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.8073802669458257, |
| "grad_norm": 7.511078834533691, |
| "learning_rate": 3.857628892959959e-06, |
| "logits/chosen": -2.7204461097717285, |
| "logits/rejected": -2.8985595703125, |
| "logps/chosen": -276.2601318359375, |
| "logps/rejected": -236.35366821289062, |
| "loss": 0.4225, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.4592502117156982, |
| "rewards/margins": 1.428612470626831, |
| "rewards/rejected": -2.8878626823425293, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.8086888249149438, |
| "grad_norm": 7.300355434417725, |
| "learning_rate": 3.831457733577597e-06, |
| "logits/chosen": -2.975726366043091, |
| "logits/rejected": -2.9710006713867188, |
| "logps/chosen": -293.2878112792969, |
| "logps/rejected": -317.8365783691406, |
| "loss": 0.469, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.6435035467147827, |
| "rewards/margins": 1.1404824256896973, |
| "rewards/rejected": -2.7839858531951904, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.8099973828840618, |
| "grad_norm": 6.010566711425781, |
| "learning_rate": 3.8052865741952373e-06, |
| "logits/chosen": -3.0353527069091797, |
| "logits/rejected": -3.0600810050964355, |
| "logps/chosen": -293.95343017578125, |
| "logps/rejected": -270.61004638671875, |
| "loss": 0.5388, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.5871026515960693, |
| "rewards/margins": 1.1377742290496826, |
| "rewards/rejected": -2.724876880645752, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.8113059408531798, |
| "grad_norm": 6.270742893218994, |
| "learning_rate": 3.7791154148128765e-06, |
| "logits/chosen": -2.8717355728149414, |
| "logits/rejected": -2.875217914581299, |
| "logps/chosen": -311.8862609863281, |
| "logps/rejected": -329.5162048339844, |
| "loss": 0.4579, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.5746370553970337, |
| "rewards/margins": 1.3537368774414062, |
| "rewards/rejected": -2.9283738136291504, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.8126144988222979, |
| "grad_norm": 9.443546295166016, |
| "learning_rate": 3.7529442554305157e-06, |
| "logits/chosen": -2.9529592990875244, |
| "logits/rejected": -2.9881930351257324, |
| "logps/chosen": -282.13140869140625, |
| "logps/rejected": -282.1882019042969, |
| "loss": 0.5176, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.5689818859100342, |
| "rewards/margins": 1.4024773836135864, |
| "rewards/rejected": -2.971459150314331, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.8139230567914159, |
| "grad_norm": 5.381568908691406, |
| "learning_rate": 3.7267730960481554e-06, |
| "logits/chosen": -2.9820408821105957, |
| "logits/rejected": -2.9640519618988037, |
| "logps/chosen": -291.12969970703125, |
| "logps/rejected": -277.7159118652344, |
| "loss": 0.3889, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.7315528392791748, |
| "rewards/margins": 1.7429358959197998, |
| "rewards/rejected": -3.4744884967803955, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.8152316147605339, |
| "grad_norm": 5.273469924926758, |
| "learning_rate": 3.7006019366657946e-06, |
| "logits/chosen": -2.9469833374023438, |
| "logits/rejected": -3.015949010848999, |
| "logps/chosen": -285.083740234375, |
| "logps/rejected": -265.0752868652344, |
| "loss": 0.4623, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.6700470447540283, |
| "rewards/margins": 1.4441113471984863, |
| "rewards/rejected": -3.1141583919525146, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.816540172729652, |
| "grad_norm": 6.050240993499756, |
| "learning_rate": 3.6744307772834342e-06, |
| "logits/chosen": -2.932485580444336, |
| "logits/rejected": -2.9992053508758545, |
| "logps/chosen": -276.6758117675781, |
| "logps/rejected": -296.73333740234375, |
| "loss": 0.4808, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.8731861114501953, |
| "rewards/margins": 1.7045581340789795, |
| "rewards/rejected": -3.577744722366333, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.81784873069877, |
| "grad_norm": 7.137781620025635, |
| "learning_rate": 3.648259617901073e-06, |
| "logits/chosen": -2.9615964889526367, |
| "logits/rejected": -2.880619525909424, |
| "logps/chosen": -268.4992370605469, |
| "logps/rejected": -295.4488830566406, |
| "loss": 0.5318, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.7875770330429077, |
| "rewards/margins": 1.2315229177474976, |
| "rewards/rejected": -3.019099712371826, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.819157288667888, |
| "grad_norm": 7.0195770263671875, |
| "learning_rate": 3.6220884585187127e-06, |
| "logits/chosen": -3.0774314403533936, |
| "logits/rejected": -3.045903205871582, |
| "logps/chosen": -302.8963928222656, |
| "logps/rejected": -289.8187561035156, |
| "loss": 0.4234, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.661341667175293, |
| "rewards/margins": 1.1674296855926514, |
| "rewards/rejected": -2.8287715911865234, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.8204658466370061, |
| "grad_norm": 7.271994590759277, |
| "learning_rate": 3.5959172991363523e-06, |
| "logits/chosen": -2.9475741386413574, |
| "logits/rejected": -3.019735813140869, |
| "logps/chosen": -316.7098693847656, |
| "logps/rejected": -328.3517761230469, |
| "loss": 0.5028, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.7327712774276733, |
| "rewards/margins": 1.505361795425415, |
| "rewards/rejected": -3.238132953643799, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.821774404606124, |
| "grad_norm": 6.570908546447754, |
| "learning_rate": 3.569746139753991e-06, |
| "logits/chosen": -2.9615020751953125, |
| "logits/rejected": -3.003160238265991, |
| "logps/chosen": -299.74273681640625, |
| "logps/rejected": -291.9687805175781, |
| "loss": 0.3802, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.4846771955490112, |
| "rewards/margins": 2.014193058013916, |
| "rewards/rejected": -3.498870372772217, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.823082962575242, |
| "grad_norm": 3.8049066066741943, |
| "learning_rate": 3.5435749803716308e-06, |
| "logits/chosen": -3.0279877185821533, |
| "logits/rejected": -3.0599331855773926, |
| "logps/chosen": -309.2520446777344, |
| "logps/rejected": -310.35345458984375, |
| "loss": 0.5532, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.453507661819458, |
| "rewards/margins": 1.6446282863616943, |
| "rewards/rejected": -3.0981359481811523, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.8243915205443602, |
| "grad_norm": 9.395318031311035, |
| "learning_rate": 3.5174038209892704e-06, |
| "logits/chosen": -2.9431824684143066, |
| "logits/rejected": -3.0078811645507812, |
| "logps/chosen": -323.8676452636719, |
| "logps/rejected": -280.33453369140625, |
| "loss": 0.7375, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.7898139953613281, |
| "rewards/margins": 0.9050365686416626, |
| "rewards/rejected": -2.694850444793701, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.8257000785134782, |
| "grad_norm": 6.055134296417236, |
| "learning_rate": 3.491232661606909e-06, |
| "logits/chosen": -2.9507486820220947, |
| "logits/rejected": -3.0724995136260986, |
| "logps/chosen": -315.60272216796875, |
| "logps/rejected": -324.2132263183594, |
| "loss": 0.5575, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.733232855796814, |
| "rewards/margins": 1.3413363695144653, |
| "rewards/rejected": -3.0745689868927, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.8270086364825961, |
| "grad_norm": 6.183289051055908, |
| "learning_rate": 3.465061502224549e-06, |
| "logits/chosen": -2.8759653568267822, |
| "logits/rejected": -2.8962912559509277, |
| "logps/chosen": -282.1387023925781, |
| "logps/rejected": -255.8695526123047, |
| "loss": 0.5389, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.533860445022583, |
| "rewards/margins": 1.3426233530044556, |
| "rewards/rejected": -2.876483678817749, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.8283171944517143, |
| "grad_norm": 5.198134422302246, |
| "learning_rate": 3.438890342842188e-06, |
| "logits/chosen": -2.9086239337921143, |
| "logits/rejected": -2.9607226848602295, |
| "logps/chosen": -277.3299560546875, |
| "logps/rejected": -286.2669982910156, |
| "loss": 0.4774, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.849469780921936, |
| "rewards/margins": 1.3763679265975952, |
| "rewards/rejected": -3.2258377075195312, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.8296257524208323, |
| "grad_norm": 9.19830322265625, |
| "learning_rate": 3.4127191834598277e-06, |
| "logits/chosen": -3.077191114425659, |
| "logits/rejected": -2.9784042835235596, |
| "logps/chosen": -299.14813232421875, |
| "logps/rejected": -330.7813415527344, |
| "loss": 0.46, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.447545051574707, |
| "rewards/margins": 1.3955647945404053, |
| "rewards/rejected": -2.8431098461151123, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.8309343103899502, |
| "grad_norm": 6.804299354553223, |
| "learning_rate": 3.386548024077467e-06, |
| "logits/chosen": -2.9858601093292236, |
| "logits/rejected": -2.996666431427002, |
| "logps/chosen": -314.24005126953125, |
| "logps/rejected": -302.2778015136719, |
| "loss": 0.5011, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.7465810775756836, |
| "rewards/margins": 1.3649499416351318, |
| "rewards/rejected": -3.1115307807922363, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.8322428683590684, |
| "grad_norm": 5.180750846862793, |
| "learning_rate": 3.360376864695106e-06, |
| "logits/chosen": -2.973330020904541, |
| "logits/rejected": -3.0549609661102295, |
| "logps/chosen": -384.7432556152344, |
| "logps/rejected": -316.04833984375, |
| "loss": 0.5156, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.293901801109314, |
| "rewards/margins": 1.534147024154663, |
| "rewards/rejected": -2.8280491828918457, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.8335514263281864, |
| "grad_norm": 7.629887104034424, |
| "learning_rate": 3.3342057053127458e-06, |
| "logits/chosen": -3.01334285736084, |
| "logits/rejected": -2.983562469482422, |
| "logps/chosen": -322.8381652832031, |
| "logps/rejected": -297.83392333984375, |
| "loss": 0.527, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.3352491855621338, |
| "rewards/margins": 1.2987505197525024, |
| "rewards/rejected": -2.6339995861053467, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.8348599842973043, |
| "grad_norm": 7.089648246765137, |
| "learning_rate": 3.3080345459303846e-06, |
| "logits/chosen": -2.8630924224853516, |
| "logits/rejected": -2.8851840496063232, |
| "logps/chosen": -304.0502014160156, |
| "logps/rejected": -321.80255126953125, |
| "loss": 0.586, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.5677211284637451, |
| "rewards/margins": 1.1580266952514648, |
| "rewards/rejected": -2.72574782371521, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.8361685422664225, |
| "grad_norm": 6.6564860343933105, |
| "learning_rate": 3.281863386548024e-06, |
| "logits/chosen": -2.8017802238464355, |
| "logits/rejected": -2.8708529472351074, |
| "logps/chosen": -257.3683166503906, |
| "logps/rejected": -240.1475830078125, |
| "loss": 0.5368, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.4609788656234741, |
| "rewards/margins": 1.4196208715438843, |
| "rewards/rejected": -2.8805994987487793, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.8374771002355405, |
| "grad_norm": 7.388543605804443, |
| "learning_rate": 3.255692227165664e-06, |
| "logits/chosen": -2.9424808025360107, |
| "logits/rejected": -3.038038492202759, |
| "logps/chosen": -301.71630859375, |
| "logps/rejected": -311.08245849609375, |
| "loss": 0.4928, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.0317409038543701, |
| "rewards/margins": 1.6584066152572632, |
| "rewards/rejected": -2.6901473999023438, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8374771002355405, |
| "eval_logits/chosen": -3.004171848297119, |
| "eval_logits/rejected": -3.0209107398986816, |
| "eval_logps/chosen": -295.4100036621094, |
| "eval_logps/rejected": -290.0252990722656, |
| "eval_loss": 0.49639275670051575, |
| "eval_rewards/accuracies": 0.7595000267028809, |
| "eval_rewards/chosen": -1.2176601886749268, |
| "eval_rewards/margins": 1.3914103507995605, |
| "eval_rewards/rejected": -2.6090707778930664, |
| "eval_runtime": 763.7592, |
| "eval_samples_per_second": 2.619, |
| "eval_steps_per_second": 0.327, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8387856582046584, |
| "grad_norm": 7.057781219482422, |
| "learning_rate": 3.2295210677833035e-06, |
| "logits/chosen": -2.9613780975341797, |
| "logits/rejected": -2.9954867362976074, |
| "logps/chosen": -296.17083740234375, |
| "logps/rejected": -265.18072509765625, |
| "loss": 0.3919, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.425917387008667, |
| "rewards/margins": 1.6352697610855103, |
| "rewards/rejected": -3.0611870288848877, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.8400942161737766, |
| "grad_norm": 7.055730819702148, |
| "learning_rate": 3.2033499084009423e-06, |
| "logits/chosen": -2.848184585571289, |
| "logits/rejected": -2.9945926666259766, |
| "logps/chosen": -307.47662353515625, |
| "logps/rejected": -315.60101318359375, |
| "loss": 0.3921, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.9309557676315308, |
| "rewards/margins": 1.7479255199432373, |
| "rewards/rejected": -2.6788811683654785, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.8414027741428945, |
| "grad_norm": 9.12265682220459, |
| "learning_rate": 3.177178749018582e-06, |
| "logits/chosen": -3.0333364009857178, |
| "logits/rejected": -3.006922483444214, |
| "logps/chosen": -251.0235595703125, |
| "logps/rejected": -257.051025390625, |
| "loss": 0.6059, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.6401920318603516, |
| "rewards/margins": 1.1688909530639648, |
| "rewards/rejected": -2.8090832233428955, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.8427113321120125, |
| "grad_norm": 6.198644638061523, |
| "learning_rate": 3.151007589636221e-06, |
| "logits/chosen": -2.97039532661438, |
| "logits/rejected": -3.030413866043091, |
| "logps/chosen": -256.3705749511719, |
| "logps/rejected": -229.65072631835938, |
| "loss": 0.463, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.372048258781433, |
| "rewards/margins": 1.6272108554840088, |
| "rewards/rejected": -2.9992592334747314, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.8440198900811305, |
| "grad_norm": 6.126826763153076, |
| "learning_rate": 3.1248364302538604e-06, |
| "logits/chosen": -2.9617550373077393, |
| "logits/rejected": -3.0138838291168213, |
| "logps/chosen": -273.10321044921875, |
| "logps/rejected": -262.1094055175781, |
| "loss": 0.4266, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.1408138275146484, |
| "rewards/margins": 1.4347550868988037, |
| "rewards/rejected": -2.575568675994873, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.8453284480502486, |
| "grad_norm": 9.463789939880371, |
| "learning_rate": 3.0986652708715e-06, |
| "logits/chosen": -2.980067014694214, |
| "logits/rejected": -3.017759084701538, |
| "logps/chosen": -259.8030700683594, |
| "logps/rejected": -261.61419677734375, |
| "loss": 0.5268, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.146183967590332, |
| "rewards/margins": 1.2039825916290283, |
| "rewards/rejected": -2.3501665592193604, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.8466370060193666, |
| "grad_norm": 6.915153503417969, |
| "learning_rate": 3.0724941114891392e-06, |
| "logits/chosen": -3.0340006351470947, |
| "logits/rejected": -2.977094888687134, |
| "logps/chosen": -311.4469909667969, |
| "logps/rejected": -297.03460693359375, |
| "loss": 0.5994, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.4913580417633057, |
| "rewards/margins": 1.2569396495819092, |
| "rewards/rejected": -2.7482974529266357, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.8479455639884846, |
| "grad_norm": 7.158982276916504, |
| "learning_rate": 3.0463229521067784e-06, |
| "logits/chosen": -2.955960273742676, |
| "logits/rejected": -3.0300660133361816, |
| "logps/chosen": -270.5257873535156, |
| "logps/rejected": -304.80340576171875, |
| "loss": 0.4053, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.099576711654663, |
| "rewards/margins": 1.5984654426574707, |
| "rewards/rejected": -2.698042392730713, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.8492541219576027, |
| "grad_norm": 5.484156131744385, |
| "learning_rate": 3.0201517927244177e-06, |
| "logits/chosen": -2.967376232147217, |
| "logits/rejected": -3.0268256664276123, |
| "logps/chosen": -282.62249755859375, |
| "logps/rejected": -254.0297088623047, |
| "loss": 0.4376, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.060046672821045, |
| "rewards/margins": 1.762258529663086, |
| "rewards/rejected": -2.822305202484131, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.8505626799267207, |
| "grad_norm": 7.077023506164551, |
| "learning_rate": 2.9939806333420573e-06, |
| "logits/chosen": -2.9605937004089355, |
| "logits/rejected": -3.015320301055908, |
| "logps/chosen": -313.13873291015625, |
| "logps/rejected": -316.87750244140625, |
| "loss": 0.5465, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.3930941820144653, |
| "rewards/margins": 1.102479100227356, |
| "rewards/rejected": -2.4955732822418213, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.8518712378958387, |
| "grad_norm": 7.836671829223633, |
| "learning_rate": 2.967809473959697e-06, |
| "logits/chosen": -3.0352444648742676, |
| "logits/rejected": -3.102151393890381, |
| "logps/chosen": -307.4310302734375, |
| "logps/rejected": -253.9599609375, |
| "loss": 0.5673, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2398912906646729, |
| "rewards/margins": 1.4965870380401611, |
| "rewards/rejected": -2.736478328704834, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.8531797958649568, |
| "grad_norm": 7.435060977935791, |
| "learning_rate": 2.9416383145773357e-06, |
| "logits/chosen": -2.9178872108459473, |
| "logits/rejected": -3.0403456687927246, |
| "logps/chosen": -319.54052734375, |
| "logps/rejected": -270.2267761230469, |
| "loss": 0.533, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.2776553630828857, |
| "rewards/margins": 1.4546865224838257, |
| "rewards/rejected": -2.732342004776001, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.8544883538340748, |
| "grad_norm": 6.412667274475098, |
| "learning_rate": 2.9154671551949754e-06, |
| "logits/chosen": -3.0104992389678955, |
| "logits/rejected": -3.0792577266693115, |
| "logps/chosen": -278.3431091308594, |
| "logps/rejected": -322.3707580566406, |
| "loss": 0.4199, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.0318635702133179, |
| "rewards/margins": 1.6050819158554077, |
| "rewards/rejected": -2.6369454860687256, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.8557969118031928, |
| "grad_norm": 8.721508979797363, |
| "learning_rate": 2.889295995812615e-06, |
| "logits/chosen": -3.0468242168426514, |
| "logits/rejected": -3.033629894256592, |
| "logps/chosen": -371.76409912109375, |
| "logps/rejected": -343.81085205078125, |
| "loss": 0.4677, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.381502628326416, |
| "rewards/margins": 1.6215152740478516, |
| "rewards/rejected": -3.0030181407928467, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.857105469772311, |
| "grad_norm": 2.7983927726745605, |
| "learning_rate": 2.863124836430254e-06, |
| "logits/chosen": -2.9721121788024902, |
| "logits/rejected": -3.047161102294922, |
| "logps/chosen": -325.7302551269531, |
| "logps/rejected": -356.19097900390625, |
| "loss": 0.3731, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.456455945968628, |
| "rewards/margins": 1.8465648889541626, |
| "rewards/rejected": -3.303021192550659, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.8584140277414289, |
| "grad_norm": 4.696521282196045, |
| "learning_rate": 2.8369536770478935e-06, |
| "logits/chosen": -2.99096417427063, |
| "logits/rejected": -3.0428411960601807, |
| "logps/chosen": -293.07366943359375, |
| "logps/rejected": -274.5947570800781, |
| "loss": 0.3984, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.1890863180160522, |
| "rewards/margins": 2.1313068866729736, |
| "rewards/rejected": -3.3203930854797363, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.8597225857105469, |
| "grad_norm": 7.623385429382324, |
| "learning_rate": 2.8107825176655327e-06, |
| "logits/chosen": -2.9804584980010986, |
| "logits/rejected": -2.954515218734741, |
| "logps/chosen": -273.37713623046875, |
| "logps/rejected": -262.2710266113281, |
| "loss": 0.4606, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.275095820426941, |
| "rewards/margins": 1.7871692180633545, |
| "rewards/rejected": -3.062264919281006, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.861031143679665, |
| "grad_norm": 8.984639167785645, |
| "learning_rate": 2.7846113582831723e-06, |
| "logits/chosen": -2.820075511932373, |
| "logits/rejected": -2.9113526344299316, |
| "logps/chosen": -308.37994384765625, |
| "logps/rejected": -295.8663024902344, |
| "loss": 0.4429, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.3035976886749268, |
| "rewards/margins": 1.941608190536499, |
| "rewards/rejected": -3.2452056407928467, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.862339701648783, |
| "grad_norm": 6.510889530181885, |
| "learning_rate": 2.7584401989008115e-06, |
| "logits/chosen": -3.090820074081421, |
| "logits/rejected": -3.129952907562256, |
| "logps/chosen": -280.90997314453125, |
| "logps/rejected": -277.00030517578125, |
| "loss": 0.4266, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.054640293121338, |
| "rewards/margins": 1.758040428161621, |
| "rewards/rejected": -2.812680721282959, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.863648259617901, |
| "grad_norm": 9.088248252868652, |
| "learning_rate": 2.7322690395184508e-06, |
| "logits/chosen": -2.9733877182006836, |
| "logits/rejected": -3.027345895767212, |
| "logps/chosen": -287.5892639160156, |
| "logps/rejected": -267.43878173828125, |
| "loss": 0.4333, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.3994263410568237, |
| "rewards/margins": 2.2078773975372314, |
| "rewards/rejected": -3.607304096221924, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.8649568175870191, |
| "grad_norm": 5.8171491622924805, |
| "learning_rate": 2.7060978801360904e-06, |
| "logits/chosen": -2.925607204437256, |
| "logits/rejected": -3.0442306995391846, |
| "logps/chosen": -332.95458984375, |
| "logps/rejected": -302.6817321777344, |
| "loss": 0.4657, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.267094373703003, |
| "rewards/margins": 1.7528711557388306, |
| "rewards/rejected": -3.019965648651123, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.8662653755561371, |
| "grad_norm": 5.004279136657715, |
| "learning_rate": 2.679926720753729e-06, |
| "logits/chosen": -2.962287425994873, |
| "logits/rejected": -2.9575023651123047, |
| "logps/chosen": -266.0353088378906, |
| "logps/rejected": -269.9578552246094, |
| "loss": 0.5247, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3235856294631958, |
| "rewards/margins": 1.6374728679656982, |
| "rewards/rejected": -2.9610581398010254, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.8675739335252551, |
| "grad_norm": 7.791489601135254, |
| "learning_rate": 2.653755561371369e-06, |
| "logits/chosen": -2.993173122406006, |
| "logits/rejected": -3.0355095863342285, |
| "logps/chosen": -230.2362060546875, |
| "logps/rejected": -252.8092803955078, |
| "loss": 0.4766, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.343328833580017, |
| "rewards/margins": 1.6592735052108765, |
| "rewards/rejected": -3.0026021003723145, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.8688824914943732, |
| "grad_norm": 6.9868998527526855, |
| "learning_rate": 2.6275844019890085e-06, |
| "logits/chosen": -2.90317964553833, |
| "logits/rejected": -3.0146877765655518, |
| "logps/chosen": -232.83566284179688, |
| "logps/rejected": -263.18365478515625, |
| "loss": 0.4633, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.3262803554534912, |
| "rewards/margins": 1.7742512226104736, |
| "rewards/rejected": -3.100531578063965, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.8701910494634912, |
| "grad_norm": 10.964067459106445, |
| "learning_rate": 2.601413242606648e-06, |
| "logits/chosen": -2.9223103523254395, |
| "logits/rejected": -2.892368793487549, |
| "logps/chosen": -276.1986389160156, |
| "logps/rejected": -297.419921875, |
| "loss": 0.8137, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -2.0564141273498535, |
| "rewards/margins": 0.9367812871932983, |
| "rewards/rejected": -2.9931955337524414, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.8714996074326092, |
| "grad_norm": 9.714390754699707, |
| "learning_rate": 2.575242083224287e-06, |
| "logits/chosen": -2.947138547897339, |
| "logits/rejected": -3.092214822769165, |
| "logps/chosen": -357.3052062988281, |
| "logps/rejected": -319.5323791503906, |
| "loss": 0.4528, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.2855061292648315, |
| "rewards/margins": 2.0207602977752686, |
| "rewards/rejected": -3.3062667846679688, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.8728081654017273, |
| "grad_norm": 7.657586574554443, |
| "learning_rate": 2.5490709238419266e-06, |
| "logits/chosen": -3.013709545135498, |
| "logits/rejected": -3.0418596267700195, |
| "logps/chosen": -328.94537353515625, |
| "logps/rejected": -283.6571044921875, |
| "loss": 0.525, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1225342750549316, |
| "rewards/margins": 1.795938491821289, |
| "rewards/rejected": -2.9184727668762207, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.8741167233708453, |
| "grad_norm": 6.528357028961182, |
| "learning_rate": 2.5228997644595658e-06, |
| "logits/chosen": -2.9861557483673096, |
| "logits/rejected": -3.135845184326172, |
| "logps/chosen": -243.2984619140625, |
| "logps/rejected": -237.9906005859375, |
| "loss": 0.6573, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.59354829788208, |
| "rewards/margins": 1.3717825412750244, |
| "rewards/rejected": -2.9653310775756836, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.8754252813399633, |
| "grad_norm": 8.699584007263184, |
| "learning_rate": 2.4967286050772054e-06, |
| "logits/chosen": -2.866548538208008, |
| "logits/rejected": -3.0045650005340576, |
| "logps/chosen": -319.767822265625, |
| "logps/rejected": -277.4715881347656, |
| "loss": 0.506, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.2535655498504639, |
| "rewards/margins": 1.7059853076934814, |
| "rewards/rejected": -2.9595508575439453, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.8767338393090814, |
| "grad_norm": 6.741850852966309, |
| "learning_rate": 2.4705574456948446e-06, |
| "logits/chosen": -3.012995719909668, |
| "logits/rejected": -3.0462710857391357, |
| "logps/chosen": -335.1493225097656, |
| "logps/rejected": -292.0329895019531, |
| "loss": 0.4305, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2484524250030518, |
| "rewards/margins": 1.6712596416473389, |
| "rewards/rejected": -2.9197120666503906, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.8780423972781994, |
| "grad_norm": 6.849013805389404, |
| "learning_rate": 2.444386286312484e-06, |
| "logits/chosen": -3.019461154937744, |
| "logits/rejected": -3.0690269470214844, |
| "logps/chosen": -328.6025695800781, |
| "logps/rejected": -283.9448547363281, |
| "loss": 0.4625, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.1433143615722656, |
| "rewards/margins": 1.3490070104599, |
| "rewards/rejected": -2.492321491241455, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.8793509552473174, |
| "grad_norm": 3.412297248840332, |
| "learning_rate": 2.418215126930123e-06, |
| "logits/chosen": -2.9395012855529785, |
| "logits/rejected": -2.933227300643921, |
| "logps/chosen": -276.8416748046875, |
| "logps/rejected": -305.0869445800781, |
| "loss": 0.3263, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.8936668634414673, |
| "rewards/margins": 2.017547845840454, |
| "rewards/rejected": -2.911214590072632, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.8806595132164355, |
| "grad_norm": 7.709452152252197, |
| "learning_rate": 2.3920439675477623e-06, |
| "logits/chosen": -3.047055721282959, |
| "logits/rejected": -3.083347797393799, |
| "logps/chosen": -260.4131164550781, |
| "logps/rejected": -314.905517578125, |
| "loss": 0.5143, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1395976543426514, |
| "rewards/margins": 1.6013320684432983, |
| "rewards/rejected": -2.740929365158081, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.8819680711855535, |
| "grad_norm": 7.616606712341309, |
| "learning_rate": 2.365872808165402e-06, |
| "logits/chosen": -3.0480358600616455, |
| "logits/rejected": -3.052595853805542, |
| "logps/chosen": -265.3002624511719, |
| "logps/rejected": -294.23712158203125, |
| "loss": 0.5369, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.218409538269043, |
| "rewards/margins": 1.6266998052597046, |
| "rewards/rejected": -2.845109224319458, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.8832766291546715, |
| "grad_norm": 5.287708759307861, |
| "learning_rate": 2.339701648783041e-06, |
| "logits/chosen": -2.958054304122925, |
| "logits/rejected": -3.068449020385742, |
| "logps/chosen": -268.5967712402344, |
| "logps/rejected": -294.0408630371094, |
| "loss": 0.5142, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2594164609909058, |
| "rewards/margins": 1.3391399383544922, |
| "rewards/rejected": -2.5985562801361084, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.8845851871237895, |
| "grad_norm": 9.27445125579834, |
| "learning_rate": 2.313530489400681e-06, |
| "logits/chosen": -3.025479793548584, |
| "logits/rejected": -3.016753673553467, |
| "logps/chosen": -314.6586608886719, |
| "logps/rejected": -325.9570007324219, |
| "loss": 0.6003, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.6857277154922485, |
| "rewards/margins": 1.2919389009475708, |
| "rewards/rejected": -2.9776668548583984, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.8858937450929076, |
| "grad_norm": 7.8645243644714355, |
| "learning_rate": 2.28735933001832e-06, |
| "logits/chosen": -2.9674994945526123, |
| "logits/rejected": -3.07468843460083, |
| "logps/chosen": -277.78887939453125, |
| "logps/rejected": -275.15899658203125, |
| "loss": 0.5845, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.2671358585357666, |
| "rewards/margins": 1.3928353786468506, |
| "rewards/rejected": -2.659970998764038, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.8872023030620256, |
| "grad_norm": 7.450486660003662, |
| "learning_rate": 2.2611881706359592e-06, |
| "logits/chosen": -2.8782074451446533, |
| "logits/rejected": -2.9619007110595703, |
| "logps/chosen": -273.5116271972656, |
| "logps/rejected": -306.6257629394531, |
| "loss": 0.5905, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.3433325290679932, |
| "rewards/margins": 1.4469295740127563, |
| "rewards/rejected": -2.790262222290039, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.8885108610311436, |
| "grad_norm": 5.97955322265625, |
| "learning_rate": 2.235017011253599e-06, |
| "logits/chosen": -2.9929678440093994, |
| "logits/rejected": -3.1036667823791504, |
| "logps/chosen": -338.9819030761719, |
| "logps/rejected": -304.00146484375, |
| "loss": 0.4292, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.3203136920928955, |
| "rewards/margins": 1.7166574001312256, |
| "rewards/rejected": -3.0369715690612793, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.8898194190002617, |
| "grad_norm": 6.3199310302734375, |
| "learning_rate": 2.208845851871238e-06, |
| "logits/chosen": -2.9735336303710938, |
| "logits/rejected": -2.9744515419006348, |
| "logps/chosen": -296.5519104003906, |
| "logps/rejected": -322.54473876953125, |
| "loss": 0.4602, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.639952301979065, |
| "rewards/margins": 1.7713512182235718, |
| "rewards/rejected": -3.4113032817840576, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.8911279769693797, |
| "grad_norm": 7.517760276794434, |
| "learning_rate": 2.1826746924888777e-06, |
| "logits/chosen": -3.0101208686828613, |
| "logits/rejected": -3.0078091621398926, |
| "logps/chosen": -309.48797607421875, |
| "logps/rejected": -322.8365783691406, |
| "loss": 0.4754, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2661359310150146, |
| "rewards/margins": 1.4626022577285767, |
| "rewards/rejected": -2.7287380695343018, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.8924365349384977, |
| "grad_norm": 7.3932037353515625, |
| "learning_rate": 2.156503533106517e-06, |
| "logits/chosen": -2.8534436225891113, |
| "logits/rejected": -2.9222302436828613, |
| "logps/chosen": -314.8663330078125, |
| "logps/rejected": -323.77252197265625, |
| "loss": 0.6032, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.5054900646209717, |
| "rewards/margins": 1.3880724906921387, |
| "rewards/rejected": -2.8935627937316895, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.8937450929076158, |
| "grad_norm": 5.375305652618408, |
| "learning_rate": 2.130332373724156e-06, |
| "logits/chosen": -3.040001630783081, |
| "logits/rejected": -3.1081435680389404, |
| "logps/chosen": -280.5118408203125, |
| "logps/rejected": -292.08270263671875, |
| "loss": 0.3811, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.2469137907028198, |
| "rewards/margins": 1.6295486688613892, |
| "rewards/rejected": -2.87646222114563, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.8950536508767338, |
| "grad_norm": 9.578230857849121, |
| "learning_rate": 2.1041612143417954e-06, |
| "logits/chosen": -2.9527251720428467, |
| "logits/rejected": -3.0252342224121094, |
| "logps/chosen": -309.64801025390625, |
| "logps/rejected": -319.7813415527344, |
| "loss": 0.5587, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.5965030193328857, |
| "rewards/margins": 1.1897649765014648, |
| "rewards/rejected": -2.7862679958343506, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.8963622088458518, |
| "grad_norm": 5.5111002922058105, |
| "learning_rate": 2.0779900549594346e-06, |
| "logits/chosen": -2.8854727745056152, |
| "logits/rejected": -2.8697562217712402, |
| "logps/chosen": -303.0765380859375, |
| "logps/rejected": -296.5349426269531, |
| "loss": 0.4015, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.1718518733978271, |
| "rewards/margins": 1.609633207321167, |
| "rewards/rejected": -2.781485080718994, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.8976707668149699, |
| "grad_norm": 8.389058113098145, |
| "learning_rate": 2.0518188955770743e-06, |
| "logits/chosen": -3.0345101356506348, |
| "logits/rejected": -3.033433198928833, |
| "logps/chosen": -293.57318115234375, |
| "logps/rejected": -266.28497314453125, |
| "loss": 0.4547, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.349166750907898, |
| "rewards/margins": 1.5450422763824463, |
| "rewards/rejected": -2.894209146499634, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.8989793247840879, |
| "grad_norm": 7.349452495574951, |
| "learning_rate": 2.0256477361947135e-06, |
| "logits/chosen": -3.0003952980041504, |
| "logits/rejected": -3.0549144744873047, |
| "logps/chosen": -286.839111328125, |
| "logps/rejected": -297.920166015625, |
| "loss": 0.345, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.3413785696029663, |
| "rewards/margins": 2.113399028778076, |
| "rewards/rejected": -3.454777479171753, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.9002878827532059, |
| "grad_norm": 8.613375663757324, |
| "learning_rate": 1.999476576812353e-06, |
| "logits/chosen": -2.9884345531463623, |
| "logits/rejected": -3.0320065021514893, |
| "logps/chosen": -296.2138366699219, |
| "logps/rejected": -302.5164489746094, |
| "loss": 0.5699, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.6587432622909546, |
| "rewards/margins": 1.523429274559021, |
| "rewards/rejected": -3.1821722984313965, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.901596440722324, |
| "grad_norm": 5.533935070037842, |
| "learning_rate": 1.9733054174299923e-06, |
| "logits/chosen": -2.9528141021728516, |
| "logits/rejected": -2.9987263679504395, |
| "logps/chosen": -291.6798095703125, |
| "logps/rejected": -290.3052062988281, |
| "loss": 0.4147, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.4009933471679688, |
| "rewards/margins": 1.6354844570159912, |
| "rewards/rejected": -3.03647780418396, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.902904998691442, |
| "grad_norm": 9.2689847946167, |
| "learning_rate": 1.9471342580476316e-06, |
| "logits/chosen": -2.980245351791382, |
| "logits/rejected": -3.004823684692383, |
| "logps/chosen": -322.0609436035156, |
| "logps/rejected": -259.2235107421875, |
| "loss": 0.6756, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -2.1433141231536865, |
| "rewards/margins": 1.234440803527832, |
| "rewards/rejected": -3.3777554035186768, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.90421355666056, |
| "grad_norm": 10.556782722473145, |
| "learning_rate": 1.920963098665271e-06, |
| "logits/chosen": -3.0391366481781006, |
| "logits/rejected": -3.0794732570648193, |
| "logps/chosen": -274.0779724121094, |
| "logps/rejected": -266.954345703125, |
| "loss": 0.6314, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.3744312524795532, |
| "rewards/margins": 1.1151368618011475, |
| "rewards/rejected": -2.4895682334899902, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.9055221146296781, |
| "grad_norm": 9.761650085449219, |
| "learning_rate": 1.8947919392829104e-06, |
| "logits/chosen": -2.862259864807129, |
| "logits/rejected": -2.933992624282837, |
| "logps/chosen": -306.671142578125, |
| "logps/rejected": -282.5141296386719, |
| "loss": 0.624, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.6207879781723022, |
| "rewards/margins": 1.4092611074447632, |
| "rewards/rejected": -3.0300488471984863, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.9068306725987961, |
| "grad_norm": 5.749721527099609, |
| "learning_rate": 1.8686207799005498e-06, |
| "logits/chosen": -2.98447847366333, |
| "logits/rejected": -3.064558744430542, |
| "logps/chosen": -324.86114501953125, |
| "logps/rejected": -278.88970947265625, |
| "loss": 0.3623, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.3682321310043335, |
| "rewards/margins": 1.78044855594635, |
| "rewards/rejected": -3.1486804485321045, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.9081392305679141, |
| "grad_norm": 8.051587104797363, |
| "learning_rate": 1.842449620518189e-06, |
| "logits/chosen": -2.9344372749328613, |
| "logits/rejected": -3.005045175552368, |
| "logps/chosen": -310.6846618652344, |
| "logps/rejected": -305.3709411621094, |
| "loss": 0.6313, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.7746692895889282, |
| "rewards/margins": 1.2048325538635254, |
| "rewards/rejected": -2.979501962661743, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.9094477885370322, |
| "grad_norm": 5.901284217834473, |
| "learning_rate": 1.8162784611358283e-06, |
| "logits/chosen": -2.8661179542541504, |
| "logits/rejected": -2.9399428367614746, |
| "logps/chosen": -287.2570495605469, |
| "logps/rejected": -279.65753173828125, |
| "loss": 0.4383, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.3972651958465576, |
| "rewards/margins": 1.6384613513946533, |
| "rewards/rejected": -3.035726547241211, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.9107563465061502, |
| "grad_norm": 11.674195289611816, |
| "learning_rate": 1.790107301753468e-06, |
| "logits/chosen": -2.847024440765381, |
| "logits/rejected": -2.9919915199279785, |
| "logps/chosen": -363.022216796875, |
| "logps/rejected": -360.4650573730469, |
| "loss": 0.5059, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.6431465148925781, |
| "rewards/margins": 1.526166558265686, |
| "rewards/rejected": -3.1693129539489746, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.9120649044752682, |
| "grad_norm": 6.637493133544922, |
| "learning_rate": 1.7639361423711071e-06, |
| "logits/chosen": -2.994047164916992, |
| "logits/rejected": -3.043743371963501, |
| "logps/chosen": -257.53936767578125, |
| "logps/rejected": -255.7579345703125, |
| "loss": 0.5947, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.4301214218139648, |
| "rewards/margins": 1.0894838571548462, |
| "rewards/rejected": -2.5196051597595215, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.9133734624443863, |
| "grad_norm": 7.341172695159912, |
| "learning_rate": 1.7377649829887466e-06, |
| "logits/chosen": -2.9541261196136475, |
| "logits/rejected": -3.027951240539551, |
| "logps/chosen": -282.1636657714844, |
| "logps/rejected": -250.5120849609375, |
| "loss": 0.4473, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2336229085922241, |
| "rewards/margins": 1.685388207435608, |
| "rewards/rejected": -2.919011354446411, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.9146820204135043, |
| "grad_norm": 5.005151271820068, |
| "learning_rate": 1.7115938236063858e-06, |
| "logits/chosen": -3.0645532608032227, |
| "logits/rejected": -3.0290141105651855, |
| "logps/chosen": -294.0321960449219, |
| "logps/rejected": -294.2281799316406, |
| "loss": 0.4609, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.3800275325775146, |
| "rewards/margins": 1.3488258123397827, |
| "rewards/rejected": -2.728853225708008, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.9159905783826223, |
| "grad_norm": 8.08800983428955, |
| "learning_rate": 1.6854226642240254e-06, |
| "logits/chosen": -3.062406063079834, |
| "logits/rejected": -3.1058766841888428, |
| "logps/chosen": -282.1437683105469, |
| "logps/rejected": -263.7963562011719, |
| "loss": 0.4244, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.356877088546753, |
| "rewards/margins": 1.5647704601287842, |
| "rewards/rejected": -2.921647548675537, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9172991363517404, |
| "grad_norm": 7.3398518562316895, |
| "learning_rate": 1.6592515048416647e-06, |
| "logits/chosen": -3.0162220001220703, |
| "logits/rejected": -3.0233070850372314, |
| "logps/chosen": -289.8272705078125, |
| "logps/rejected": -308.98638916015625, |
| "loss": 0.4882, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.6926946640014648, |
| "rewards/margins": 1.4996764659881592, |
| "rewards/rejected": -3.192371129989624, |
| "step": 3505 |
| }, |
| { |
| "epoch": 0.9186076943208584, |
| "grad_norm": 6.586010456085205, |
| "learning_rate": 1.6330803454593039e-06, |
| "logits/chosen": -2.993813991546631, |
| "logits/rejected": -3.0858685970306396, |
| "logps/chosen": -304.7975769042969, |
| "logps/rejected": -281.93597412109375, |
| "loss": 0.4022, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.3266664743423462, |
| "rewards/margins": 1.8491216897964478, |
| "rewards/rejected": -3.175788164138794, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.9199162522899764, |
| "grad_norm": 5.34058141708374, |
| "learning_rate": 1.6069091860769433e-06, |
| "logits/chosen": -2.8806405067443848, |
| "logits/rejected": -3.0084080696105957, |
| "logps/chosen": -341.41949462890625, |
| "logps/rejected": -265.82244873046875, |
| "loss": 0.4117, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.258302927017212, |
| "rewards/margins": 1.7906951904296875, |
| "rewards/rejected": -3.0489978790283203, |
| "step": 3515 |
| }, |
| { |
| "epoch": 0.9212248102590945, |
| "grad_norm": 9.21333122253418, |
| "learning_rate": 1.5807380266945827e-06, |
| "logits/chosen": -3.036750078201294, |
| "logits/rejected": -3.039182186126709, |
| "logps/chosen": -262.3890686035156, |
| "logps/rejected": -282.87860107421875, |
| "loss": 0.6021, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.4617328643798828, |
| "rewards/margins": 1.3960540294647217, |
| "rewards/rejected": -2.8577866554260254, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.9225333682282125, |
| "grad_norm": 7.1087212562561035, |
| "learning_rate": 1.5545668673122222e-06, |
| "logits/chosen": -2.8940954208374023, |
| "logits/rejected": -2.9105896949768066, |
| "logps/chosen": -310.8441162109375, |
| "logps/rejected": -275.7667541503906, |
| "loss": 0.3913, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.1332801580429077, |
| "rewards/margins": 1.8010799884796143, |
| "rewards/rejected": -2.9343602657318115, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.9238419261973305, |
| "grad_norm": 8.39911937713623, |
| "learning_rate": 1.5283957079298614e-06, |
| "logits/chosen": -2.99474835395813, |
| "logits/rejected": -3.022717237472534, |
| "logps/chosen": -317.1966857910156, |
| "logps/rejected": -348.03912353515625, |
| "loss": 0.5075, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.2938311100006104, |
| "rewards/margins": 1.4564930200576782, |
| "rewards/rejected": -2.75032377243042, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.9251504841664486, |
| "grad_norm": 5.257741928100586, |
| "learning_rate": 1.5022245485475006e-06, |
| "logits/chosen": -3.0030980110168457, |
| "logits/rejected": -3.0215423107147217, |
| "logps/chosen": -259.13018798828125, |
| "logps/rejected": -269.13836669921875, |
| "loss": 0.6352, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4522452354431152, |
| "rewards/margins": 1.424629807472229, |
| "rewards/rejected": -2.8768749237060547, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.9264590421355666, |
| "grad_norm": 7.180831432342529, |
| "learning_rate": 1.4760533891651402e-06, |
| "logits/chosen": -2.985456705093384, |
| "logits/rejected": -3.057675838470459, |
| "logps/chosen": -301.18206787109375, |
| "logps/rejected": -271.5340881347656, |
| "loss": 0.4618, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.2516207695007324, |
| "rewards/margins": 1.704184889793396, |
| "rewards/rejected": -2.955806016921997, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.9277676001046846, |
| "grad_norm": 5.434940814971924, |
| "learning_rate": 1.4498822297827795e-06, |
| "logits/chosen": -3.045609951019287, |
| "logits/rejected": -3.0566811561584473, |
| "logps/chosen": -275.1131896972656, |
| "logps/rejected": -282.7756042480469, |
| "loss": 0.4518, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.5488427877426147, |
| "rewards/margins": 1.5551543235778809, |
| "rewards/rejected": -3.103996992111206, |
| "step": 3545 |
| }, |
| { |
| "epoch": 0.9290761580738026, |
| "grad_norm": 4.331151008605957, |
| "learning_rate": 1.4237110704004189e-06, |
| "logits/chosen": -2.9415290355682373, |
| "logits/rejected": -3.057833671569824, |
| "logps/chosen": -267.46356201171875, |
| "logps/rejected": -269.8301696777344, |
| "loss": 0.4672, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.1876513957977295, |
| "rewards/margins": 1.8198295831680298, |
| "rewards/rejected": -3.0074806213378906, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.9303847160429207, |
| "grad_norm": 5.2976813316345215, |
| "learning_rate": 1.3975399110180581e-06, |
| "logits/chosen": -2.95489764213562, |
| "logits/rejected": -2.976945400238037, |
| "logps/chosen": -272.0807189941406, |
| "logps/rejected": -283.7767333984375, |
| "loss": 0.5264, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.4996970891952515, |
| "rewards/margins": 1.2823091745376587, |
| "rewards/rejected": -2.7820065021514893, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.9316932740120387, |
| "grad_norm": 7.1556196212768555, |
| "learning_rate": 1.3713687516356975e-06, |
| "logits/chosen": -3.040180206298828, |
| "logits/rejected": -3.0060112476348877, |
| "logps/chosen": -238.9356689453125, |
| "logps/rejected": -238.90194702148438, |
| "loss": 0.4935, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.2994797229766846, |
| "rewards/margins": 1.4473230838775635, |
| "rewards/rejected": -2.746802568435669, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.9330018319811567, |
| "grad_norm": 5.718963623046875, |
| "learning_rate": 1.345197592253337e-06, |
| "logits/chosen": -2.975170135498047, |
| "logits/rejected": -2.9809365272521973, |
| "logps/chosen": -262.6956787109375, |
| "logps/rejected": -278.5767822265625, |
| "loss": 0.3928, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.0227237939834595, |
| "rewards/margins": 1.6377525329589844, |
| "rewards/rejected": -2.6604764461517334, |
| "step": 3565 |
| }, |
| { |
| "epoch": 0.9343103899502748, |
| "grad_norm": 8.762356758117676, |
| "learning_rate": 1.3190264328709762e-06, |
| "logits/chosen": -2.9716594219207764, |
| "logits/rejected": -3.007230520248413, |
| "logps/chosen": -306.72589111328125, |
| "logps/rejected": -303.2646179199219, |
| "loss": 0.5313, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.334179401397705, |
| "rewards/margins": 1.589410662651062, |
| "rewards/rejected": -2.9235899448394775, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.9356189479193928, |
| "grad_norm": 9.781453132629395, |
| "learning_rate": 1.2928552734886158e-06, |
| "logits/chosen": -2.9384396076202393, |
| "logits/rejected": -3.0885565280914307, |
| "logps/chosen": -293.48382568359375, |
| "logps/rejected": -283.4349670410156, |
| "loss": 0.6182, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.8570163249969482, |
| "rewards/margins": 1.306370496749878, |
| "rewards/rejected": -3.163386821746826, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.9369275058885108, |
| "grad_norm": 7.885117053985596, |
| "learning_rate": 1.266684114106255e-06, |
| "logits/chosen": -2.9314887523651123, |
| "logits/rejected": -3.0194966793060303, |
| "logps/chosen": -291.6297912597656, |
| "logps/rejected": -270.6680603027344, |
| "loss": 0.5607, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.429483413696289, |
| "rewards/margins": 1.4721983671188354, |
| "rewards/rejected": -2.901681661605835, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.9382360638576289, |
| "grad_norm": 9.249140739440918, |
| "learning_rate": 1.2405129547238943e-06, |
| "logits/chosen": -2.9748375415802, |
| "logits/rejected": -3.042620897293091, |
| "logps/chosen": -338.7610778808594, |
| "logps/rejected": -306.6709899902344, |
| "loss": 0.5191, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.091373324394226, |
| "rewards/margins": 1.554322600364685, |
| "rewards/rejected": -2.645695924758911, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.9395446218267469, |
| "grad_norm": 7.393261432647705, |
| "learning_rate": 1.2143417953415337e-06, |
| "logits/chosen": -3.0142085552215576, |
| "logits/rejected": -3.020599603652954, |
| "logps/chosen": -287.16290283203125, |
| "logps/rejected": -286.26214599609375, |
| "loss": 0.4996, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.6127214431762695, |
| "rewards/margins": 1.7448869943618774, |
| "rewards/rejected": -3.3576083183288574, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.9408531797958649, |
| "grad_norm": 10.740133285522461, |
| "learning_rate": 1.1881706359591731e-06, |
| "logits/chosen": -3.0725698471069336, |
| "logits/rejected": -3.0806450843811035, |
| "logps/chosen": -308.4686279296875, |
| "logps/rejected": -282.9092712402344, |
| "loss": 0.5655, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.3958985805511475, |
| "rewards/margins": 1.4924328327178955, |
| "rewards/rejected": -2.888331651687622, |
| "step": 3595 |
| }, |
| { |
| "epoch": 0.942161737764983, |
| "grad_norm": 5.223201274871826, |
| "learning_rate": 1.1619994765768126e-06, |
| "logits/chosen": -2.972827672958374, |
| "logits/rejected": -2.9871978759765625, |
| "logps/chosen": -298.4678955078125, |
| "logps/rejected": -260.34326171875, |
| "loss": 0.511, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.3953911066055298, |
| "rewards/margins": 1.678969144821167, |
| "rewards/rejected": -3.0743606090545654, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.942161737764983, |
| "eval_logits/chosen": -3.021827459335327, |
| "eval_logits/rejected": -3.038386583328247, |
| "eval_logps/chosen": -297.16925048828125, |
| "eval_logps/rejected": -292.7310485839844, |
| "eval_loss": 0.4937221109867096, |
| "eval_rewards/accuracies": 0.7570000290870667, |
| "eval_rewards/chosen": -1.393584966659546, |
| "eval_rewards/margins": 1.4860624074935913, |
| "eval_rewards/rejected": -2.8796472549438477, |
| "eval_runtime": 762.7775, |
| "eval_samples_per_second": 2.622, |
| "eval_steps_per_second": 0.328, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.943470295734101, |
| "grad_norm": 9.47269058227539, |
| "learning_rate": 1.1358283171944518e-06, |
| "logits/chosen": -3.0384669303894043, |
| "logits/rejected": -3.0766215324401855, |
| "logps/chosen": -334.4903869628906, |
| "logps/rejected": -284.4979553222656, |
| "loss": 0.3992, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.2259769439697266, |
| "rewards/margins": 1.6997013092041016, |
| "rewards/rejected": -2.925678253173828, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.944778853703219, |
| "grad_norm": 5.090957164764404, |
| "learning_rate": 1.1096571578120912e-06, |
| "logits/chosen": -3.0102407932281494, |
| "logits/rejected": -3.027517318725586, |
| "logps/chosen": -324.6131286621094, |
| "logps/rejected": -330.85943603515625, |
| "loss": 0.4659, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.461601972579956, |
| "rewards/margins": 1.6823663711547852, |
| "rewards/rejected": -3.1439685821533203, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.9460874116723371, |
| "grad_norm": 6.4353485107421875, |
| "learning_rate": 1.0834859984297304e-06, |
| "logits/chosen": -3.0820727348327637, |
| "logits/rejected": -3.122159481048584, |
| "logps/chosen": -327.16680908203125, |
| "logps/rejected": -324.119140625, |
| "loss": 0.5123, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.2964187860488892, |
| "rewards/margins": 1.2985150814056396, |
| "rewards/rejected": -2.5949337482452393, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.9473959696414551, |
| "grad_norm": 10.36352252960205, |
| "learning_rate": 1.0573148390473699e-06, |
| "logits/chosen": -2.820996046066284, |
| "logits/rejected": -2.93684720993042, |
| "logps/chosen": -257.29962158203125, |
| "logps/rejected": -299.9443664550781, |
| "loss": 0.469, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.3600294589996338, |
| "rewards/margins": 1.814413070678711, |
| "rewards/rejected": -3.1744422912597656, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.9487045276105731, |
| "grad_norm": 7.558009147644043, |
| "learning_rate": 1.0311436796650093e-06, |
| "logits/chosen": -3.0475263595581055, |
| "logits/rejected": -3.0534262657165527, |
| "logps/chosen": -290.08026123046875, |
| "logps/rejected": -308.2751159667969, |
| "loss": 0.3977, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.28037428855896, |
| "rewards/margins": 1.833155870437622, |
| "rewards/rejected": -3.1135306358337402, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.9500130855796912, |
| "grad_norm": 4.711981773376465, |
| "learning_rate": 1.0049725202826487e-06, |
| "logits/chosen": -2.9161949157714844, |
| "logits/rejected": -3.079786777496338, |
| "logps/chosen": -344.1520080566406, |
| "logps/rejected": -316.8307189941406, |
| "loss": 0.494, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.3298174142837524, |
| "rewards/margins": 1.6952533721923828, |
| "rewards/rejected": -3.0250706672668457, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.9513216435488092, |
| "grad_norm": 11.685251235961914, |
| "learning_rate": 9.78801360900288e-07, |
| "logits/chosen": -3.006196975708008, |
| "logits/rejected": -3.021721601486206, |
| "logps/chosen": -306.54205322265625, |
| "logps/rejected": -291.6183776855469, |
| "loss": 0.5325, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.6336702108383179, |
| "rewards/margins": 1.6822528839111328, |
| "rewards/rejected": -3.3159232139587402, |
| "step": 3635 |
| }, |
| { |
| "epoch": 0.9526302015179272, |
| "grad_norm": 7.035577297210693, |
| "learning_rate": 9.526302015179273e-07, |
| "logits/chosen": -3.0137877464294434, |
| "logits/rejected": -3.0155646800994873, |
| "logps/chosen": -280.07354736328125, |
| "logps/rejected": -279.3880310058594, |
| "loss": 0.4263, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2827895879745483, |
| "rewards/margins": 1.7637962102890015, |
| "rewards/rejected": -3.04658579826355, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.9539387594870453, |
| "grad_norm": 6.1892852783203125, |
| "learning_rate": 9.264590421355667e-07, |
| "logits/chosen": -3.0102293491363525, |
| "logits/rejected": -3.043808937072754, |
| "logps/chosen": -282.82232666015625, |
| "logps/rejected": -295.6250915527344, |
| "loss": 0.5537, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.4279065132141113, |
| "rewards/margins": 1.4096248149871826, |
| "rewards/rejected": -2.837531566619873, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.9552473174561633, |
| "grad_norm": 7.0120344161987305, |
| "learning_rate": 9.00287882753206e-07, |
| "logits/chosen": -2.8158748149871826, |
| "logits/rejected": -3.0110549926757812, |
| "logps/chosen": -322.323974609375, |
| "logps/rejected": -296.1746520996094, |
| "loss": 0.5607, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5179932117462158, |
| "rewards/margins": 1.3574055433273315, |
| "rewards/rejected": -2.875398635864258, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.9565558754252813, |
| "grad_norm": 6.899664878845215, |
| "learning_rate": 8.741167233708454e-07, |
| "logits/chosen": -2.86753249168396, |
| "logits/rejected": -3.003220558166504, |
| "logps/chosen": -288.17242431640625, |
| "logps/rejected": -287.6828918457031, |
| "loss": 0.5421, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4167354106903076, |
| "rewards/margins": 1.3222873210906982, |
| "rewards/rejected": -2.739022731781006, |
| "step": 3655 |
| }, |
| { |
| "epoch": 0.9578644333943994, |
| "grad_norm": 7.400771617889404, |
| "learning_rate": 8.479455639884849e-07, |
| "logits/chosen": -2.8408029079437256, |
| "logits/rejected": -3.037991762161255, |
| "logps/chosen": -289.79815673828125, |
| "logps/rejected": -266.6900939941406, |
| "loss": 0.4928, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.5893640518188477, |
| "rewards/margins": 1.4343297481536865, |
| "rewards/rejected": -3.023693799972534, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.9591729913635174, |
| "grad_norm": 7.997771263122559, |
| "learning_rate": 8.217744046061241e-07, |
| "logits/chosen": -3.0197484493255615, |
| "logits/rejected": -2.9930665493011475, |
| "logps/chosen": -272.28826904296875, |
| "logps/rejected": -285.2413024902344, |
| "loss": 0.5595, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3076242208480835, |
| "rewards/margins": 1.3322126865386963, |
| "rewards/rejected": -2.6398367881774902, |
| "step": 3665 |
| }, |
| { |
| "epoch": 0.9604815493326354, |
| "grad_norm": 8.226212501525879, |
| "learning_rate": 7.956032452237634e-07, |
| "logits/chosen": -3.0422444343566895, |
| "logits/rejected": -3.020461320877075, |
| "logps/chosen": -278.33685302734375, |
| "logps/rejected": -284.96014404296875, |
| "loss": 0.4872, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.0055291652679443, |
| "rewards/margins": 1.4944764375686646, |
| "rewards/rejected": -2.5000054836273193, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.9617901073017535, |
| "grad_norm": 4.699522018432617, |
| "learning_rate": 7.694320858414028e-07, |
| "logits/chosen": -3.019260883331299, |
| "logits/rejected": -3.022517442703247, |
| "logps/chosen": -271.10174560546875, |
| "logps/rejected": -268.4229431152344, |
| "loss": 0.4087, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.1063932180404663, |
| "rewards/margins": 1.805824875831604, |
| "rewards/rejected": -2.9122183322906494, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.9630986652708715, |
| "grad_norm": 7.629035949707031, |
| "learning_rate": 7.432609264590422e-07, |
| "logits/chosen": -3.0566253662109375, |
| "logits/rejected": -3.069202423095703, |
| "logps/chosen": -271.2048034667969, |
| "logps/rejected": -276.36383056640625, |
| "loss": 0.4807, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0480868816375732, |
| "rewards/margins": 1.5648603439331055, |
| "rewards/rejected": -2.6129469871520996, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.9644072232399895, |
| "grad_norm": 6.998068809509277, |
| "learning_rate": 7.170897670766816e-07, |
| "logits/chosen": -2.845982789993286, |
| "logits/rejected": -2.9252867698669434, |
| "logps/chosen": -268.194091796875, |
| "logps/rejected": -262.69482421875, |
| "loss": 0.6263, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -2.0061893463134766, |
| "rewards/margins": 0.8048659563064575, |
| "rewards/rejected": -2.8110554218292236, |
| "step": 3685 |
| }, |
| { |
| "epoch": 0.9657157812091076, |
| "grad_norm": 4.9705586433410645, |
| "learning_rate": 6.90918607694321e-07, |
| "logits/chosen": -2.9738311767578125, |
| "logits/rejected": -2.96089768409729, |
| "logps/chosen": -269.18121337890625, |
| "logps/rejected": -312.06085205078125, |
| "loss": 0.4902, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.5259528160095215, |
| "rewards/margins": 1.5478928089141846, |
| "rewards/rejected": -3.073845624923706, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.9670243391782256, |
| "grad_norm": 7.587747573852539, |
| "learning_rate": 6.647474483119602e-07, |
| "logits/chosen": -2.985318660736084, |
| "logits/rejected": -3.0679590702056885, |
| "logps/chosen": -281.947265625, |
| "logps/rejected": -309.543701171875, |
| "loss": 0.451, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.10109543800354, |
| "rewards/margins": 1.621584177017212, |
| "rewards/rejected": -2.722679615020752, |
| "step": 3695 |
| }, |
| { |
| "epoch": 0.9683328971473436, |
| "grad_norm": 7.26895809173584, |
| "learning_rate": 6.385762889295996e-07, |
| "logits/chosen": -2.9335684776306152, |
| "logits/rejected": -2.8907690048217773, |
| "logps/chosen": -280.83489990234375, |
| "logps/rejected": -266.060546875, |
| "loss": 0.4105, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.2659553289413452, |
| "rewards/margins": 1.7439613342285156, |
| "rewards/rejected": -3.009916305541992, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.9696414551164617, |
| "grad_norm": 7.7010111808776855, |
| "learning_rate": 6.12405129547239e-07, |
| "logits/chosen": -2.914029836654663, |
| "logits/rejected": -2.911940097808838, |
| "logps/chosen": -285.9615173339844, |
| "logps/rejected": -265.48681640625, |
| "loss": 0.5042, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.1594058275222778, |
| "rewards/margins": 1.309971570968628, |
| "rewards/rejected": -2.4693775177001953, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.9709500130855797, |
| "grad_norm": 9.604898452758789, |
| "learning_rate": 5.862339701648783e-07, |
| "logits/chosen": -2.9038796424865723, |
| "logits/rejected": -2.8771822452545166, |
| "logps/chosen": -322.8350830078125, |
| "logps/rejected": -357.41046142578125, |
| "loss": 0.5632, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.2673771381378174, |
| "rewards/margins": 1.460440993309021, |
| "rewards/rejected": -2.727818012237549, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.9722585710546977, |
| "grad_norm": 4.567657947540283, |
| "learning_rate": 5.600628107825177e-07, |
| "logits/chosen": -2.9830799102783203, |
| "logits/rejected": -3.0490164756774902, |
| "logps/chosen": -347.1837463378906, |
| "logps/rejected": -324.29754638671875, |
| "loss": 0.4124, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.351874589920044, |
| "rewards/margins": 1.7234786748886108, |
| "rewards/rejected": -3.0753531455993652, |
| "step": 3715 |
| }, |
| { |
| "epoch": 0.9735671290238157, |
| "grad_norm": 7.263519763946533, |
| "learning_rate": 5.338916514001571e-07, |
| "logits/chosen": -3.0393004417419434, |
| "logits/rejected": -3.0236408710479736, |
| "logps/chosen": -294.21710205078125, |
| "logps/rejected": -302.565673828125, |
| "loss": 0.4879, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.2430657148361206, |
| "rewards/margins": 1.4778881072998047, |
| "rewards/rejected": -2.7209537029266357, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.9748756869929338, |
| "grad_norm": 10.831700325012207, |
| "learning_rate": 5.077204920177964e-07, |
| "logits/chosen": -3.0219333171844482, |
| "logits/rejected": -3.1004836559295654, |
| "logps/chosen": -319.2060852050781, |
| "logps/rejected": -278.51043701171875, |
| "loss": 0.4765, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.4795074462890625, |
| "rewards/margins": 1.5765758752822876, |
| "rewards/rejected": -3.0560834407806396, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.9761842449620518, |
| "grad_norm": 8.930159568786621, |
| "learning_rate": 4.815493326354357e-07, |
| "logits/chosen": -2.993366003036499, |
| "logits/rejected": -3.02050518989563, |
| "logps/chosen": -330.6523132324219, |
| "logps/rejected": -347.84375, |
| "loss": 0.4885, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.2553311586380005, |
| "rewards/margins": 1.5938208103179932, |
| "rewards/rejected": -2.849151849746704, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.9774928029311698, |
| "grad_norm": 8.077557563781738, |
| "learning_rate": 4.5537817325307516e-07, |
| "logits/chosen": -3.0191256999969482, |
| "logits/rejected": -2.950045347213745, |
| "logps/chosen": -328.70172119140625, |
| "logps/rejected": -297.58648681640625, |
| "loss": 0.4838, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.5267088413238525, |
| "rewards/margins": 1.4226138591766357, |
| "rewards/rejected": -2.9493229389190674, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.9788013609002879, |
| "grad_norm": 11.086000442504883, |
| "learning_rate": 4.2920701387071454e-07, |
| "logits/chosen": -2.9949474334716797, |
| "logits/rejected": -3.0554397106170654, |
| "logps/chosen": -310.36053466796875, |
| "logps/rejected": -330.1317138671875, |
| "loss": 0.6288, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3402494192123413, |
| "rewards/margins": 1.4754968881607056, |
| "rewards/rejected": -2.815746307373047, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.9801099188694059, |
| "grad_norm": 9.768365859985352, |
| "learning_rate": 4.0303585448835386e-07, |
| "logits/chosen": -2.7967236042022705, |
| "logits/rejected": -2.9284253120422363, |
| "logps/chosen": -260.5437927246094, |
| "logps/rejected": -275.2959899902344, |
| "loss": 0.5966, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4888842105865479, |
| "rewards/margins": 1.163434386253357, |
| "rewards/rejected": -2.6523184776306152, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.9814184768385239, |
| "grad_norm": 5.782496452331543, |
| "learning_rate": 3.7686469510599324e-07, |
| "logits/chosen": -3.020731210708618, |
| "logits/rejected": -3.06559157371521, |
| "logps/chosen": -329.1947937011719, |
| "logps/rejected": -303.79425048828125, |
| "loss": 0.4177, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.4123915433883667, |
| "rewards/margins": 1.5942955017089844, |
| "rewards/rejected": -3.0066869258880615, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.982727034807642, |
| "grad_norm": 6.72975492477417, |
| "learning_rate": 3.506935357236326e-07, |
| "logits/chosen": -3.057302951812744, |
| "logits/rejected": -3.0887694358825684, |
| "logps/chosen": -249.98825073242188, |
| "logps/rejected": -255.726806640625, |
| "loss": 0.48, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.3327161073684692, |
| "rewards/margins": 1.2151663303375244, |
| "rewards/rejected": -2.547882318496704, |
| "step": 3755 |
| }, |
| { |
| "epoch": 0.98403559277676, |
| "grad_norm": 5.480231285095215, |
| "learning_rate": 3.2452237634127194e-07, |
| "logits/chosen": -2.945413112640381, |
| "logits/rejected": -3.0293281078338623, |
| "logps/chosen": -278.617431640625, |
| "logps/rejected": -259.78509521484375, |
| "loss": 0.3754, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.1209717988967896, |
| "rewards/margins": 1.8064987659454346, |
| "rewards/rejected": -2.9274706840515137, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.985344150745878, |
| "grad_norm": 8.33041763305664, |
| "learning_rate": 2.983512169589113e-07, |
| "logits/chosen": -2.937929630279541, |
| "logits/rejected": -2.863677501678467, |
| "logps/chosen": -267.8946838378906, |
| "logps/rejected": -290.6978454589844, |
| "loss": 0.4701, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.3163795471191406, |
| "rewards/margins": 1.3320225477218628, |
| "rewards/rejected": -2.648401975631714, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.9866527087149961, |
| "grad_norm": 7.798052787780762, |
| "learning_rate": 2.7218005757655065e-07, |
| "logits/chosen": -2.946786642074585, |
| "logits/rejected": -3.0757522583007812, |
| "logps/chosen": -303.80364990234375, |
| "logps/rejected": -338.39898681640625, |
| "loss": 0.6613, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.5532114505767822, |
| "rewards/margins": 1.0035349130630493, |
| "rewards/rejected": -2.556746482849121, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.9879612666841141, |
| "grad_norm": 8.479823112487793, |
| "learning_rate": 2.4600889819419e-07, |
| "logits/chosen": -2.9792187213897705, |
| "logits/rejected": -3.042782783508301, |
| "logps/chosen": -345.6416015625, |
| "logps/rejected": -288.8009033203125, |
| "loss": 0.5072, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -1.4748677015304565, |
| "rewards/margins": 1.32082998752594, |
| "rewards/rejected": -2.7956976890563965, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.9892698246532321, |
| "grad_norm": 7.717909812927246, |
| "learning_rate": 2.198377388118294e-07, |
| "logits/chosen": -2.9219813346862793, |
| "logits/rejected": -3.069363832473755, |
| "logps/chosen": -334.748046875, |
| "logps/rejected": -300.7204895019531, |
| "loss": 0.3629, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.2521204948425293, |
| "rewards/margins": 1.8430713415145874, |
| "rewards/rejected": -3.095191717147827, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.9905783826223502, |
| "grad_norm": 8.238800048828125, |
| "learning_rate": 1.9366657942946875e-07, |
| "logits/chosen": -3.0556554794311523, |
| "logits/rejected": -2.9894509315490723, |
| "logps/chosen": -259.3734130859375, |
| "logps/rejected": -261.5130310058594, |
| "loss": 0.5617, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.3929826021194458, |
| "rewards/margins": 1.1740365028381348, |
| "rewards/rejected": -2.567018985748291, |
| "step": 3785 |
| }, |
| { |
| "epoch": 0.9918869405914682, |
| "grad_norm": 6.61944580078125, |
| "learning_rate": 1.6749542004710808e-07, |
| "logits/chosen": -2.9529948234558105, |
| "logits/rejected": -3.041637897491455, |
| "logps/chosen": -303.34112548828125, |
| "logps/rejected": -273.41619873046875, |
| "loss": 0.4298, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.2014284133911133, |
| "rewards/margins": 1.7370707988739014, |
| "rewards/rejected": -2.9384992122650146, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.9931954985605862, |
| "grad_norm": 8.906229972839355, |
| "learning_rate": 1.4132426066474745e-07, |
| "logits/chosen": -2.8641409873962402, |
| "logits/rejected": -2.9114432334899902, |
| "logps/chosen": -270.04541015625, |
| "logps/rejected": -307.6247863769531, |
| "loss": 0.553, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.198313593864441, |
| "rewards/margins": 1.2590378522872925, |
| "rewards/rejected": -2.4573516845703125, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.9945040565297043, |
| "grad_norm": 8.40211009979248, |
| "learning_rate": 1.1515310128238682e-07, |
| "logits/chosen": -2.9926180839538574, |
| "logits/rejected": -3.0532259941101074, |
| "logps/chosen": -248.5250244140625, |
| "logps/rejected": -262.3029479980469, |
| "loss": 0.5102, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.4260847568511963, |
| "rewards/margins": 1.5013940334320068, |
| "rewards/rejected": -2.927478790283203, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.9958126144988223, |
| "grad_norm": 8.588583946228027, |
| "learning_rate": 8.898194190002618e-08, |
| "logits/chosen": -3.000002384185791, |
| "logits/rejected": -3.0484166145324707, |
| "logps/chosen": -324.4134826660156, |
| "logps/rejected": -311.4660339355469, |
| "loss": 0.5217, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.3233641386032104, |
| "rewards/margins": 1.193496584892273, |
| "rewards/rejected": -2.5168607234954834, |
| "step": 3805 |
| }, |
| { |
| "epoch": 0.9971211724679403, |
| "grad_norm": 7.888390064239502, |
| "learning_rate": 6.281078251766554e-08, |
| "logits/chosen": -3.020127773284912, |
| "logits/rejected": -3.0420382022857666, |
| "logps/chosen": -278.3679504394531, |
| "logps/rejected": -265.987060546875, |
| "loss": 0.5211, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -1.2625151872634888, |
| "rewards/margins": 1.2696560621261597, |
| "rewards/rejected": -2.5321712493896484, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.9984297304370584, |
| "grad_norm": 7.750138759613037, |
| "learning_rate": 3.6639623135304896e-08, |
| "logits/chosen": -2.9767794609069824, |
| "logits/rejected": -3.0281155109405518, |
| "logps/chosen": -273.447509765625, |
| "logps/rejected": -290.362060546875, |
| "loss": 0.5369, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.5177167654037476, |
| "rewards/margins": 1.1918412446975708, |
| "rewards/rejected": -2.7095580101013184, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.9997382884061764, |
| "grad_norm": 8.182830810546875, |
| "learning_rate": 1.0468463752944255e-08, |
| "logits/chosen": -2.9969050884246826, |
| "logits/rejected": -3.121110200881958, |
| "logps/chosen": -325.16021728515625, |
| "logps/rejected": -278.646728515625, |
| "loss": 0.5479, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.2306115627288818, |
| "rewards/margins": 1.541174054145813, |
| "rewards/rejected": -2.7717857360839844, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 3821, |
| "total_flos": 0.0, |
| "train_loss": 0.5167291775107914, |
| "train_runtime": 38075.0654, |
| "train_samples_per_second": 1.606, |
| "train_steps_per_second": 0.1 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 3821, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|